def bibFile(self, line, robust=False):
    """Load a BibTeX file and register citation labels for every entry.

    For each entry two labels are stored under the entry's ``_code``:
    ``self.cite['t']`` gets ``"Authors (year)"`` and ``self.cite['p']``
    gets ``"Authors, year"``.

    Args:
        line: Source handed to ``BibList.import_bibtex`` (imported with
            ``normalize=False``).
        robust: When true, publish a plain-text and an HTML summary of the
            loaded keys through ``publish_display_data``.
    """
    bibliography = biblist.BibList()
    bibliography.import_bibtex(line, normalize=False)

    for entry in bibliography.bib.values():
        # Transpose the per-author records. list() is required because zip()
        # returns a lazy, non-subscriptable iterator on Python 3.
        # NOTE(review): column 1 presumably holds the last names -- confirm
        # against the author layout produced by biblist.
        columns = list(zip(*entry['author']))[1:3]
        names = columns[0]

        if len(names) > 1:
            if len(names) > self.etal:
                # More authors than the configured threshold: abbreviate.
                label = names[0] + ' et. al.'
            else:
                label = ', '.join(names[:-1]) + ' and ' + names[-1]
        else:
            label = names[0]

        code = entry['_code']
        self.cite['t'][code] = '{0} ({1})'.format(label, entry['year'])
        self.cite['p'][code] = '{0}, {1}'.format(label, entry['year'])

    if robust:
        keys = ', '.join(self.cite['t'])
        count = len(self.cite['t'])
        publish_display_data(
            'printTex',
            {
                'text/plain':
                '{0} loaded, there are {1} keys ({2})'.format(
                    line, count, keys),
                'text/html':
                markdown(
                    '_{0}_ loaded<br/>There are __{1}__ keys:<br/>{2}'.format(
                        line, count, keys)),
            })
def _original_bibtex(self):
    """Fetch the BibTeX export for ``self.id`` from dl.acm.org and parse it.

    Returns:
        A ``biblist.BibList`` populated from the text of the ``<pre>``
        element(s) of the export page.

    Raises:
        AssertionError: If ``import_bibtex`` reports failure; the downloaded
            text is attached as the exception argument.
    """
    TEMPLATE = 'http://dl.acm.org/exportformats.cfm?id=%s&expformat=bibtex&_cf_containerId=theformats_body&_cf_nodebug=true&_cf_nocache=true&_cf_clientid=142656B43EEEE8D6E34FC208DBFCC647&_cf_rc=3'
    url = TEMPLATE % self.id
    d = pq(urlread(url))
    content = d('pre').text()

    # Prefer the Python 2 module the original code relied on and fall back
    # to io.StringIO so the method also works on Python 3.
    try:
        from StringIO import StringIO
    except ImportError:
        from io import StringIO

    f = StringIO(content)
    b = biblist.BibList()
    ret = b.import_bibtex(f)
    # Explicit raise instead of ``assert`` so the check survives ``python -O``
    # while callers still see the same exception type.
    if not ret:
        raise AssertionError(content)
    return b
def _parse_bibtex(self, f):
    """Parse BibTeX from ``f`` and return the resulting ``BibList``.

    Args:
        f: Whatever ``BibList.import_bibtex`` accepts. If it supports
            ``seek``/``read`` its content is included in the debug log on
            failure; otherwise ``f`` itself is logged.

    Returns:
        The populated ``biblist.BibList``, or ``None`` when the import
        reports failure (a debug message is logged in that case).
    """
    b = biblist.BibList()
    if b.import_bibtex(f):
        return b

    try:
        f.seek(0)
        content = '\n' + f.read()
    except Exception:
        # ``f`` is not a readable/seekable stream (e.g. a plain string).
        # Catch Exception rather than everything so that KeyboardInterrupt
        # and SystemExit are not swallowed.
        content = f
    logging.debug('parse bibtex failed:%s', content)
    return None
def from_bibtex(f):
    """Build one ``IEEE`` object per item of the BibTeX source ``f``.

    Args:
        f: Source passed to ``BibList.import_bibtex``.

    Returns:
        A list with ``IEEE.from_bibtex_item(item)`` for every item returned
        by ``BibList.get_items()``.

    Raises:
        AssertionError: If ``import_bibtex`` reports failure.
    """
    b = biblist.BibList()
    # Explicit raise instead of ``assert`` so the validation is not stripped
    # under ``python -O``; the exception type is unchanged for callers.
    if not b.import_bibtex(f):
        raise AssertionError('failed to import BibTeX from %r' % (f,))
    return [IEEE.from_bibtex_item(it) for it in b.get_items()]
def from_bibtex(f):
    """Build one ``ACM`` object per item of the BibTeX source ``f``.

    Each object is created from the item's ``'title'`` field through
    ``ACM.from_title``.

    Args:
        f: Source passed to ``BibList.import_bibtex``.

    Returns:
        A list with ``ACM.from_title(item['title'])`` for every item
        returned by ``BibList.get_items()``.

    Raises:
        AssertionError: If ``import_bibtex`` reports failure.
    """
    b = biblist.BibList()
    # Explicit raise instead of ``assert`` so the validation is not stripped
    # under ``python -O``; the exception type is unchanged for callers.
    if not b.import_bibtex(f):
        raise AssertionError('failed to import BibTeX from %r' % (f,))
    return [ACM.from_title(it['title']) for it in b.get_items()]
def main():
    """Extract the BibTeX entries cited by a ``.tex``/``.aux`` file.

    Parses the command line, reads the cited keys from the source file(s),
    loads the bibliography database(s), optionally strips fields, and writes
    the cited entries as BibTeX to the output file (``-`` means stdout).
    Problems with arguments or databases are reported via ``parser.error``.
    """
    import optparse

    usage = (
        "usage: %prog [options] datafile1 [datafile2 ...] "
        "Extracts a BibTeX database according to an aux or tex file. "
        "Keeps only those items that are cited DESCRIPTION "
        "It reads an *.aux file as produced by LaTeX or a *.tex file "
        "directly and writes to standard output a bibtex file containing "
        "exactly the bibtex entries refereed in the aux file. "
        "NOTE: If the environment variable BIBDB is set, this is used as "
        "bibliography database "
        "************************************************************ "
        "USE %prog --help for details "
        "************************************************************ "
    )
    parser = optparse.OptionParser(
        usage, version=" %prog with biblio-py-{0}".format(VERSION))
    parser.add_option(
        "-d", "--database", action='append', type='string',
        help="Database to use, default: %s. May be used more than once" %
        (dumpfile))
    parser.add_option("-l", "--list", action="store_true", dest="list",
                      default=False,
                      help="List cited keys to stdout (screen)")
    parser.add_option("-o", "--output", default=None,
                      help="Output file. Use '-' for stdout (screen)")
    parser.add_option(
        "", "--remove-common", action="store_true", default=False,
        help='Remove "url from articles", '
        '"doi, issn, month and abstracts from everything"')
    parser.add_option(
        "", "--remove-fields", action='append', type='string',
        help='Remove fields from types. Notations is '
        '"field:type1,type2,..,typen" to remove field from these types '
        '(for instance ARTICLES and BOOKS but not for INPROCEEDINGS), '
        'Use "field" (with no ":") for removing the field for all types. '
        'It can be used more than once for removing several fields')
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=True,
                      help="Give some informational messages. [default]")
    parser.add_option("-q", "--quiet", action="store_false", dest="verbose",
                      help="Suppress some messages.")

    (op, args) = parser.parse_args()

    if len(args) < 1:
        parser.error(
            "Incorrect number of arguments. You have to give a source filename"
        )

    # Read the source file(s).
    # NOTE(review): every iteration overwrites dbf/cit, so only the last
    # source file takes effect -- confirm this is intended.
    for fname in args:
        if fname.endswith('tex'):
            dbf, cit = parse_texfile(fname)
        elif fname.endswith('aux'):
            dbf, cit = parse_auxfile(fname)
        else:
            parser.error('Incorrect argument "%s"' % (fname))

    if dbf is not None:
        dbf = find_bibfile(dbf)

    # Determine the database: the command-line option overrides the
    # bibliography named in the source file, which overrides the default dump.
    dbfiles = []
    if op.database is not None:
        dbfiles = op.database
    elif dbf is not None:
        dbfiles = dbf
    elif dumpfile is not None:
        dbfiles += [dumpfile]
    else:
        parser.error('No Database found')

    # Read the database(s)
    b = biblist.BibList()
    for fname in dbfiles:
        if op.verbose:
            print('# Loading database {} ...'.format(fname))
        failed = False
        # Catch Exception (not BaseException) so Ctrl-C still interrupts.
        if 'dmp' in fname:
            try:
                b.load(fname)
            except Exception:
                failed = True
        elif 'bib' in fname:
            try:
                b.import_bibtex(fname, normalize=False)
            except Exception:
                failed = True
        else:
            failed = True

        if failed:
            mensaje = 'Database file {} not found or failed to load.\n'.format(
                fname)
            mensaje += 'Set the name as an option or set the environment variable BIBDB'
            parser.error(mensaje)

    if op.output is None:
        output = os.path.splitext(args[0])[0] + '.bib'
    else:
        output = op.output

    # Set fields to remove
    rem = []
    if op.remove_common:
        rem = rem + ['url:article', 'issn', 'doi', 'month', 'abstract']
    if op.remove_fields is not None:
        rem = rem + op.remove_fields

    if op.list:
        print('\n'.join(sorted(cit)))

    bout = biblist.BibList()
    # Map each entry's '_code' field to its key in the loaded database.
    citekeys = {b.get_item(k).get_field('_code'): k for k in b.ListItems}
    for k in cit:
        if k not in citekeys:
            print('# Warning: %s not found in database' % (k))
            continue
        if citekeys[k] in b.ListItems:
            item = b.get_item(citekeys[k])
            for cond in rem:
                remove_fields(item, cond)
            bout.add_item(item, k)

    mensaje = '# created with: %s\n' % (' '.join(sys.argv))
    contents = mensaje + bout.to_bibtex()
    if output == '-':
        # The --output help documents '-' as stdout; do not create a file
        # literally named '-'.
        sys.stdout.write(contents)
    else:
        with open(output, encoding='utf-8', mode='w') as fi:
            fi.write(contents)
        if op.verbose:
            print('Items saved to %s' % (output))
'director': ('<BR><span class="director">', '</span>. ') } css_style = """.title a, .title {font-weight: bold; color : #416DFF; } ol.bibliography li{ nmargin-bottom:0.5em;} .year:before {content:" (";} .year:after {content:").";} .authors {font-weight:bold; display:list;} .authors:after {content:". ";} .director:before{content:"Director: ";} """ head = ''' <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> <style type="text/css"> {0}</style> <title>Tesis Doctorales</title> </head> <body> <h2>Tesis Doctorales (PhD Thesis)</h2> <ol class="bibliography"> '''.format(css_style) b = biblist.BibList() b.import_bibtex(bibfile) b.sort(['year', 'author', 'reverse']) b.export_html(outputfile, head=head, style=htmlstyle, separate_css=False)
def main():
    """Load, filter, sort and export bibliography databases.

    Parses the command line, loads every ``.dmp``/``.bib`` database given
    (or the default dump file), keeps the items inside the requested year
    range that satisfy the search/include/exclude conditions, sorts them,
    and writes them to the requested output and/or dump file.
    """

    def get_strng_field(k):
        """Split a ``string:fields`` condition into ``(string, fields)``.

        An empty string part (with a colon present) means "search all
        strings" (``'*'``); an empty or missing field part means "all
        fields" (``[]``). Fields are separated by commas, as documented in
        the option help; extra colons are accepted as separators too.
        """
        text, sep, fieldspec = k.partition(':')
        if not sep:
            # Argument was of the form 'search_string': search all fields.
            return text, []
        ss = text if text != '' else '*'
        ff = [f for f in fieldspec.replace(':', ',').split(',') if f != '']
        return ss, ff

    ##########################################################################
    # Command line options
    ##########################################################################
    usage = (
        "usage: %prog [options] [datafile1] [datafile2 ...] "
        "Ejemplo de uso: "
        "$> %prog --search=LastName1:author --search=LastName2:author "
        "--search=LastName3:author --startyear=2000 --endyear=2008 "
        "--filter-exclude=2006:year --filter-exclude=LastName4:author "
        "--sort=year,month,author --format=tex --output=salida.tex "
        "biblio1.bib biblio2.bib.bz2 biblio1.dmp biblio2.dmp.gz "
        "Will get data from two bibtex files (biblio1.bib and "
        "biblio2.bib.bz2) and two dump files (biblio1.dmp and "
        "biblio2.dmp.gz) and retain all entries between 2000 and 2008 "
        "(except those of 2006) by authors LastName1,LastName2 and "
        "LastName3 but where LastName4 is not an author. "
        "The search is case insensitive. "
        "The output is written in latex form, ordered by key in ascending "
        "order, to the file salida.tex "
        "$> %prog - -o biblio.html "
        "Will get the data from standard input in BibTeX format and output "
        "it in html form to the file biblio.html "
        "******** Working with pipes ******** "
        "$> %prog -s LastName1:author biblio1.bib -f bib -o - | "
        "%prog -s LastName2:author biblio2.dmp - -o biblio.html "
        "Will get the items with LastName1 as author from biblio1.bib and "
        "the results are taken as input to merge with items by LastName2 "
        "from database biblio2.dmp. "
        "The output is in html format to the file biblio.html "
        "Note that two of the input files are compressed "
    )
    parser = optparse.OptionParser(
        usage, version=" %prog with biblio-py-{0}".format(VERSION))
    parser.add_option("", "--list", action="store_true",
                      help="List the database contents")
    parser.add_option(
        "", "--sort",
        help="Sort the items according to the following fields, for instance "
        "to sort them accoding to year and then author we would use "
        "--sort=year,author. In the same example, to sort in reverse order "
        "we would use: --sort=year,author,reverse. DEFAULT: key.")
    parser.add_option(
        "-s", "--search", action='append', type='string',
        help='SEARCH is a (COLON separated) pair "string_to_search:fields". '
        'If the field is empty defaults to ALL. Fields may be more than one. '
        'In that case it can be written as "field1,field2,...". '
        'This option may be used more than once')
    parser.add_option(
        "--year", default=None,
        help="--year=y is a shortcut to '--start-year=y --end-year=y'")
    parser.add_option("-b", "--startyear", type='int', default=0,
                      help='Start Year')
    parser.add_option("-e", "--endyear", type='int', default=9999,
                      help='End Year')
    parser.add_option(
        "-i", "--filter-include", action='append', type='string',
        help='Include all entries that verify the condition, given in the '
        'form string1:field1,field2,... It may be used more than once and '
        'only entries that verify ALL conditions will be retained.')
    parser.add_option(
        "-x", "--filter-exclude", action='append', type='string',
        help='Exclude all entries that verify the condition, given in the '
        'form string1:field1,field2,... It may be used more than once and '
        'only entries that do not verify ANY condition will be retained.')
    parser.add_option("", "--keep-keys", action="store_true", default=False,
                      help="Keep the original cite key")
    parser.add_option("-I", "--case-sensitive", action="store_true",
                      default=False, help="Make the search case sensitive")
    parser.add_option(
        "-o", "--output", default=None,
        help="Output file. Use '-' for stdout (screen). DEFAULT: No output")
    parser.add_option(
        "-f", "--format", default=None,
        help="format of output, possible values are: short, full, bibtex, "
        "tex, html, xml DEFAULT= short")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=False, help="Give some informational messages.")
    parser.add_option(
        "-d", "--save-dump",
        help="Save (dump) the database IN INTERNAL FORM for faster access")

    (op, args) = parser.parse_args()

    # --year is documented as a shortcut for both year bounds; apply it.
    if op.year is not None:
        try:
            op.startyear = op.endyear = int(op.year)
        except ValueError:
            parser.error('--year must be an integer, got "%s"' % (op.year))

    if args == []:
        dbfiles = [dumpfile]
    else:
        dbfiles = args

    modify_keys = not op.keep_keys

    available_formats = {
        's': 'short',
        'f': 'full',
        't': 'latex',
        'b': 'bibtex',
        'h': 'html',
        'x': 'xml'
    }

    if op.format is None:
        # Guess the format from the first letter of the output extension.
        if op.output is None or op.output == '-':
            formato = 'short'
        else:
            # [1:2] instead of [1]: a name without extension yields '' and
            # falls back to 'short' instead of raising IndexError.
            ext = os.path.splitext(op.output)[1][1:2]
            formato = available_formats.get(ext, 'short')
    else:
        formato = available_formats.get(op.format[:1].lower(), 'short')

    ##########################################################################
    # Create the List object
    b = biblist.BibList()
    ##########################################################################
    # Read the database(s)
    if op.verbose:
        print('# Loading database...')

    for fname in dbfiles:
        failed = False
        # Catch Exception (not BaseException) so Ctrl-C still interrupts.
        if '.dmp' in fname:
            try:
                b.load(fname)
            except Exception:
                failed = True
        elif '.bib' in fname or fname == '-':
            try:
                b.import_bibtex(fname, normalize=modify_keys)
            except Exception:
                failed = True
        else:
            failed = True

        if op.verbose:
            print('# %d new items read' % (len(b.ListItems)))

        if failed:
            mensaje = 'Database file %s not found or failed to load. Set the name as an option or set the environment variable BIBDB\n' % (
                fname)
            parser.error(mensaje)

    if op.sort is not None:
        sortorder = op.sort.lower().split(',')
        reverse = 'reverse' in sortorder
        if reverse:
            sortorder.remove('reverse')
    else:
        sortorder = []
        reverse = False

    ##########################################################################
    # Do the required action(s)
    bout = biblist.BibList()
    items = b.sortedList[:]  # All items
    bout.abbrevDict.update(b.abbrevDict)

    if op.list:
        b.sort(sortorder, reverse)
        print('\n'.join(b.sortedList))
        return

    # Keep only the items inside the year range; items without a year are
    # treated as belonging to the start year.
    for k in items:
        year = int(b.get_item(k).get_field('year', str(op.startyear)))
        if op.startyear <= year <= op.endyear:
            bout.add_item(b.get_item(k), k)

    if op.search is not None:
        found = set()
        for cond in op.search:
            ss, ff = get_strng_field(cond)
            found.update(
                bout.search(findstr=ss, fields=ff,
                            caseSens=op.case_sensitive))
        for it in bout.sortedList[:]:  # purge not found items
            if it not in found:
                bout.remove_item(it)

    if op.filter_exclude is not None:
        excluded = set()
        for cond in op.filter_exclude:
            ss, ff = get_strng_field(cond)
            excluded.update(
                b.search(findstr=ss, fields=ff, caseSens=op.case_sensitive))
        for it in bout.sortedList[:]:  # purge found items
            if it in excluded:
                bout.remove_item(it)

    if op.filter_include is not None:
        # Only items matching ALL conditions are retained.
        included = None
        for cond in op.filter_include:
            ss, ff = get_strng_field(cond)
            matches = set(
                b.search(findstr=ss, fields=ff, caseSens=op.case_sensitive))
            included = matches if included is None else included & matches
        if included is None:
            included = set()
        for it in bout.sortedList[:]:  # purge not found items
            if it not in included:
                bout.remove_item(it)

    # First sort
    if op.sort is not None:
        bout.sort(sortorder, reverse=reverse)

    if op.output is not None:
        bout.output(op.output, formato, op.verbose)
    else:
        print('# %d items processed' % (len(bout.ListItems)))

    if op.save_dump is not None:
        if op.verbose:
            print('# Saving database to %s...' % (op.save_dump))
        bout.dump(op.save_dump)
#!//anaconda/bin/python # python bib-graph.py input_file.bib # Example: python bib-graph.py "../data/Biblio-perso-globale-fr.bib" import sys import yapbib.biblist as biblist import networkx as nx from networkx.readwrite import json_graph import json bibfile_input = sys.argv[1] bib_handle = biblist.BibList() bib_handle.import_bibtex(bibfile_input, normalize=False) items = bib_handle.List() # Shows the keys of all entries bib_graph = nx.Graph() pos = dict() for item_index in items: item_handle = bib_handle.get_item(item_index) for author_index, author_handle in enumerate( item_handle.get_authorsList()[:len(item_handle.get_authorsList()) - 1]): #print author_handle clean_author_handle = str(author_handle) firstname_initial = clean_author_handle.split()[0][0] lastname = clean_author_handle.split()[-1] clean_author_handle = firstname_initial + '. ' + lastname for next_author_index, next_author_handle in enumerate(
def main():
    """Retrieve a list of papers from the Harvard ADS site and export it.

    Parses the command line, builds the query options (year/month range,
    authors, author logic and any advanced options), runs ``ads.AdsQuery``,
    imports the result into a ``BibList``, sorts it, optionally dumps it and
    writes it in the requested format.

    Returns:
        ``1`` after printing the advanced-options help, otherwise ``None``.
        Exits with status 1 when the query reports an error.
    """
    # Default values
    autores = "Einstein,A;Schrodinger,E"
    thisyear = datetime.date.today().year
    #########################################################################
    parser = OptionParser(version="%prog with biblio-py-{0}".format(VERSION))

    parser.add_option("-o", "--output-file",
                      help="write to FILE. Default: standard output",
                      metavar="FILE", default='-')
    parser.add_option(
        "-f", "--format", default=None,
        help="format of output, possible values are: short, full, bibtex, "
        "tex, html, xml DEFAULT= bib")
    parser.add_option("-v", "--verbose", action="store_true", dest="verbose",
                      default=False, help="Give some informational messages.")
    parser.add_option("-b", "--start-year", default=str(thisyear),
                      help="Starting year as integer (4 digits)")
    parser.add_option("-e", "--end-year", default=str(thisyear),
                      help="Ending year as integer (4 digits)")
    parser.add_option(
        "-y", "--year", default=None,
        help="--year=y is a shortcut to '--start-year=y --end-year=y'")
    parser.add_option("--start-month", default='1',
                      help="Starting month as integer (Jan == 1, Dec == 12)")
    parser.add_option("--end-month", default='12',
                      help="Ending month as integer (Jan == 1, Dec == 12)")
    parser.add_option(
        "-a", "--author", default=autores,
        help="list of semicolon separated author names as last,f")
    parser.add_option(
        "--author-logic", default='AND',
        help="Logic to use to search for authors. Default: 'AND'. "
        "Use 'OR' to get ALL authors articles")
    parser.add_option("--proxy", default=None, help="Proxy used to connect")
    parser.add_option(
        "--advanced-options", default=None,
        help="Additional options supported by Harvard. They should be "
        "written as option1:value1;option2:value2,... "
        "To get a list of options use: '--help-advanced-options'")
    parser.add_option(
        "--help-advanced-options", action="store_true", default=False,
        help="Show information on additional options supported by Harvard "
        "ADS site")
    parser.add_option("-d", "--save-dump",
                      help="Save (dump) the database IN INTERNAL FORM")
    parser.add_option(
        "", "--sort", default='key',
        help="Sort the items according to the following fields, for instance "
        "to sort them accoding to year and then author we would use "
        "--sort=year,author. In the same example, to sort in reverse order "
        "we would use: --sort=year,author,reverse. DEFAULT: key.")

    (op, args) = parser.parse_args()

    if op.help_advanced_options:
        if op.verbose:
            print('Complete list of possible options supported by Harvard:')
            for k, v in ads.all_param.items():
                print(' %18s : %s' % (k, v))
        else:
            print('The more important parameters are:')
            for k, v in ads.param_relevantes.items():
                print(' %18s : %s' % (k, v))
            print('** To get a complete list use also --verbose **')
        return 1

    conexion = {'http_proxy': op.proxy} if op.proxy is not None else {}

    available_formats = {
        's': 'short',
        'f': 'full',
        't': 'latex',
        'b': 'bibtex',
        'h': 'html',
        'x': 'xml'
    }

    output_file = op.output_file

    if op.format is None:
        # Guess the format from the first letter of the output extension.
        # [1:2] instead of [1]: a name without extension yields '' rather
        # than raising IndexError, and '' must not match the 'tbhx' test.
        ext = ''
        if output_file != '-':
            ext = os.path.splitext(output_file)[1][1:2]
        if ext and ext in 'tbhx':
            formato = available_formats[ext]
        else:
            formato = 'bibtex'
    else:
        # Unknown formats fall back to the documented default (bibtex)
        # instead of passing None to the writer.
        formato = available_formats.get(op.format[:1].lower(), 'bibtex')

    #########################################################################
    opciones = {}
    opciones['start_year'] = op.start_year
    opciones['end_year'] = op.end_year
    if op.year is not None:
        opciones['start_year'] = opciones['end_year'] = op.year
    opciones['start_mon'] = op.start_month
    opciones['end_mon'] = op.end_month
    opciones['author'] = op.author
    opciones['aut_logic'] = op.author_logic

    if op.advanced_options is not None:
        for o in op.advanced_options.split(';'):
            if not o.strip():
                continue  # tolerate a trailing or doubled ';'
            # Split on the first ':' only so values may contain colons.
            k, sep, v = o.partition(':')
            if not sep:
                parser.error(
                    'Invalid advanced option "%s", expected option:value' %
                    (o))
            opciones[k] = v

    #########################################################################
    # Create the List object
    b = biblist.BibList()
    #########################################################################

    Query = ads.AdsQuery(connection=conexion, options=opciones)
    nabst, page = Query.query()
    if nabst < 0:
        print('Error (%d), %s' % (nabst, page))
        # Non-zero status so callers and shell pipelines can detect failure.
        sys.exit(1)
    if op.verbose:
        print('%d items downloaded' % (nabst))

    # Load the results into the biblist object
    fi = io.StringIO(page)
    n = b.import_ads(fi)
    if op.verbose:
        print('# %d items downloaded, total number of items %d' %
              (n, len(b.ListItems)))

    sortorder = op.sort.lower().split(',')
    reverse = 'reverse' in sortorder
    if reverse:
        sortorder.remove('reverse')
    b.sort(sortorder, reverse=reverse)

    if op.save_dump is not None:
        if op.verbose:
            print('# Saving database to %s...' % (op.save_dump))
        b.dump(op.save_dump)

    b.output(output_file, formato, op.verbose)