def norm_author(record):
    """
    Reduce the author field of a record to a sorted string of last names.

    The field is split on ``' and '``, passed through ``bc.getnames`` so that
    every name is in "Last, First" form, cut down to the part before the
    first comma, and normalised with ``_norm_author`` (judging by the
    examples below, that helper appears to strip accents and parenthesised
    alternatives).

    Args:
        record (Dict[str]): record containing an ``'author'`` field

    Returns:
        str: the normalised last names, sorted and joined by single spaces

    Raises:
        KeyError: if the record has no ``'author'`` field

    Examples:
        >>> records = [{'author': 'Siegfried Fischbacher and Uwe Ludwig Horn'},
        ...            {'author': 'Fischbacher, S. and Horn, U.'},
        ...            {'author': 'Fischbacher, Siegfried and Horn, Uwe '
        ...                       'Ludwig'}
        ...            ]
        >>> [norm_author(rec) for rec in records]
        ['Fischbacher Horn', 'Fischbacher Horn', 'Fischbacher Horn']

        >>> norm_author({'author': 'François Augiéras'})
        'Augieras'

        >>> norm_author({'author': 'Avraham (Abraham), Uri and '
        ...                        'Ihoda (Haim Judah), Jaime'})
        'Avraham Ihoda'
    """
    raw_names = record['author'].split(' and ')
    # getnames puts every entry into "Last, First" form, so the text before
    # the first comma is always the last name.
    last_names = sorted(
        _norm_author(full_name.split(',')[0])
        for full_name in bc.getnames(raw_names)
    )
    return ' '.join(last_names)
def test_getnames(self):
    """getnames rewrites "First Last" names as "Last, First".

    Each case pairs the raw input with the expected tidy form. The cases
    cover plain names, spaced and unspaced initials, and the lowercase
    particles ``de``, ``la``, ``le`` and ``ben``, which are expected to stay
    attached to the last name.
    """
    cases = [
        ('Foo Bar', 'Bar, Foo'),
        ('Foo B. Bar', 'Bar, Foo B.'),
        ('F. B. Bar', 'Bar, F. B.'),
        ('F.B. Bar', 'Bar, F. B.'),
        ('F. Bar', 'Bar, F.'),
        ('Jean de Savigny', 'de Savigny, Jean'),
        ('Jean la Tour', 'la Tour, Jean'),
        ('Jean le Tour', 'le Tour, Jean'),
        ('Mike ben Akar', 'ben Akar, Mike'),
        # Multi-particle surnames are disabled in this test:
        # ('Jean de la Tour', 'de la Tour, Jean'),
        # ('Johannes Diderik van der Waals',
        #  'van der Waals, Johannes Diderik'),
    ]
    names = [raw for raw, _ in cases]
    expected = [tidy for _, tidy in cases]
    self.assertEqual(getnames(names), expected)
def test_getnames(self):
    """Check that getnames turns "First Last" input into "Last, First".

    The inputs exercise full first names, initials with and without a
    separating space, and surnames that start with a lowercase particle
    (``de``, ``la``, ``le``, ``ben``).
    """
    raw_names = [
        'Foo Bar',
        'Foo B. Bar',
        'F. B. Bar',
        'F.B. Bar',
        'F. Bar',
        'Jean de Savigny',
        'Jean la Tour',
        'Jean le Tour',
        'Mike ben Akar',
        # Surnames with more than one particle are not exercised yet:
        # 'Jean de la Tour',
        # 'Johannes Diderik van der Waals',
    ]
    tidy_names = [
        'Bar, Foo',
        'Bar, Foo B.',
        'Bar, F. B.',
        'Bar, F. B.',
        'Bar, F.',
        'de Savigny, Jean',
        'la Tour, Jean',
        'le Tour, Jean',
        'ben Akar, Mike',
        # 'de la Tour, Jean',
        # 'van der Waals, Johannes Diderik',
    ]
    self.assertEqual(getnames(raw_names), tidy_names)
def customizations(record):
    """Replace the author and editor fields of a record with name lists.

    For each of the two fields that is present, newlines are turned into
    spaces, the text is split first on ``", "`` and then on ``" and "``,
    every piece is stripped, and the pieces are handed to ``getnames``. The
    result is stored back under the same key.

    Note that because ``", "`` is a separator here, a name that is already
    written as "Last, First" is split into two separate entries.

    :param record: the record; modified in place.
    :type record: dict
    :returns: the same record object.
    """
    for field in ("author", "editor"):
        if field not in record:
            continue
        one_line = record[field].replace("\n", " ")
        pieces = [
            part.strip()
            for chunk in one_line.split(", ")
            for part in chunk.split(" and ")
        ]
        record[field] = getnames(pieces)
    return record
def customizations(record):
    """Turn the ``author`` and ``editor`` strings of a record into lists.

    A field that is present is flattened onto one line, cut at every
    ``', '`` and every ``" and "``, and the stripped fragments are passed to
    ``getnames``; the returned list replaces the original string. Fields
    that are absent are left alone.

    Since ``', '`` acts as a separator, an entry given as "Last, First"
    ends up as two fragments.

    :param record: the record; updated in place.
    :type record: dict
    :returns: the record that was passed in.
    """
    for key in ('author', 'editor'):
        if key in record:
            flattened = record[key].replace('\n', ' ')
            people = []
            for comma_part in flattened.split(', '):
                people.extend(
                    name.strip() for name in comma_part.split(" and ")
                )
            record[key] = getnames(people)
    return record
def get_names(self):
    """
    Return the authors of the reference as a list of names.

    The ``"author"`` entry of ``self.db`` is flattened onto a single line,
    split on ``" and "``, stripped of surrounding whitespace and passed to
    ``getnames``.

    .. code-block:: python

        >>> with open("bib/1968_chow.bib", "r") as fh:
        ...     _citation = bibtexparser.load(fh)
        >>> db = Reference(_citation)
        >>> print(db.get_names())
        ['Friedman, Nir', 'Geiger, Dan', 'Goldszmidt, Moises']

    """
    author_field = self.db["author"].replace("\n", " ")
    stripped_names = [name.strip() for name in author_field.split(" and ")]
    return getnames(stripped_names)
def editor(record):
    """
    Turn the editor field into a list of "Name, Surname" entries.

    A record without an ``"editor"`` key is returned untouched. An editor
    field that is present but empty is deleted from the record. Otherwise
    the field is flattened onto one line, split on ``" and "``, stripped and
    passed to ``customization.getnames``.

    :param record: the record; modified in place.
    :type record: dict
    :returns: the same record object.
    """
    if "editor" not in record:
        return record

    if not record["editor"]:
        # An empty editor field carries no information, so drop the key.
        del record["editor"]
        return record

    one_line = record["editor"].replace("\n", " ")
    editors = [name.strip() for name in one_line.split(" and ")]
    record["editor"] = customization.getnames(editors)
    return record
def btex_custom(self, record):
    """Prepare one BibTeX record and derive its citation fields.

    The record is passed through ``convert_to_unicode``. A ``pages`` value
    containing a hyphen is rewritten as ``first–last`` with an en dash. The
    following keys are then set on the converted record:

    * ``author``: the names from ``getnames`` combined by
      ``self.and_authors``
    * ``surnames``: the part of each name before the first comma, combined
      by ``self.and_authors``
    * ``author_year``: surnames, a space, the year, and the unique suffix
    * ``unique_suffix``: the value of ``self.unique_suffix`` for the
      un-suffixed ``author_year``

    When the record has no (or an empty) ``author``, the ``editor`` field is
    used instead, and ``'Anon.'`` when that is missing too. A record without
    a ``year`` is treated as having an empty year instead of raising
    ``KeyError``.

    :param record: the BibTeX record.
    :type record: dict
    :returns: the converted record with the derived keys added.
    :raises KeyError: if the record has no ``title``.
    """
    r = convert_to_unicode(record)
    # NOTE(review): pages is read from and written to `record`, not `r`;
    # presumably convert_to_unicode returns the same dict - confirm.
    if "pages" in record:
        # fix -- -> –
        if "-" in record["pages"]:
            p = [i.strip().strip('-') for i in record["pages"].split("-")]
            record["pages"] = p[0] + u'–' + p[-1]

    authors = r.get('author')
    if not authors:
        authors = r.get('editor', 'Anon.')
    _authors = getnames(authors.split(" and "))
    _and_surnames = self.and_authors(
        [s.split(",")[0].strip() for s in _authors])

    r['author'] = self.and_authors(_authors)
    r['surnames'] = _and_surnames
    # Entries without a year are legal BibTeX, so fall back to an empty
    # string rather than failing on the missing key.
    r['author_year'] = _and_surnames + u" " + r.get('year', '')
    r['unique_suffix'] = self.unique_suffix(r['author_year'])
    r['author_year'] += r['unique_suffix']
    # Self-assignment kept on purpose: it still raises KeyError for a
    # record without a title, which callers may rely on.
    r['title'] = r['title']  # .replace("{", "").replace("}","")
    return r
def btex_custom(self, record):
    """Convert a BibTeX record and attach the fields used for citing it.

    After ``convert_to_unicode``, a hyphenated ``pages`` value is normalised
    to ``first–last`` (en dash). The author list comes from ``author``,
    falling back to ``editor`` and finally to ``'Anon.'``. The method then
    stores ``author``, ``surnames``, ``author_year`` and ``unique_suffix``
    on the converted record. A missing ``year`` contributes an empty string.

    :param record: the BibTeX record.
    :type record: dict
    :returns: the converted record with the derived keys added.
    :raises KeyError: if the record has no ``title``.
    """
    r = convert_to_unicode(record)

    # fix -- -> –
    if "pages" in record and "-" in record["pages"]:
        # Pieces produced by split("-") cannot contain a hyphen, so plain
        # whitespace stripping of the two ends is all that is needed.
        pieces = record["pages"].split("-")
        record["pages"] = pieces[0].strip() + u'–' + pieces[-1].strip()

    name_field = r.get('author') or r.get('editor', 'Anon.')
    tidy_names = getnames(name_field.split(" and "))
    surnames = self.and_authors(
        [name.split(",")[0].strip() for name in tidy_names])

    r['author'] = self.and_authors(tidy_names)
    r['surnames'] = surnames

    citation_key = surnames + u" " + r.get('year', '')
    r['author_year'] = citation_key
    suffix = self.unique_suffix(citation_key)
    r['unique_suffix'] = suffix
    r['author_year'] = citation_key + suffix

    # The self-assignment only has an effect when 'title' is missing, in
    # which case it raises KeyError.
    r['title'] = r['title']  # .replace("{", "").replace("}","")
    return r
def custom(record):
    """Clean up one BibTeX record: unicode, attached files and tags.

    Steps, each of which records a failure in ``record['error']`` and lets
    the following steps run:

    1. Convert the record with ``c.convert_to_unicode``.
    2. Parse the ``file`` field (entries separated by ``;`` and ``:``,
       ignoring empty parts and the literal ``pdf``), store the hashes of
       the files in ``hashes`` unless already present, and rewrite ``file``
       as a ``;``-joined list.
    3. For every attached file that lies outside ``args.library``, ask on
       stdin for confirmation and copy it to
       ``<library>/<year>/<surnames>/``, updating ``file``.
    4. Unless the record already has ``tags``, merge ``keywords`` and
       ``mendeley-tags`` into a comma-joined ``tags`` field and delete the
       two source fields.

    :param record: the record; modified in place.
    :type record: dict
    :returns: the processed record.
    """
    # Function-scope import so the block does not depend on the file header.
    from os import makedirs

    try:
        record = c.convert_to_unicode(record)
    except TypeError:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['error'] = 'unicode'

    # Bound before the try so the relocation loop below is a no-op (rather
    # than a NameError) when the file field is missing or malformed.
    file_set = set()
    try:
        # add md5 of associated files
        files = [add_slash_if_necessary(y)
                 for x in record['file'].split(';')
                 for y in x.split(':')
                 if bool(y.strip()) and y.strip().lower() != 'pdf']
        file_set = set(files)
        if 'hashes' not in record:
            hashes = [file_to_hash(x) for x in file_set]
            record['hashes'] = ";".join(hashes)
        # regularize format of files list
        record['file'] = ";".join(file_set)
    except Exception as e:
        # Exceptions raised without arguments have an empty args tuple.
        detail = e.args[0] if e.args else e
        logging.warning("File Error: {} : {}".format(record['ID'], detail))
        record['error'] = 'file'

    # todo: if file is not in the library common prefix, move it there
    # look for year, then first surname, then copy in, making dir if necessary
    # Iterate over a snapshot: file_set is modified inside the loop.
    for x in list(file_set):
        try:
            current_path = realpath(x)
            common = commonpath([current_path, args.library])
            if common == args.library:
                continue

            logging.info("Found file outside library: {}".format(current_path))
            logging.info("Common: {}".format(common))
            # get the author and year
            year = record['year']
            authors = c.getnames([i.strip() for i in
                                  record["author"].replace('\n', ' ').split(" and ")])
            authors_split = [c.splitname(a) for a in authors]
            author_surnames = [a['last'][0] for a in authors_split]
            new_path = join(args.library, year, ", ".join(author_surnames))
            logging.info("New Path: {}".format(new_path))

            full_new_path = join(new_path, split(current_path)[1])
            logging.info("Copying file")
            logging.info("From: {}".format(current_path))
            logging.info("To: {}".format(full_new_path))
            response = input("Enter to confirm: ")
            if response == "":
                logging.info("Proceeding")
                # create directory if necessary; the target is two levels
                # deep (<year>/<surnames>), so the year directory may have
                # to be created as well.
                if not exists(new_path):
                    makedirs(new_path)
                # Never overwrite a file that is already in the library.
                if exists(full_new_path):
                    raise Exception("File already exists")
                # copy file
                copyfile(x, full_new_path)
                file_set.remove(x)
                file_set.add(full_new_path)
                record['file'] = ";".join(file_set)
        except Exception as e:
            logging.info("Issue copying file for: \n{}".format(x))
            logging.info(e)
            record['error'] = 'file_copy'

    # regularize keywords
    try:
        # Only build 'tags' when the record has none, so that existing tags
        # are never overwritten.
        if 'tags' not in record:
            keywords = set()
            if 'keywords' in record:
                keywords.update([x.strip() for x in record['keywords'].split(',')])
                del record['keywords']
            if 'mendeley-tags' in record:
                keywords.update([x.strip() for x in record['mendeley-tags'].split(',')])
                del record['mendeley-tags']
            record['tags'] = ",".join(keywords)
    except Exception:
        logging.warning("Tag Error: {}".format(record['ID']))
        record['error'] = 'tag'

    # record = c.type(record)
    # record = c.author(record)
    # record = c.editor(record)
    # record = c.journal(record)
    # record = c.keyword(record)
    # record = c.link(record)
    # record = c.doi(record)
    # record['p_authors'] = []
    # if 'author' in record:
    #     record['p_authors'] = [c.splitname(x, False) for x in record['author']]
    return record
def test_getnames_braces(self):
    """Brace-protected surnames and LaTeX accents are handled by getnames.

    A surname wrapped in braces is expected to be kept together and
    returned without the braces, and a LaTeX accent command is expected to
    come back as the accented character.
    """
    braced_names = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}']
    tidy_names = ['Delgado de Molina, A.', 'Vigné, M.']
    self.assertEqual(getnames(braced_names), tidy_names)
def test_getnames_add_double_dot(self):
    """Two run-together initials are expected to be split and dotted.

    NOTE(review): the expected value here is a list of
    ``[initials, surname]`` pairs, whereas the other getnames tests in
    this file expect "Last, First" strings - confirm which return shape
    is intended.
    """
    undotted = ['FG Bar', 'CQ Lux']
    dotted = [['F. G.', 'Bar'], ['C. Q.', 'Lux']]
    self.assertEqual(getnames(undotted), dotted)
def test_getnames_add_single_dot(self):
    """A single undotted initial is expected to gain a trailing dot.

    NOTE(review): the expected value here is a list of
    ``[initial, surname]`` pairs, whereas the other getnames tests in
    this file expect "Last, First" strings - confirm which return shape
    is intended.
    """
    undotted = ['F Bar', 'C Lux']
    dotted = [['F.', 'Bar'], ['C.', 'Lux']]
    self.assertEqual(getnames(undotted), dotted)