Exemple #1
0
def norm_author(record):
    """Reduce the author field to a sorted, space-separated string of last names.

    The field is split on ' and ', run through ``bc.getnames``, cut at the
    first comma of each name, normalized with ``_norm_author`` and sorted.

    Args:
        record (Dict[str]): record containing an author field

    Returns:
        str: normalized author names

    Examples:
        >>> records = [{'author': 'Siegfried Fischbacher and Uwe Ludwig Horn'},
        ...            {'author': 'Fischbacher, S. and Horn, U.'},
        ...            {'author': 'Fischbacher, Siegfried and Horn, Uwe '
        ...                       'Ludwig'}
        ...           ]
        >>> [norm_author(rec) for rec in records]
        ['Fischbacher Horn', 'Fischbacher Horn', 'Fischbacher Horn']
        >>> norm_author({'author': 'François Augiéras'})
        'Augieras'
        >>> norm_author({'author': 'Avraham (Abraham), Uri and '
        ...              'Ihoda (Haim Judah), Jaime'})
        'Avraham Ihoda'
    """
    raw_names = record['author'].split(' and ')
    # Correct "Name, Surname"-format
    formatted = bc.getnames(raw_names)
    # Everything before the first comma is kept as the last name.
    last_names = [name.partition(',')[0] for name in formatted]
    normalized = sorted([_norm_author(last) for last in last_names])
    return ' '.join(normalized)
 def test_getnames(self):
     """getnames() should return 'Last, First' strings, keeping particles
     such as 'de', 'la', 'le' and 'ben' with the last name."""
     # (input name, expected output) pairs, checked in one call below.
     cases = [
         ('Foo Bar', 'Bar, Foo'),
         ('Foo B. Bar', 'Bar, Foo B.'),
         ('F. B. Bar', 'Bar, F. B.'),
         ('F.B. Bar', 'Bar, F. B.'),
         ('F. Bar', 'Bar, F.'),
         ('Jean de Savigny', 'de Savigny, Jean'),
         ('Jean la Tour', 'la Tour, Jean'),
         ('Jean le Tour', 'le Tour, Jean'),
         ('Mike ben Akar', 'ben Akar, Mike'),
         # Cases that were already commented out:
         # ('Jean de la Tour', 'de la Tour, Jean'),
         # ('Johannes Diderik van der Waals',
         #  'van der Waals, Johannes Diderik'),
     ]
     names = [given for given, _ in cases]
     expected = [wanted for _, wanted in cases]
     result = getnames(names)
     self.assertEqual(result, expected)
Exemple #3
0
 def test_getnames(self):
     """Check getnames() against a table of 'First Last' inputs and their
     expected 'Last, First' outputs."""
     table = (
         ('Foo Bar', 'Bar, Foo'),
         ('Foo B. Bar', 'Bar, Foo B.'),
         ('F. B. Bar', 'Bar, F. B.'),
         ('F.B. Bar', 'Bar, F. B.'),
         ('F. Bar', 'Bar, F.'),
         ('Jean de Savigny', 'de Savigny, Jean'),
         ('Jean la Tour', 'la Tour, Jean'),
         ('Jean le Tour', 'le Tour, Jean'),
         ('Mike ben Akar', 'ben Akar, Mike'),
         # Rows that were already commented out:
         # ('Jean de la Tour', 'de la Tour, Jean'),
         # ('Johannes Diderik van der Waals',
         #  'van der Waals, Johannes Diderik'),
     )
     # Unzip the table into the two parallel lists the assertion compares.
     inputs, outputs = zip(*table)
     self.assertEqual(getnames(list(inputs)), list(outputs))
Exemple #4
0
    def customizations(record):
        """Replace the author and editor fields with ``getnames`` output.

        Each field that is present is flattened to one line, split first on
        ", " and then on " and ", and the stripped pieces are passed to
        ``getnames``. The record is modified in place and returned.
        """
        # NOTE(review): splitting on ", " also separates a "Last, First"
        # entry into two pieces - confirm the input never uses that form.
        for field in ("author", "editor"):
            if field not in record:
                continue
            one_line = record[field].replace("\n", " ")
            pieces = []
            for comma_chunk in one_line.split(", "):
                for name in comma_chunk.split(" and "):
                    pieces.append(name.strip())
            record[field] = getnames(pieces)

        return record
Exemple #5
0
    def customizations(record):
        """Normalize the 'author' and 'editor' fields through ``getnames``.

        A present field has its newlines replaced by spaces, is split on
        ', ' and then on ' and ', and every resulting piece is stripped
        before being handed to ``getnames``. Returns the same record.
        """
        # NOTE(review): the ', ' split also breaks a "Last, First" entry
        # in two - verify against the data this is applied to.
        for key in ('author', 'editor'):
            if key in record:
                flattened = record[key].replace('\n', ' ')
                record[key] = getnames([
                    part.strip()
                    for comma_chunk in flattened.split(', ')
                    for part in comma_chunk.split(" and ")
                ])

        return record
    def get_names(self):
        """
        Return the reference's authors as processed by ``getnames``.

        The ``author`` entry of ``self.db`` is flattened to a single line,
        split on " and ", and each piece is stripped before the call.

        .. code-block:: python

            >>> with open("bib/1968_chow.bib", "r") as fh:
            ...    _citation = bibtexparser.load(fh)
            >>> db = Reference(_citation)
            >>> print(db.get_names())
            ['Friedman, Nir', 'Geiger, Dan', 'Goldszmidt, Moises']
        """
        author_field = self.db["author"].replace("\n", " ")
        stripped = [piece.strip() for piece in author_field.split(" and ")]
        return getnames(stripped)
Exemple #7
0
def editor(record):
    """
    Turn the editor field into a list of "Name, Surname" entries.

    A record without an editor field is returned untouched; an empty
    editor field is deleted from the record. Otherwise the field is
    flattened to one line, split on " and ", stripped, and replaced by the
    result of ``customization.getnames``.

    :param record: the record.
    :type record: dict
    :returns: the same record object, modified in place.
    """
    if "editor" not in record:
        return record

    raw = record["editor"]
    if not raw:
        # Empty value: drop the key instead of keeping a blank field.
        del record["editor"]
        return record

    pieces = raw.replace("\n", " ").split(" and ")
    record["editor"] = customization.getnames([p.strip() for p in pieces])
    return record
 def btex_custom(self, record):
     """Derive the display fields of one bibliography record.

     The record is passed through ``convert_to_unicode``, a hyphenated
     page range is rewritten with an en dash, and the following keys are
     set on the converted record: 'author', 'surnames', 'author_year' and
     'unique_suffix'.

     :param record: the record; 'title' must be present, while 'author',
         'editor', 'pages' and 'year' are optional.
     :returns: the converted record with the derived keys added.
     :raises KeyError: if the record has no 'title'.
     """
     r = convert_to_unicode(record)
     if "pages" in record and "-" in record["pages"]:  # fix -- -> –
         # Keep only the first and last page, whatever number of hyphens
         # separated them.
         p = [i.strip().strip('-') for i in record["pages"].split("-")]
         record["pages"] = p[0] + u'–' + p[-1]
     # Fall back to the editor, then to 'Anon.', when there is no author.
     authors = r.get('author')
     if not authors:
         authors = r.get('editor', 'Anon.')
     _authors = getnames(authors.split(" and "))
     # The surname is the part before the first comma of each name.
     _and_surnames = self.and_authors(
         [s.split(",")[0].strip() for s in _authors])
     r['author'] = self.and_authors(_authors)
     r['surnames'] = _and_surnames
     # A missing year no longer raises KeyError: it is treated as empty.
     r['author_year'] = _and_surnames + u" " + r.get('year', '')
     r['unique_suffix'] = self.unique_suffix(r['author_year'])
     r['author_year'] += r['unique_suffix']
     # Self-assignment kept from the original (brace stripping disabled).
     r['title'] = r['title']  # .replace("{", "").replace("}","")
     return r
Exemple #9
0
 def btex_custom(self, record):
     """Build the author/year display fields for a bibliography record.

     After ``convert_to_unicode``, a hyphenated 'pages' value is rewritten
     as 'first–last', and 'author', 'surnames', 'author_year' and
     'unique_suffix' are stored on the converted record, which is returned.
     A missing 'year' is treated as an empty string.
     """
     entry = convert_to_unicode(record)

     # fix -- -> –
     if "pages" in record and "-" in record["pages"]:
         bounds = [chunk.strip().strip('-')
                   for chunk in record["pages"].split("-")]
         record["pages"] = bounds[0] + u'–' + bounds[-1]

     # Author first, then editor, then the 'Anon.' placeholder.
     people = entry.get('author') or entry.get('editor', 'Anon.')
     full_names = getnames(people.split(" and "))

     # The text before the first comma of each name is its surname.
     surname_list = [name.split(",")[0].strip() for name in full_names]
     joined_surnames = self.and_authors(surname_list)

     entry['author'] = self.and_authors(full_names)
     entry['surnames'] = joined_surnames
     entry['author_year'] = joined_surnames + u" " + entry.get('year', '')
     entry['unique_suffix'] = self.unique_suffix(entry['author_year'])
     entry['author_year'] += entry['unique_suffix']
     entry['title'] = entry['title']  # .replace("{", "").replace("}","")
     return entry
def custom(record):
    """Customize one bibliography record: unicode, files, relocation, tags.

    Each step is isolated so that a failure only sets ``record['error']``
    and the remaining steps still run:

    1. Convert the record with ``c.convert_to_unicode``.
    2. Parse ``record['file']`` into a set of paths; when the record has no
       'hashes' yet, store the file hashes and the regularized file list.
    3. For every file outside ``args.library``, ask on stdin whether to
       copy it to ``<library>/<year>/<surnames>/`` and update the record.
    4. Merge 'keywords' and 'mendeley-tags' into 'tags', unless 'tags'
       already exists.

    Args:
        record (dict): the record to customize; modified in place.

    Returns:
        dict: the customized record.
    """
    # Local import: only this function needs it, for nested directories.
    from os import makedirs

    try:
        record = c.convert_to_unicode(record)
    except TypeError:
        logging.warning("Unicode Error on: {}".format(record['ID']))
        record['error'] = 'unicode'

    # Bound before the try so the relocation step below still works (on an
    # empty set) when the file field is missing or cannot be parsed.
    file_set = set()
    try:
        # add hashes of associated files
        files = [
            add_slash_if_necessary(y)
            for x in record['file'].split(';')
            for y in x.split(':')
            if bool(y.strip()) and y.strip().lower() != 'pdf'
        ]
        file_set = set(files)
        if 'hashes' not in record:
            hashes = [file_to_hash(x) for x in file_set]
            record['hashes'] = ";".join(hashes)
            # regularize format of files list
            record['file'] = ";".join(file_set)
    except Exception as e:
        # Exceptions raised without arguments have an empty args tuple.
        detail = e.args[0] if e.args else e
        logging.warning("File Error: {} : {}".format(record['ID'], detail))
        record['error'] = 'file'

    # todo: if file is not in the library common prefix, move it there
    # look for year, then first surname, then copy in, making dir if necessary
    # Iterate over a snapshot: file_set is modified inside the loop.
    for x in list(file_set):
        try:
            current_path = realpath(x)
            common = commonpath([current_path, args.library])
            if common == args.library:
                continue  # already inside the library
            logging.info("Found file outside library: {}".format(current_path))
            logging.info("Common: {}".format(common))
            # get the author and year
            year = record['year']
            authors = c.getnames(
                [i.strip()
                 for i in record["author"].replace('\n', ' ').split(" and ")])
            authors_split = [c.splitname(a) for a in authors]
            author_surnames = [a['last'][0] for a in authors_split]
            new_path = join(args.library, year, ", ".join(author_surnames))
            logging.info("New Path: {}".format(new_path))
            full_new_path = join(new_path, split(current_path)[1])
            logging.info("Copying file")
            logging.info("From: {}".format(current_path))
            logging.info("To: {}".format(full_new_path))
            response = input("Enter to confirm: ")
            if response == "":
                logging.info("Proceeding")
                # create directory if necessary; makedirs also creates the
                # intermediate <year> directory when it does not exist yet
                if not exists(new_path):
                    makedirs(new_path)
                if exists(full_new_path):
                    raise Exception("File already exists")
                # copy file
                copyfile(x, full_new_path)
                file_set.remove(x)
                file_set.add(full_new_path)
                record['file'] = ";".join(file_set)
        except Exception as e:
            logging.info("Issue copying file for: {}".format(x))
            logging.info(e)
            record['error'] = 'file_copy'

    # regularize keywords
    try:
        keywords = set()
        if 'tags' not in record:
            if 'keywords' in record:
                keywords.update([x.strip() for x in record['keywords'].split(',')])
                del record['keywords']
            if 'mendeley-tags' in record:
                keywords.update([x.strip() for x in record['mendeley-tags'].split(',')])
                del record['mendeley-tags']

            record['tags'] = ",".join(keywords)
    except Exception:
        # Same catch-all as the other steps; `Error` is not a builtin name.
        logging.warning("Tag Error: {}".format(record['ID']))
        record['error'] = 'tag'

    # record = c.type(record)
    # record = c.author(record)
    # record = c.editor(record)
    # record = c.journal(record)
    # record = c.keyword(record)
    # record = c.link(record)
    # record = c.doi(record)
    # record['p_authors'] = []
    # if 'author' in record:
    #     record['p_authors'] = [c.splitname(x, False) for x in record['author']]
    return record
 def test_getnames_braces(self):
     """A braced last name should stay in one piece and a braced LaTeX
     accent should come back as the accented character."""
     braced = ['A. {Delgado de Molina}', 'M. Vign{\\\'e}']
     self.assertEqual(getnames(braced),
                      ['Delgado de Molina, A.', 'Vigné, M.'])
Exemple #12
0
 def test_getnames_braces(self):
     """getnames() is expected to drop the braces: the grouped last name
     stays together and the escaped accent becomes 'é'."""
     # (input, expected) pairs
     cases = [
         ('A. {Delgado de Molina}', 'Delgado de Molina, A.'),
         ('M. Vign{\\\'e}', 'Vigné, M.'),
     ]
     given = [source for source, _ in cases]
     wanted = [target for _, target in cases]
     self.assertEqual(getnames(given), wanted)
 def test_getnames_add_double_dot(self):
     """Two-letter initials without dots ('FG') are expected to come back
     dotted and spaced ('F. G.')."""
     # NOTE(review): the expectation is a list of [first, last] lists, not
     # 'Last, First' strings - confirm which getnames variant this targets.
     cases = [
         ('FG Bar', ['F. G.', 'Bar']),
         ('CQ Lux', ['C. Q.', 'Lux']),
     ]
     undotted = [raw for raw, _ in cases]
     dotted = [parts for _, parts in cases]
     self.assertEqual(getnames(undotted), dotted)
 def test_getnames_add_single_dot(self):
     """A single initial without a dot ('F') is expected to come back
     with one ('F.')."""
     # NOTE(review): the expectation is a list of [first, last] lists, not
     # 'Last, First' strings - confirm which getnames variant this targets.
     undotted = ['F Bar', 'C Lux']
     self.assertEqual(getnames(undotted),
                      [['F.', 'Bar'], ['C.', 'Lux']])