Exemple #1
0
    def __init__(self, string="", first="", middle="", prelast="", last="", lineage=""):
        """
        Create a person from a full name string and/or explicit name parts.

        :param string: The full name string.
            It will be parsed and split into separate first, last, middle,
            pre-last and lineage name parts.

            Supported name formats are:

            - von Last, First
            - von Last, Jr, First
            - First von Last

            (see BibTeX manual for explanation)
        :param first: Extra first names; split with ``split_tex_string`` and
            appended after whatever was parsed from ``string``.
        :param middle: Extra middle names, handled like ``first``.
        :param prelast: Extra pre-last name parts, handled like ``first``.
        :param last: Extra last names, handled like ``first``.
        :param lineage: Extra lineage name parts, handled like ``first``.

        """
        self.first_names = []
        self.middle_names = []
        self.prelast_names = []
        self.last_names = []
        self.lineage_names = []

        full_name = string.strip()
        if full_name:
            self._parse_string(full_name)

        # Explicitly given parts are appended after the parsed ones,
        # always in the order first, middle, prelast, last, lineage.
        explicit_parts = (
            (self.first_names, first),
            (self.middle_names, middle),
            (self.prelast_names, prelast),
            (self.last_names, last),
            (self.lineage_names, lineage),
        )
        for names, text in explicit_parts:
            names.extend(split_tex_string(text))
    def __init__(self, string="", first="", middle="", prelast="", last="", lineage=""):
        """
        Create a person from a full name string and/or explicit name parts.

        :param string: The full name string.
            It will be parsed and split into separate first, last, middle,
            pre-last and lineage name parts.

            Supported name formats are:

            - von Last, First
            - von Last, Jr, First
            - First von Last

            (see BibTeX manual for explanation)
        :param first: Extra first names; split with ``split_tex_string`` and
            appended after whatever was parsed from ``string``.
        :param middle: Extra middle names, handled like ``first``.
        :param prelast: Extra pre-last name parts, handled like ``first``.
        :param last: Extra last names, handled like ``first``.
        :param lineage: Extra lineage name parts, handled like ``first``.

        """
        self.first_names = []
        self.middle_names = []
        self.prelast_names = []
        self.last_names = []
        self.lineage_names = []

        full_name = string.strip()
        if full_name:
            self._parse_string(full_name)

        # Explicitly given parts are appended after the parsed ones,
        # always in the order first, middle, prelast, last, lineage.
        explicit_parts = (
            (self.first_names, first),
            (self.middle_names, middle),
            (self.prelast_names, prelast),
            (self.last_names, last),
            (self.lineage_names, lineage),
        )
        for names, text in explicit_parts:
            names.extend(split_tex_string(text))
Exemple #3
0
 def __init__(self, string="", first="", middle="", prelast="", last="", lineage=""):
     """Create a person from a full name string and/or explicit name parts.

     :param string: Full name; when non-empty after stripping surrounding
         whitespace it is passed to ``parse_string``.
     :param first: Extra first names; split with ``split_tex_string`` and
         appended after whatever was parsed from ``string``.
     :param middle: Extra middle names, handled like ``first``.
     :param prelast: Extra pre-last name parts, handled like ``first``.
     :param last: Extra last names, handled like ``first``.
     :param lineage: Extra lineage name parts, handled like ``first``.
     """
     self._first, self._middle, self._prelast = [], [], []
     self._last, self._lineage = [], []

     full_name = string.strip()
     if full_name:
         self.parse_string(full_name)

     # Explicitly given parts are appended after the parsed ones,
     # always in the order first, middle, prelast, last, lineage.
     explicit_parts = (
         (self._first, first),
         (self._middle, middle),
         (self._prelast, prelast),
         (self._last, last),
         (self._lineage, lineage),
     )
     for names, text in explicit_parts:
         names.extend(split_tex_string(text))
Exemple #4
0
 def __init__(self, string="", first="", middle="", prelast="", last="", lineage=""):
     """Create a person from a full name string and/or explicit name parts.

     :param string: Full name; when non-empty after stripping surrounding
         whitespace it is passed to ``parse_string``.
     :param first: Extra first names; split with ``split_tex_string`` and
         appended after whatever was parsed from ``string``.
     :param middle: Extra middle names, handled like ``first``.
     :param prelast: Extra pre-last name parts, handled like ``first``.
     :param last: Extra last names, handled like ``first``.
     :param lineage: Extra lineage name parts, handled like ``first``.
     """
     self._first, self._middle, self._prelast = [], [], []
     self._last, self._lineage = [], []

     full_name = string.strip()
     if full_name:
         self.parse_string(full_name)

     # Explicitly given parts are appended after the parsed ones,
     # always in the order first, middle, prelast, last, lineage.
     explicit_parts = (
         (self._first, first),
         (self._middle, middle),
         (self._prelast, prelast),
         (self._last, last),
         (self._lineage, lineage),
     )
     for names, text in explicit_parts:
         names.extend(split_tex_string(text))
Exemple #5
0
    def parse_string(self, name):
        """Extract various parts of the name from a string.

        Supported formats are:
         - von Last, First
         - von Last, Jr, First
         - First von Last
        (see BibTeX manual for explanation)

        The parsed tokens are appended to ``self._first``, ``self._middle``,
        ``self._prelast``, ``self._last`` and ``self._lineage``.

        :param name: The name string to parse.
        :raises PybtexError: If ``name`` does not split into one, two or
            three comma-separated parts.
        """
        def process_first_middle(parts):
            # The first token is the first name, the remaining ones are
            # middle names; an empty token list contributes nothing.
            if parts:
                self._first.append(parts[0])
                self._middle.extend(parts[1:])

        def process_von_last(parts):
            # Everything up to and including the right-most lowercase token
            # is the "von" part; what follows is the last name.
            von, last = rsplit_at(parts, lambda part: part.islower())
            if von and not last:
                # All tokens ended up in "von": the final token is used as
                # the last name instead.
                last.append(von.pop())
            self._prelast.extend(von)
            self._last.extend(last)

        def find_pos(lst, pred):
            """Return the index of the first item satisfying ``pred``.

            If no item matches, the number of items is returned.  This is
            also the case for an empty iterable (result 0); relying on the
            loop variable after the loop would raise UnboundLocalError there.
            """
            pos = 0
            for item in lst:
                if pred(item):
                    return pos
                pos += 1
            return pos

        def split_at(lst, pred):
            """Split the given list into two parts.

            The second part starts with the first item for which the given
            predicate is True.
            """
            pos = find_pos(lst, pred)
            return lst[:pos], lst[pos:]

        def rsplit_at(lst, pred):
            """Split the given list into two parts, searching from the right.

            The first part ends with the last item for which the given
            predicate is True; it is empty if there is no such item.
            """
            rpos = find_pos(reversed(lst), pred)
            pos = len(lst) - rpos
            return lst[:pos], lst[pos:]

        parts = split_tex_string(name, ',')
        if len(parts) == 3: # von Last, Jr, First
            process_von_last(split_tex_string(parts[0]))
            self._lineage.extend(split_tex_string(parts[1]))
            process_first_middle(split_tex_string(parts[2]))
        elif len(parts) == 2: # von Last, First
            process_von_last(split_tex_string(parts[0]))
            process_first_middle(split_tex_string(parts[1]))
        elif len(parts) == 1: # First von Last
            parts = split_tex_string(name)
            first_middle, von_last = split_at(parts, lambda part: part.islower())
            if not von_last and first_middle:
                # No lowercase token at all: the final token is the last name.
                last = first_middle.pop()
                von_last.append(last)
            process_first_middle(first_middle)
            process_von_last(von_last)
        else:
            raise PybtexError('Invalid name format: %s' % name)
Exemple #6
0
    def parse_string(self, name):
        """Extract various parts of the name from a string.

        Supported formats are:
         - von Last, First
         - von Last, Jr, First
         - First von Last
        (see BibTeX manual for explanation)

        The parsed tokens are appended to ``self._first``, ``self._middle``,
        ``self._prelast``, ``self._last`` and ``self._lineage``.

        :param name: The name string to parse.
        :raises PybtexError: If ``name`` does not split into one, two or
            three comma-separated parts.
        """
        def process_first_middle(parts):
            # The first token is the first name, the remaining ones are
            # middle names; an empty token list contributes nothing.
            if parts:
                self._first.append(parts[0])
                self._middle.extend(parts[1:])

        def process_von_last(parts):
            # Everything up to and including the right-most lowercase token
            # is the "von" part; what follows is the last name.
            von, last = rsplit_at(parts, lambda part: part.islower())
            if von and not last:
                # All tokens ended up in "von": the final token is used as
                # the last name instead.
                last.append(von.pop())
            self._prelast.extend(von)
            self._last.extend(last)

        def find_pos(lst, pred):
            """Return the index of the first item satisfying ``pred``.

            If no item matches, the number of items is returned.  This is
            also the case for an empty iterable (result 0); relying on the
            loop variable after the loop would raise UnboundLocalError there.
            """
            pos = 0
            for item in lst:
                if pred(item):
                    return pos
                pos += 1
            return pos

        def split_at(lst, pred):
            """Split the given list into two parts.

            The second part starts with the first item for which the given
            predicate is True.
            """
            pos = find_pos(lst, pred)
            return lst[:pos], lst[pos:]

        def rsplit_at(lst, pred):
            """Split the given list into two parts, searching from the right.

            The first part ends with the last item for which the given
            predicate is True; it is empty if there is no such item.
            """
            rpos = find_pos(reversed(lst), pred)
            pos = len(lst) - rpos
            return lst[:pos], lst[pos:]

        parts = split_tex_string(name, ',')
        if len(parts) == 3: # von Last, Jr, First
            process_von_last(split_tex_string(parts[0]))
            self._lineage.extend(split_tex_string(parts[1]))
            process_first_middle(split_tex_string(parts[2]))
        elif len(parts) == 2: # von Last, First
            process_von_last(split_tex_string(parts[0]))
            process_first_middle(split_tex_string(parts[1]))
        elif len(parts) == 1: # First von Last
            parts = split_tex_string(name)
            first_middle, von_last = split_at(parts, lambda part: part.islower())
            if not von_last and first_middle:
                # No lowercase token at all: the final token is the last name.
                last = first_middle.pop()
                von_last.append(last)
            process_first_middle(first_middle)
            process_von_last(von_last)
        else:
            raise PybtexError('Invalid name format: %s' % name)
Exemple #7
0
    def parse_string(self, name):
        """Extract various parts of the name from a string.

        Supported formats are:
         - von Last, First
         - von Last, Jr, First
         - First von Last
        (see BibTeX manual for explanation)

        The parsed tokens are appended to ``self._first``, ``self._middle``,
        ``self._prelast``, ``self._last`` and ``self._lineage``.

        :param name: The name string to parse.
        :raises PybtexError: If ``name`` does not split into one, two or
            three comma-separated parts.
        """

        def process_first_middle(parts):
            # The first token is the first name, the remaining ones are
            # middle names; an empty token list contributes nothing.
            if parts:
                self._first.append(parts[0])
                self._middle.extend(parts[1:])

        def process_von_last(parts):
            # The final token is always part of the last name, so only the
            # tokens before it are searched (right to left) for a lowercase
            # "von" token.  When none is found the von part stays empty;
            # reusing the loop index after an exhausted loop would instead
            # always push the first token into the von part.
            pos = 0
            for i in range(len(parts) - 2, -1, -1):
                if parts[i].islower():
                    pos = i + 1
                    break
            self._prelast.extend(parts[:pos])
            self._last.extend(parts[pos:])

        def split_at(lst, pred):
            """Split the given list into two parts.

            The second part starts with the first item for which the given
            predicate is True. If the predicate is False for all items, the
            last element still comes to the last part. This is how BibTeX
            parses names.

            An empty list is split into two empty lists.
            """
            i = 0  # keeps the index defined when lst is empty
            for i, item in enumerate(lst):
                if pred(item):
                    break
            return lst[:i], lst[i:]

        parts = split_tex_string(name, ",")
        if len(parts) == 3:  # von Last, Jr, First
            process_von_last(split_tex_string(parts[0]))
            self._lineage.extend(split_tex_string(parts[1]))
            process_first_middle(split_tex_string(parts[2]))
        elif len(parts) == 2:  # von Last, First
            process_von_last(split_tex_string(parts[0]))
            process_first_middle(split_tex_string(parts[1]))
        elif len(parts) == 1:  # First von Last
            parts = split_tex_string(name)
            first_middle, von_last = split_at(parts, lambda part: part.islower())
            process_first_middle(first_middle)
            process_von_last(von_last)
        else:
            raise PybtexError("Invalid name format: %s" % name)
Exemple #8
0
    def _parse_string(self, name):
        """Extract various parts of the name from a string.

        The parsed tokens are appended to ``first_names``, ``middle_names``,
        ``prelast_names``, ``last_names`` and ``lineage_names``.  A name with
        more than three comma-separated parts is reported through
        ``report_error`` and then parsed with everything after the second
        comma joined into a single third part.

        >>> p = Person('Avinash K. Dixit')
        >>> print(p.first_names)
        [u'Avinash']
        >>> print(p.middle_names)
        [u'K.']
        >>> print(p.prelast_names)
        []
        >>> print(p.last_names)
        [u'Dixit']
        >>> print(p.lineage_names)
        []
        >>> print(six.text_type(p))
        Dixit, Avinash K.
        >>> p == Person(six.text_type(p))
        True
        >>> p = Person('Dixit, Jr, Avinash K. ')
        >>> print(p.first_names)
        [u'Avinash']
        >>> print(p.middle_names)
        [u'K.']
        >>> print(p.prelast_names)
        []
        >>> print(p.last_names)
        [u'Dixit']
        >>> print(p.lineage_names)
        [u'Jr']
        >>> print(six.text_type(p))
        Dixit, Jr, Avinash K.
        >>> p == Person(six.text_type(p))
        True

        >>> p = Person('abc')
        >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names)
        [] [] [] [u'abc'] []
        >>> p = Person('Viktorov, Michail~Markovitch')
        >>> print(p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names)
        [u'Michail'] [u'Markovitch'] [] [u'Viktorov'] []
        """
        def process_first_middle(parts):
            # The first token is the first name, the remaining ones are
            # middle names; an empty token list contributes nothing.
            if parts:
                self.first_names.append(parts[0])
                self.middle_names.extend(parts[1:])

        def process_von_last(parts):
            # von cannot be the last name in the list
            von_last = parts[:-1]
            definitely_not_von = parts[-1:]

            if von_last:
                von, last = rsplit_at(von_last, is_von_name)
                self.prelast_names.extend(von)
                self.last_names.extend(last)
            self.last_names.extend(definitely_not_von)

        def find_pos(lst, pred):
            """Return the index of the first item satisfying ``pred``.

            If no item matches, the number of items is returned.  This is
            also the case for an empty iterable (result 0); relying on the
            loop variable after the loop would raise UnboundLocalError there.
            """
            pos = 0
            for item in lst:
                if pred(item):
                    return pos
                pos += 1
            return pos

        def split_at(lst, pred):
            """Split the given list into two parts.

            The second part starts with the first item for which the given
            predicate is True.
            """
            pos = find_pos(lst, pred)
            return lst[:pos], lst[pos:]

        def rsplit_at(lst, pred):
            """Split the given list into two parts, searching from the right.

            The first part ends with the last item for which the given
            predicate is True; it is empty if there is no such item.
            """
            rpos = find_pos(reversed(lst), pred)
            pos = len(lst) - rpos
            return lst[:pos], lst[pos:]

        def is_von_name(string):
            """Tell whether the given name token counts as a "von" part."""
            if not string:
                # Nothing to inspect; avoids an IndexError on string[0].
                return False
            if string[0].isupper():
                return False
            if string[0].islower():
                return True
            else:
                # The first character has no case: decide from the first
                # letter at brace level 0, or from the first
                # backslash-prefixed item at brace level 1.
                for char, brace_level in scan_bibtex_string(string):
                    if brace_level == 0 and char.isalpha():
                        return char.islower()
                    elif brace_level == 1 and char.startswith('\\'):
                        return special_char_islower(char)
            return False

        def special_char_islower(special_char):
            """Tell whether the text following a control sequence is lowercase.

            The letters directly after the backslash are skipped as the
            control sequence name; once a non-letter has been seen, the next
            letter decides.  False is returned if there is no such letter.
            """
            control_sequence = True
            for char in special_char[1:]:  # skip the backslash
                if control_sequence:
                    if not char.isalpha():
                        control_sequence = False
                else:
                    if char.isalpha():
                        return char.islower()
            return False

        parts = split_tex_string(name, ',')
        if len(parts) > 3:
            # Too many commas: report it, then fold the surplus parts into
            # the third one so that parsing can still proceed.
            report_error(InvalidNameString(name))
            last_parts = parts[2:]
            parts = parts[:2] + [' '.join(last_parts)]

        if len(parts) == 3:  # von Last, Jr, First
            process_von_last(split_tex_string(parts[0]))
            self.lineage_names.extend(split_tex_string(parts[1]))
            process_first_middle(split_tex_string(parts[2]))
        elif len(parts) == 2:  # von Last, First
            process_von_last(split_tex_string(parts[0]))
            process_first_middle(split_tex_string(parts[1]))
        elif len(parts) == 1:  # First von Last
            parts = split_tex_string(name)
            first_middle, von_last = split_at(parts, is_von_name)
            if not von_last and first_middle:
                # No von token at all: the final token is the last name.
                last = first_middle.pop()
                von_last.append(last)
            process_first_middle(first_middle)
            process_von_last(von_last)
        else:
            # should not really happen
            raise ValueError(name)
    def _parse_string(self, name):
        """Extract various parts of the name from a string.

        The parsed tokens are appended to ``first_names``, ``middle_names``,
        ``prelast_names``, ``last_names`` and ``lineage_names``.  A name with
        more than three comma-separated parts is reported through
        ``report_error`` and then parsed with everything after the second
        comma joined into a single third part.

        >>> p = Person('Avinash K. Dixit')
        >>> print p.first_names
        ['Avinash']
        >>> print p.middle_names
        ['K.']
        >>> print p.prelast_names
        []
        >>> print p.last_names
        ['Dixit']
        >>> print p.lineage_names
        []
        >>> print unicode(p)
        Dixit, Avinash K.
        >>> p == Person(unicode(p))
        True
        >>> p = Person('Dixit, Jr, Avinash K. ')
        >>> print p.first_names
        ['Avinash']
        >>> print p.middle_names
        ['K.']
        >>> print p.prelast_names
        []
        >>> print p.last_names
        ['Dixit']
        >>> print p.lineage_names
        ['Jr']
        >>> print unicode(p)
        Dixit, Jr, Avinash K.
        >>> p == Person(unicode(p))
        True

        >>> p = Person('abc')
        >>> print p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names
        [] [] [] ['abc'] []
        >>> p = Person('Viktorov, Michail~Markovitch')
        >>> print p.first_names, p.middle_names, p.prelast_names, p.last_names, p.lineage_names
        ['Michail'] ['Markovitch'] [] ['Viktorov'] []
        """
        def process_first_middle(parts):
            # The first token is the first name, the remaining ones are
            # middle names; an empty token list contributes nothing.
            if parts:
                self.first_names.append(parts[0])
                self.middle_names.extend(parts[1:])

        def process_von_last(parts):
            # von cannot be the last name in the list
            von_last = parts[:-1]
            definitely_not_von = parts[-1:]

            if von_last:
                von, last = rsplit_at(von_last, is_von_name)
                self.prelast_names.extend(von)
                self.last_names.extend(last)
            self.last_names.extend(definitely_not_von)

        def find_pos(lst, pred):
            """Return the index of the first item satisfying ``pred``.

            If no item matches, the number of items is returned.  This is
            also the case for an empty iterable (result 0); relying on the
            loop variable after the loop would raise UnboundLocalError there.
            """
            pos = 0
            for item in lst:
                if pred(item):
                    return pos
                pos += 1
            return pos

        def split_at(lst, pred):
            """Split the given list into two parts.

            The second part starts with the first item for which the given
            predicate is True.
            """
            pos = find_pos(lst, pred)
            return lst[:pos], lst[pos:]

        def rsplit_at(lst, pred):
            """Split the given list into two parts, searching from the right.

            The first part ends with the last item for which the given
            predicate is True; it is empty if there is no such item.
            """
            rpos = find_pos(reversed(lst), pred)
            pos = len(lst) - rpos
            return lst[:pos], lst[pos:]

        def is_von_name(string):
            """Tell whether the given name token counts as a "von" part."""
            if not string:
                # Nothing to inspect; avoids an IndexError on string[0].
                return False
            if string[0].isupper():
                return False
            if string[0].islower():
                return True
            else:
                # The first character has no case: decide from the first
                # letter at brace level 0, or from the first
                # backslash-prefixed item at brace level 1.
                for char, brace_level in scan_bibtex_string(string):
                    if brace_level == 0 and char.isalpha():
                        return char.islower()
                    elif brace_level == 1 and char.startswith('\\'):
                        return special_char_islower(char)
            return False

        def special_char_islower(special_char):
            """Tell whether the text following a control sequence is lowercase.

            The letters directly after the backslash are skipped as the
            control sequence name; once a non-letter has been seen, the next
            letter decides.  False is returned if there is no such letter.
            """
            control_sequence = True
            for char in special_char[1:]:  # skip the backslash
                if control_sequence:
                    if not char.isalpha():
                        control_sequence = False
                else:
                    if char.isalpha():
                        return char.islower()
            return False

        parts = split_tex_string(name, ',')
        if len(parts) > 3:
            # Too many commas: report it, then fold the surplus parts into
            # the third one so that parsing can still proceed.
            report_error(InvalidNameString(name))
            last_parts = parts[2:]
            parts = parts[:2] + [' '.join(last_parts)]

        if len(parts) == 3:  # von Last, Jr, First
            process_von_last(split_tex_string(parts[0]))
            self.lineage_names.extend(split_tex_string(parts[1]))
            process_first_middle(split_tex_string(parts[2]))
        elif len(parts) == 2:  # von Last, First
            process_von_last(split_tex_string(parts[0]))
            process_first_middle(split_tex_string(parts[1]))
        elif len(parts) == 1:  # First von Last
            parts = split_tex_string(name)
            first_middle, von_last = split_at(parts, is_von_name)
            if not von_last and first_middle:
                # No von token at all: the final token is the last name.
                last = first_middle.pop()
                von_last.append(last)
            process_first_middle(first_middle)
            process_von_last(von_last)
        else:
            # should not really happen
            raise ValueError(name)
Exemple #10
0
    def __init__(self,
                 string="",
                 first="",
                 middle="",
                 prelast="",
                 last="",
                 lineage=""):
        """
        :param string: The full name string.
            It will be parsed and split into separate first, last, middle,
            pre-last and lineage name parts.

            Supported name formats are:

            - von Last, First
            - von Last, Jr, First
            - First von Last

            (see BibTeX manual for explanation)
        :param first: Extra first names; split with ``split_tex_string`` and
            appended after whatever was parsed from ``string``.
        :param middle: Extra middle names, handled like ``first``.
        :param prelast: Extra pre-last name parts, handled like ``first``.
        :param last: Extra last names, handled like ``first``.
        :param lineage: Extra lineage name parts, handled like ``first``.

        """

        self.first_names = []
        """
        A list of first names.

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.first`, which is now deprecated.
        """

        self.middle_names = []
        """
        A list of middle names.

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.middle`, which is now deprecated.
        """

        self.prelast_names = []
        """
        A list of pre-last (aka von) name parts.

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.prelast`, which is now deprecated.
        """

        self.last_names = []
        """
        A list of last names.

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.last`, which is now deprecated.
        """

        self.lineage_names = []
        """
        A list of lineage (aka Jr) name parts.

        .. versionadded:: 0.19
            Earlier versions used :py:meth:`.lineage`, which is now deprecated.
        """

        string = string.strip()
        if string:
            self._parse_string(string)
        # Explicitly given parts are appended after the parsed ones.
        self.first_names.extend(split_tex_string(first))
        self.middle_names.extend(split_tex_string(middle))
        self.prelast_names.extend(split_tex_string(prelast))
        self.last_names.extend(split_tex_string(last))
        self.lineage_names.extend(split_tex_string(lineage))
Exemple #11
0
    def filter_bibentry(self, entry):
        """Apply the configured fixes to one bibliography entry, in place.

        Each fix is enabled by an attribute of this filter object
        (``self.fix_space_after_escape``, ``self.remove_full_braces``, ...).
        The fixes run in the order in which they appear below; the persons
        and fields of ``entry`` are modified directly.

        :param entry: the entry to fix (a ``pybtex.database.Entry`` object).
        :returns: ``None``.
        """
        #
        # entry is a pybtex.database.Entry object
        #

        # first apply filters that are applied to all fields of the entry

        def thefilter(x):
            # String-level fixes shared by person names and field values.
            if self.fix_space_after_escape:
                x = do_fix_space_after_escape(x)
            if self.fix_swedish_a:
                # OBSOLETE, but still accepted for backwards compatibility
                # Replaces the whitespace after \AA and \o with an empty group {}.
                x = re.sub(r'\\AA\s+', r'\\AA{}', x)
                x = re.sub(r'\\o\s+', r'\\o{}', x)
            if self.encode_utf8_to_latex:
                # use custom encoder
                x = custom_utf8tolatex(x)
            if self.encode_latex_to_utf8:
                x = butils.latex_to_text(x)
            return x

        def filter_person(p):
            # Filter the person's full string form, then re-parse it into a
            # new Person.
            oldpstr = unicodestr(p)
            #print(oldpstr)
            newpstr = thefilter(oldpstr)
            #print(newpstr)
            return Person(string=newpstr)
            # does not work this way because of the way Person() splits at spaces:
            #parts = {}
            #for typ in ['first', 'middle', 'prelast', 'last', 'lineage']:
            #    parts[typ] = thefilter(u" ".join(p.get_part(typ)))
            #return Person(**parts)


        # Replace every person of every role by its filtered version.
        for (role,perslist) in iteritems(entry.persons):
            for k in range(len(perslist)):
                entry.persons[role][k] = filter_person(perslist[k])
        
        # Only values of existing keys are reassigned while iterating.
        for (k,v) in iteritems(entry.fields):
            entry.fields[k] = thefilter(v)

        logger.longdebug("entry %s passed basic filter: %r", entry.key, entry)

        # additionally:

        if self.unprotect_full_last_names:
            # Only persons with exactly one last-name token are touched: the
            # token is passed through remove_full_braces() and split again.
            for (role,perslist) in iteritems(entry.persons):
                for p in perslist:
                    if len(p.last_names) == 1:
                        lname = remove_full_braces(p.last_names[0])
                        p.last_names = split_tex_string(lname)

        def filter_entry_remove_type_from_phd(entry):
            if (entry.type != 'phdthesis' or 'type' not in entry.fields):
                return
            # Compare on lowercase letters only, so any type value whose
            # letters contain "phd" matches.
            if ('phd' in re.sub(r'[^a-z]', '', entry.fields['type'].lower())):
                # entry is phd type, so remove explicit type={}
                del entry.fields['type']
            
        if (self.remove_type_from_phd):
            filter_entry_remove_type_from_phd(entry)

        if (self.remove_pages_from_book):
            if (entry.type == 'book' and 'pages' in entry.fields):
                del entry.fields['pages']


        #
        # do this before 'self.remove_full_braces', because the latter depends on language
        #
        if (self.rename_language):
            if 'language' in entry.fields:
                logger.longdebug('Maybe fixing language in entry %s: lang=%r',
                                 entry.key, entry.fields['language'])
                # Each match of rename_language_rx is looked up (lowercased)
                # in self.rename_language; unknown names are kept unchanged.
                entry.fields['language'] = self.rename_language_rx.sub(
                    lambda m: self.rename_language.get(m.group('lang').lower(), m.group('lang')),
                    entry.fields['language']
                )
                logger.longdebug('  --> language is now = %r', entry.fields['language'])



        def filter_entry_remove_full_braces(entry, fieldlist):
            # fieldlist=None means "all fields".
            for k,v in iteritems(entry.fields):
                if fieldlist is None or k in fieldlist:
                    entry.fields[k] = remove_full_braces(v)

        if self.remove_full_braces:
            # Skipped when the entry's (lowercased) language is listed in
            # self.remove_full_braces_not_lang.
            if entry.fields.get('language','').lower() not in self.remove_full_braces_not_lang:
                filter_entry_remove_full_braces(entry, self.remove_full_braces_fieldlist)


        if (self.map_annote_to_note):
            if 'annote' in entry.fields:
                # Append the annote text to any existing non-empty note,
                # separated by '; ', then drop the annote field.
                thenote = ''
                if len(entry.fields.get('note', '')):
                    thenote = entry.fields['note'] + '; '
                entry.fields['note'] = thenote + entry.fields['annote']
                del entry.fields['annote']
                
        if (self.auto_urlify):
            # self.auto_urlify is iterated as a collection of field names.
            for fld in self.auto_urlify:
                if fld in entry.fields:
                    entry.fields[fld] = do_auto_urlify(entry.fields[fld])

        def filter_protect_names(entry):
            def repl_ltx_str(n, r, x):
                # Return x with every match of the regex r that is found
                # outside of a braced group replaced by '{' + n + '}'.
                # scan string until next '{', read latex expression and skip it, etc.
                lw = latexwalker.LatexWalker(x, tolerant_parsing=True)
                pos = 0
                newx = u''
                therx = re.compile(r'((?P<openbrace>\{)|'+r.pattern+r')', re.IGNORECASE)
                while True:
                    m = therx.search(x, pos)
                    if m is None:
                        newx += x[pos:]
                        break
                    newpos = m.start()
                    newx += x[pos:newpos]
                    if m.group('openbrace'):
                        # we encountered an opening brace, so we need to copy in everything verbatim
                        (junknode, np, nl) = lw.get_latex_expression(newpos)
                        # just copy the contents as is and move on
                        newx += x[newpos:np+nl]
                        newpos = np + nl
                    else:
                        # we found an instance of the string we wanted to protect, so protect it:
                        newx += '{' + n + '}'
                        newpos = m.end()

                    # continue from our last position
                    pos = newpos
                    
                return newx

            for key, val in iteritems(entry.fields):
                # These fields are never rewritten by name protection.
                if key in ('doi', 'url', 'file'):
                    continue
                newval = val
                # self.protect_names yields (replacement name, regex) pairs.
                for n,r in self.protect_names:
                    newval = repl_ltx_str(n, r, newval)
                if (newval != val):
                    entry.fields[key] = newval

        if (self.protect_names):
            filter_protect_names(entry)

        # include stuff like:
        #
        # title = "{\textquotedblleft}Relative State{\textquotedblright} Formulation of Quantum Mechanics"
        #
        # Lead-in allowed between the dot/colon (or the start of the field)
        # and the capital letter: non-word characters other than '{', and
        # macros, optionally wrapped in braces.
        _rx_prcap_lead = r'([^\w\{]|\\[A-Za-z]+|\{\\[A-Za-z]+\})*'
        if (self.protect_capital_letter_after_dot):
            for fld in self.protect_capital_letter_after_dot:
                if fld in entry.fields:
                    entry.fields[fld] = re.sub(r'(?P<dotlead>[.:]'+_rx_prcap_lead+r')(?P<ucletter>[A-Z])',
                                               lambda m: m.group('dotlead')+u'{'+m.group('ucletter')+u'}',
                                               entry.fields[fld])
        if (self.protect_capital_letter_at_begin):
            for fld in self.protect_capital_letter_at_begin:
                if fld in entry.fields:
                    entry.fields[fld] = re.sub(r'^(?P<lead>'+_rx_prcap_lead+r')(?P<ucletter>[A-Z])',
                                               lambda m: m.group('lead')+u'{'+m.group('ucletter')+u'}',
                                               entry.fields[fld])

        if (self.fix_mendeley_bug_urls):
            for fld in self.fix_mendeley_bug_urls:
                if fld in entry.fields:
                    entry.fields[fld] = do_fix_mendeley_bug_urls(entry.fields[fld])

        # The patterns in each list are applied one after another, in list
        # order; the braced macro form comes before the unbraced one.
        _rx_dbl_quotes = [
            re.compile(r"``(?P<contents>.*?)''"),
            # this pattern must be tested first, because otherwise we leave stray braces
            re.compile(r'\{\\textquotedblleft\}(?P<contents>.*?)\{\\textquotedblright\}'),
            re.compile(r'\\textquotedblleft(?P<contents>.*?)\\textquotedblright'),
        ]
        _rx_sgl_quotes = [
            # try to match correct quote in " `My dad's dog' is a nice book ".
            re.compile(r"`(?P<contents>.*?)'(?=\W|$)"),
            # this pattern must be tested first, because otherwise we leave stray braces
            re.compile(r'\{\\textquoteleft\}(?P<contents>.*?)\{\\textquoteright\}'),
            re.compile(r'\\textquoteleft(?P<contents>.*?)\\textquoteright'),
        ]
        if (self.convert_dbl_quotes):
            for fld in self.convert_dbl_quotes:
                if fld in entry.fields:
                    for rx in _rx_dbl_quotes:
                        # Quoted text becomes <dbl_quote_macro>{text}.
                        entry.fields[fld] = re.sub(rx,
                                                   lambda m: self.dbl_quote_macro+u"{"+m.group('contents')+u"}",
                                                   entry.fields[fld])
        if (self.convert_sgl_quotes):
            for fld in self.convert_sgl_quotes:
                if fld in entry.fields:
                    for rx in _rx_sgl_quotes:
                        # Quoted text becomes <sgl_quote_macro>{text}.
                        entry.fields[fld] = re.sub(rx,
                                                   lambda m: self.sgl_quote_macro+u"{"+m.group('contents')+u"}",
                                                   entry.fields[fld])
                    
        if (self.remove_file_field):
            if ('file' in entry.fields):
                del entry.fields['file']

        if (self.remove_fields):
            for fld in self.remove_fields:
                # pop() with a default: absent fields are silently ignored.
                entry.fields.pop(fld,None)

        if (self.remove_doi_prefix):
            if 'doi' in entry.fields:
                # Strip a leading "doi:" / "doi " prefix, case-insensitively.
                entry.fields['doi'] = re.sub(r'^\s*doi[ :]\s*', '', entry.fields['doi'], flags=re.IGNORECASE)

        logger.longdebug("fixes filter, result: %s -> Authors=%r, fields=%r",
                         entry.key, entry.persons.get('author', None),
                         entry.fields)

        return