Example #1
0
    def __init__(self, only_single_letter_firsts=False, names_to_utf8=True,
                 only_one_initial=False, strip_first_names=False, *roles):
        """
        NameInitialsFilter constructor.

        Arguments:

          - only_single_letter_firsts(bool): Make proper initials (e.g. C. H. Bennett)
            only if the entry itself only has initials. This is useful if your entries
            don't contain the proper punctuation (e.g. C H Bennett). (default: False)

          - names_to_utf8(bool): Convert LaTeX escapes to UTF-8 characters in names in
            bib file. (default: True)

          - only_one_initial(bool): Keep only the first initial, removing any
            middle names.  For instance, "P. A. M. Dirac" ->
            "P. Dirac". (default: False)

          - strip_first_names(bool): Only keep last names and strip first/middle
            names entirely.

        Any additional positional arguments are stored as the roles to act on.
        """
        super(NameInitialsFilter, self).__init__()

        # Extra positional arguments select the roles; with none given, only
        # the 'author' role is used.
        self.roles = roles if roles else ['author']

        # Each flag goes through getbool() before being stored.
        self._names_to_utf8 = getbool(names_to_utf8)
        self._only_single_letter_firsts = getbool(only_single_letter_firsts)
        self._only_one_initial = getbool(only_one_initial)
        self._strip_first_names = getbool(strip_first_names)

        logger.debug('NameInitialsFilter constructor')
Example #2
0
    def __init__(self, dupfile=None, warn=False, custom_bibalias=False,
                 keep_only_used_in_jobname=None, jobname_search_dirs=None,
                 *args):
        r"""DuplicatesFilter constructor.

        *dupfile: the name of a file to write latex code for defining duplicates to. This file
                  will be overwritten!!
        *warn(bool): if this flag is set, dupfile is not mandatory, and a warning is issued
               for every duplicate entry found in the database.
        *custom_bibalias(bool): if set to TRUE, then no latex definitions will be generated
               in the file given in `dupfile', and will rely on a user-defined implementation
               of `\bibalias`.
        *keep_only_used_in_jobname: only keep entries which are referenced in LaTeX job Jobname.
               The corresponding AUX file is searched for and analyzed, see only_used filter.
               Note that this has no effect if the `dupfile' is not set.
        *jobname_search_dirs(CommaStrList): (use with keep_only_used_in_jobname) search for the
               AUX file in the given directories, as for the only_used filter.

        At most one extra positional argument is accepted; it is taken as the
        dupfile name.  Raises BibFilterError if the dupfile is given both ways
        or if more than one positional argument is received.
        """

        BibFilter.__init__(self)

        self.dupfile = dupfile
        self.warn = butils.getbool(warn)
        self.custom_bibalias = butils.getbool(custom_bibalias)

        # The dupfile may be given either as an option or as the single
        # positional argument, but not both.
        if len(args) == 1:
            if self.dupfile is not None:
                raise BibFilterError("duplicates", "Repeated values given for dupfile: one as an option (`%s'), "
                                     "the other as a positional argument (`%s')"%(self.dupfile, args[0]))
            self.dupfile = args[0]
        elif args:
            # (x,) keeps the formatting correct even if an argument is itself a tuple.
            raise BibFilterError("duplicates", "Received unexpected positional arguments (at most one expected, "
                                 "the dupfile name): [%s]"%(",".join(["%s" % (x,) for x in args])))

        # Normalize any false-y value (e.g. an empty string) to None.
        self.keep_only_used_in_jobname = keep_only_used_in_jobname or None

        if jobname_search_dirs is not None:
            jobname_search_dirs = CommaStrList(jobname_search_dirs)
        self.jobname_search_dirs = jobname_search_dirs

        self.cache_entries_validator = None

        if not self.dupfile and not self.warn:
            logger.warning("bibolamazi duplicates filter: no action will be taken as neither -sDupfile or"
                           " -dWarn are given!")

        # Log the effective values: the dupfile may have come from the
        # positional argument, and warn has been converted by getbool().
        logger.debug('duplicates: dupfile=%r, warn=%r', self.dupfile, self.warn)
Example #3
0
    def __init__(self, Strip=False, StripAllIfDoiOrArxiv=False, StripDoiUrl=True, StripArxivUrl=True,
                 UrlFromDoi=False, UrlFromArxiv=False, KeepFirstUrlOnly=False, StripForTypes=None,
                 AddAsHowPublished=False, HowPublishedText='available at {urlstr}'):
        r"""
        UrlNormalizeFilter constructor.

        Arguments:

          - Strip(bool): Removes all URLs from the entry. Maybe add URLs according to the
                         other options. [default: False]

          - StripAllIfDoiOrArxiv(bool): Removes all URLs from the entry, but only if a DOI
                         identifier or an ArXiv ID is present. [default: False]

          - StripDoiUrl(bool): Remove any URL that is in fact a DOI lookup, e.g. of the form
                         `https://dx.doi.org/<DOI>`  [default: True]

          - StripArxivUrl(bool): Remove any URL that looks like an arxiv lookup, i.e. of the
                         form `http(s)://arxiv.org/abs/<ID>`  [default: True]

          - UrlFromDoi(bool): If the entry has a DOI identifier, then add an explicit URL
                         that is a DOI lookup, i.e. `https://dx.doi.org/<DOI>`  [default: False]

          - UrlFromArxiv(bool): If the entry has an ArXiv identifier, then add an explicit URL
                         that links to the arXiv page, i.e. `https://arxiv.org/abs/<ARXIV-ID>`
                         [default: False]

          - KeepFirstUrlOnly(bool): If the entry has several URLs, then after applying all
                         the other stripping rules, keep only the first remaining URL, if any.
                         [default: False]

          - StripForTypes: strip all URLs specified for entries among the given list of types.
                         Common types to strip would be e.g. 'book' or 'phdthesis'.

          - AddAsHowPublished(bool): Add a howpublished={available at \url{...}} entry to the bibtex.

          - HowPublishedText: replace the 'available at ' text for -dAddAsHowPublished.  Use Python
                         string formatting. Available keys are '{urlstr}' to insert
                         list of URLs concatenated with a comma, '{url}' to insert the
                         first url and the key 'urls' is passed the raw Python list as
                         argument.

        """
        super(UrlNormalizeFilter, self).__init__()

        # Boolean switches, each converted with getbool() and stored under a
        # lower-cased attribute name.
        self.strip = getbool(Strip)
        self.stripallifdoiorarxiv = getbool(StripAllIfDoiOrArxiv)
        self.stripdoiurl = getbool(StripDoiUrl)
        self.striparxivurl = getbool(StripArxivUrl)
        self.urlfromdoi = getbool(UrlFromDoi)
        self.urlfromarxiv = getbool(UrlFromArxiv)
        self.keepfirsturlonly = getbool(KeepFirstUrlOnly)

        # StripForTypes is split on commas into a list of trimmed type names;
        # None is kept as None.
        if StripForTypes is None:
            self.stripfortypes = None
        else:
            self.stripfortypes = [entrytype.strip() for entrytype in StripForTypes.split(',')]

        self.addashowpublished = getbool(AddAsHowPublished)
        # The template text is stored as given.
        self.howpublishedtext = HowPublishedText

        logger.debug('url filter constructor')
Example #4
0
    def __init__(self, jobname, search_dirs=[], prefix="", journal_ref_in_note=False):
        r"""CiteArxivFilter constructor.

        Arguments:
          - jobname: the base name of the latex file. Will search for jobname.aux and look
              for `\citation{..}' commands as they are generated by latex.
          - search_dirs(CommaStrList): the .aux file will be searched for in this list of
              directories; separate directories with commas e.g. 'path/to/dir1,path/to/dir2'
              (escape commas and backslashes with a backslash)
          - journal_ref_in_note(bool): keep the journal reference given by the arXiv in the
              note={} bibtex field. (default: No)
          - prefix: if set, citations should be in the the form `\cite{prefix:id}' (default:
              no prefix)
        """
        # The docstring is a raw string: it contains LaTeX backslashes
        # (\citation, \cite) which are not valid Python escape sequences.

        BibFilter.__init__(self)

        self.jobname = jobname
        # NOTE(review): the shared [] default is only handed to CommaStrList
        # here; presumably it is copied rather than mutated -- confirm.
        self.search_dirs = CommaStrList(search_dirs)
        self.journal_ref_in_note = getbool(journal_ref_in_note)
        self.prefix = prefix

        # With no directories given, fall back to these two locations.
        if not self.search_dirs:
            self.search_dirs = ['.', '_cleanlatexfiles'] # also for my cleanlatex utility :)

        logger.debug('citearxiv: jobname=%r', jobname)
Example #5
0
    def __init__(self, format="%(author)s%(year)s%(journal_abb)s_%(title_word)s",
                 if_published=None, if_type=None):
        """
        CiteKeyFilter Constructor.

        Arguments:
         - format: Format of the citation key. Should contain placeholders %(author)s etc. 
             (See complete filter reference for details).
         - if_published(bool): If this option is specified, then only apply this filter to
             published or unpublished items, depending on the value given.
         - if_type(CommaStrList): You may specify a list of entry types to restrict the
             application of this filter to. By default, or if the list is empty, the filter
             applies to all entries.
        """

        super(CiteKeyFilter, self).__init__()

        self.fmt = format

        # None and '' both mean "option not given"; anything else is
        # converted to a boolean.
        if if_published is not None and if_published != '':
            self.if_published = butils.getbool(if_published)
        else:
            self.if_published = None

        # Same convention for the type restriction: unset -> None, otherwise
        # a list of the stripped items obtained by iterating if_type.
        if if_type is None or if_type == '':
            self.if_type = None
        else:
            self.if_type = [entrytype.strip() for entrytype in if_type]

        logger.debug('citekey: fmt=%r', self.fmt)
Example #6
0
    def __init__(self, mode="random", use_uppercase_text=False):
        r"""
        Constructor method for TestFilter.

        Note that this part of the constructor docstring itself isn't
        that useful, but the argument list below is parsed and used by
        the default automatic option parser for filter arguments. So
        document your arguments! If your filter accepts `**kwargs`,
        you may add more arguments below than you explicitly declare
        in your constructor prototype.

        If this function accepts `*args`, then additional positional
        arguments on the filter line will be passed to those
        args. (And not to the declared arguments.)

        Arguments:

          - mode(Mode): the operating mode to adopt

          - use_uppercase_text(bool): if set to True, then transform
            our added text to uppercase characters.
        """

        BibFilter.__init__(self)

        # Convert the raw option values into their typed forms before storing.
        operating_mode = Mode(mode)
        uppercase_flag = getbool(use_uppercase_text)

        self.mode = operating_mode
        self.use_uppercase_text = uppercase_flag

        # debug log messages are seen by the user in verbose output mode
        logger.debug('my filter constructor: mode=%s, uppercase=%s',
                     self.mode, self.use_uppercase_text)
Example #7
0
    def __init__(self, message=None, *args, **kwargs):
        """Echo a custom message into the bibolamazi logger.

        Arguments:
          - message: the message to echo
          - level(LogLevel): the logger level required to display the message (one of 'LONGDEBUG',
            'DEBUG', 'WARNING', 'INFO', 'ERROR' or 'CRITICAL')
          - format(EchoFormat): how to display the message (one of 'default', 'simple' or 'warn')
          - warn(bool): short for '-sFormat=warn -sLevel=WARNING'
        """
        BibFilter.__init__(self)

        self.message = message
        if args:
            # Positional words form the message, unless one was already given
            # explicitly, in which case they are rejected.
            if self.message is not None:
                raise BibFilterError(
                    self.name(), "Got unexpected additional arguments: %s" % (", ".join('"' + s + '"' for s in args))
                )
            self.message = " ".join(args)

        # 'warn' only supplies level/format values that were not given explicitly.
        warn_requested = kwargs.get("warn", None)
        if warn_requested is not None and getbool(warn_requested):
            kwargs.setdefault("level", "WARNING")
            kwargs.setdefault("format", "warn")

        self.loglevel = LogLevel(kwargs.get("level", logging.INFO))

        echo_format = EchoFormat(kwargs.get("format", FMT_DEFAULT))
        self.fmt = msgformats[echo_format.msgformat]
Example #8
0
    def __init__(self, only_single_letter_firsts=False, names_to_utf8=True, *roles, **kwargs):
        """
        Arguments:
          - only_single_letter_firsts(bool): Make proper initials (e.g. C. H. Bennett)
            only if the entry itself only has initials. This is useful if your entries
            don't contain the proper punctuation (e.g. C H Bennett). (default: False)
          - names_to_utf8(bool): Convert LaTeX escapes to UTF-8 characters in names in
            bib file. (default: True)
        """
        BibFilter.__init__(self)

        # Extra positional arguments select the roles; default to 'author'
        # when none are given.  Keyword arguments in **kwargs are accepted
        # but not used in this constructor.
        self.roles = roles if roles else ['author']

        self._names_to_utf8 = getbool(names_to_utf8)
        self._only_single_letter_firsts = getbool(only_single_letter_firsts)

        logger.debug('NameInitialsFilter constructor')
Example #9
0
    def update_allow_remote_filterpackages(self):
        """Apply the stored 'RemoteFilterPackages/AllowRemote' setting to the UI widgets."""
        settings = QSettings()
        settings.beginGroup('RemoteFilterPackages')
        stored_value = settings.value('AllowRemote', False)
        settings.endGroup()

        # on Windows, Python 3.4 and Qt 5.7, this returns str... so ensure it is a bool
        remote_allowed = butils.getbool(stored_value)

        # Update the checkbox inside BlockedSignals (presumably so that its
        # change signal does not fire -- confirm against BlockedSignals).
        with BlockedSignals(self.ui.chkRemoteAllow):
            self.ui.chkRemoteAllow.setChecked(remote_allowed)

        self.ui.grpGithubAuth.setEnabled(remote_allowed)
Example #10
0
    def __init__(self, order=None, reverse=False):
        """
        Arguments:
          - order(OrderMode): The strategy according to which to order all the entries. Possible
                values: see below.
          - reverse(bool): Reverse the sorting order. Has no effect with 'raw' order mode.
        """
        super(OrderEntriesFilter, self).__init__()

        self.order = OrderMode(order)
        self.reverse = butils.getbool(reverse)

        # Pass the value as a logger argument rather than using `%` with a
        # bare operand: formatting is deferred until the record is emitted,
        # and it stays correct whatever type the value has.
        logger.debug('orderentries: self.order=%r', self.order)
Example #11
0
    def __init__(self, xmlfile="publications_%Y-%m-%dT%H-%M-%S.xml", export_annote=True,
                 no_arxiv_urls=False, fixes_for_ethz=False, print_diff_to_last=False):
        """
        Bib2EnXmlFilter constructor.

        Arguments:

         - xmlfile: The name of the XML file to output to. This string will be parsed with
           `strftime()`, see [https://docs.python.org/2/library/time.html#time.strftime].
           If the file exists, it will not be overwritten and an error will be reported.
           The default value is 'publications_%Y-%m-%dT%H-%M-%S.xml'.

         - export_annote(bool): If set to `False`, then annote={} fields in the bibtex
           will not be exported into <notes>, as when this is set to `True` (`True` is the
           default).

         - no_arxiv_urls(bool): If set to `True`, then arxiv URLs will automatically be
           added to the entry. Note that this is the only way to link to the online arXiv
           version, but you may disable this option if the URL is already present in the
           entry.

         - fixes_for_ethz(bool): If set to `True`, includes some fixes & changes to prepare
           for proper upload on ETHZ Silva's CMS publication database.

         - print_diff_to_last(bool): If `True`, then print out the difference between the
           new outputted XML file and the latest file generated with the same pattern.
        """

        BibFilter.__init__(self)

        # Keep the raw pattern as well as the concrete file name obtained by
        # expanding it with the current local time.
        self.xmlfilepattern = xmlfile
        timestamp = datetime.now()
        self.xmlfile = timestamp.strftime(xmlfile)

        self.export_annote = getbool(export_annote)
        # NOTE(review): the docstring text for no_arxiv_urls reads as the
        # opposite of what the option name suggests -- confirm against the
        # code that uses this flag.
        self.no_arxiv_urls = getbool(no_arxiv_urls)
        self.fixes_for_ethz = getbool(fixes_for_ethz)
        self.print_diff_to_last = getbool(print_diff_to_last)

        logger.debug('bib2enxml: xmlfile=%r', self.xmlfile)
    def __init__(self,
                 scheme=JAbbrevModule('defaults'),
                 dot_at_abbrev=True,
                 dot_at_abbrev_cmd=r'\@'):
        r"""
        Arguments:

          * scheme(JAbbrevModule):   Use the given abbreviations scheme.

          * dot_at_abbrev(bool):   If true (the default), then abbreviations are written
                                   e.g. as "Phys.\@ Rev.\@ Lett.\@" which gets the spacing
                                   right in LaTeX (not end of sentence).  Set to false to
                                   keep the simple "Phys. Rev. Lett."

          * dot_at_abbrev_cmd:     The command to use after a dot when -dDotAtAbbrev is
                                   set.  You can set this, e.g., to "\@" or any other
                                   custom command.

        """

        self.scheme = scheme
        self.dot_at_abbrev = butils.getbool(dot_at_abbrev)
        self.dot_at_abbrev_cmd = dot_at_abbrev_cmd

        # List of (pattern, replacement) pairs, filled in from the scheme module below.
        self.repl = []

        # A scheme name containing a dot is imported as a full module path;
        # a bare name is looked up inside the bundled 'jab' package.
        scheme_name = str(scheme)
        if '.' in scheme_name:
            module_path = scheme_name
        else:
            module_path = 'bibolamazi_qi_filters.jab.' + scheme_name
        scheme_module = importlib.import_module(module_path)

        pairs = scheme_module.__dict__['replacement_pairs']
        # jab.mkrxs() is said to do nothing if the key is already a re object.
        self.repl.extend(
            (jab.mkrxs(key),
             jab.mkvalrepl(value,
                           dot_at_abbrev=self.dot_at_abbrev,
                           dot_at_abbrev_cmd=self.dot_at_abbrev_cmd))
            for key, value in pairs
        )

        logger.debug("JNameAbbrevFilter: repl=%r", self.repl)
Example #13
0
 def make_filter_option(farg):
     """Register the command-line option(s) for filter argument `farg`; return its arg doc.

     Boolean arguments get a `--opt` switch and, unless the name already
     starts with `no-`, a matching `--no-opt` switch.  Other arguments get a
     single `--opt` option with a metavar derived from the documented type.
     """
     fopt = farg.replace('_', '-')
     argdoc = argdocs.get(farg, _ArgDoc(farg,None,None))

     # Double any '%' (presumably because the parser %-formats help strings
     # -- confirm) and re-wrap the text to 80 columns.
     helptext = None
     if argdoc.doc is not None:
         wrapper = textwrap.TextWrapper(width=80, replace_whitespace=True, drop_whitespace=True)
         helptext = wrapper.fill(argdoc.doc.replace('%', '%%'))

     optkwargs = dict(action='store', dest=farg, help=helptext)

     if argdoc.argtypename != 'bool':
         # Plain valued option.
         if argdoc.argtypename:
             if argdoc.argtypename not in ns.seen_types:
                 ns.seen_types.append(argdoc.argtypename)
             optkwargs['metavar'] = '<%s>'%(argdoc.argtypename)
         else:
             optkwargs['metavar'] = '<ARG>'
         group_filter.add_argument('--'+fopt, **optkwargs)
         return argdoc

     # Boolean switch.
     optkwargs['metavar'] = '<BOOLEAN ARG>'
     add_negated_form = not fopt.startswith('no-')
     if add_negated_form:
         # only provide help for second option
         optkwargs['help'] = ''
     group_filter.add_argument('--'+fopt, nargs='?', default=None, const=True,
                               type=butils.getbool, **optkwargs)
     if add_negated_form:
         # The negated form carries the help text and stores the inverted value.
         optkwargs['help'] = helptext
         group_filter.add_argument('--no-'+fopt, nargs='?', default=None, const=False,
                                   type=lambda val: not butils.getbool(val), **optkwargs)
     # remember that we've seen a bool arg
     ns.has_a_boolean_arg = True
     return argdoc
Example #14
0
    def __init__(self, jobname=None, search_dirs=[], prefix="", journal_ref_in_note=False):
        r"""
        CiteArxivFilter constructor.

        Arguments:

          - jobname: the base name of the latex file whose citations we should
              analyze. Will search for jobname.aux and look for '\citation{..}'
              commands as they are generated by latex.  The corresponding AUX
              file is searched for and analyzed.  If -sJobname is not specified,
              then the LaTeX file name is guessed from the bibolamazi file name,
              as for the only_used filter and the duplicates filter.

          - search_dirs(CommaStrList): the .aux file will be searched for in
              this list of directories; separate directories with commas
              e.g. 'path/to/dir1,path/to/dir2'.  Paths are absolute or relative
              to bibolamazi file.

          - journal_ref_in_note(bool): keep the journal reference given by the
              arXiv in the note={} bibtex field. (default: No)

          - prefix: if set, citations should be in the the form
              '\cite{prefix:id}' (default: no prefix)
        """

        super(CiteArxivFilter, self).__init__()

        self.jobname = jobname
        self.prefix = prefix
        self.journal_ref_in_note = getbool(journal_ref_in_note)

        # Use the two default locations when no search directory was given.
        requested_dirs = CommaStrList(search_dirs)
        if requested_dirs:
            self.search_dirs = requested_dirs
        else:
            self.search_dirs = ['.', '_cleanlatexfiles'] # also for my cleanlatex utility :)

        logger.debug('citearxiv: jobname=%r' % (self.jobname,))
Example #15
0
    def __init__(self, fix_swedish_a=False, encode_utf8_to_latex=False, encode_latex_to_utf8=False,
                 remove_type_from_phd=False, remove_full_braces=False, protect_names=None,
                 remove_file_field=False):
        """
        Constructor method for a useless filter.
        """
        # NOTE(review): the docstring above is left untouched because filter
        # constructor docstrings may be parsed by the option parser -- confirm
        # before expanding it.  What this constructor does with its options:
        #
        #  - fix_swedish_a, encode_utf8_to_latex, encode_latex_to_utf8,
        #    remove_type_from_phd, remove_file_field: converted to booleans
        #    with butils.getbool().  The two encode_* options are mutually
        #    exclusive.
        #  - remove_full_braces: either a boolean, or (when getbool() rejects
        #    the value with ValueError) a comma-separated list of field names.
        #  - protect_names: None, or a comma-separated list of names; each
        #    non-empty name is mapped to a case-insensitive regex matching it.

        BibFilter.__init__(self)

        self.fix_swedish_a = butils.getbool(fix_swedish_a)

        self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
        self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)

        if self.encode_utf8_to_latex and self.encode_latex_to_utf8:
            # NOTE(review): other filters raise BibFilterError(name, msg);
            # confirm FilterError is the intended exception type here.
            raise FilterError("Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'.")

        self.remove_type_from_phd = butils.getbool(remove_type_from_phd)

        try:
            self.remove_full_braces = butils.getbool(remove_full_braces)
        except ValueError:
            # not boolean, we have provided a field list.
            self.remove_full_braces = True
            self.remove_full_braces_fieldlist = [ x.strip().lower() for x in remove_full_braces.split(',') ]
        else:
            self.remove_full_braces_fieldlist = None # all fields

        if protect_names is not None:
            self.protect_names = {}
            for rawname in protect_names.split(','):
                name = rawname.strip()
                if not name:
                    # An empty item (e.g. from a trailing comma) would compile
                    # to a pattern matching at every word boundary; skip it.
                    continue
                # Escape the name so that characters such as '.' or '+' are
                # matched literally instead of being read as regex syntax.
                pattern = re.escape(name)
                # Anchor on a word boundary only at an edge that is itself a
                # word character; `\b` next to punctuation would demand an
                # adjacent word character outside the name.
                if re.match(r'\w', name[0]):
                    pattern = r'\b' + pattern
                if re.match(r'\w', name[-1]):
                    pattern = pattern + r'\b'
                self.protect_names[name] = re.compile(pattern, re.IGNORECASE)
        else:
            self.protect_names = None

        self.remove_file_field = butils.getbool(remove_file_field)

        # Values are passed as logger arguments so formatting is deferred.
        logger.debug('useless test filter: fix_swedish_a=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
                     'remove_type_from_phd=%r; '
                     'remove_full_braces=%r [fieldlist=%r], protect_names=%r, remove_file_field=%r',
                     self.fix_swedish_a, self.encode_utf8_to_latex, self.encode_latex_to_utf8,
                     self.remove_type_from_phd,
                     self.remove_full_braces, self.remove_full_braces_fieldlist, self.protect_names,
                     self.remove_file_field)
Example #16
0
    def __init__(self,
                 fix_space_after_escape=False,
                 encode_utf8_to_latex=False,
                 encode_latex_to_utf8=False,
                 remove_type_from_phd=False,
                 remove_pages_from_book=False,
                 remove_full_braces=False,
                 remove_full_braces_not_lang=[],
                 protect_names=None,
                 remove_file_field=False,
                 remove_fields=[],
                 remove_doi_prefix=False,
                 map_annote_to_note=False,
                 auto_urlify=False,
                 rename_language={},
                 fix_mendeley_bug_urls=False,
                 protect_capital_letter_after_dot=False,
                 protect_capital_letter_at_begin=False,
                 convert_dbl_quotes=False,
                 convert_sgl_quotes=False,
                 dbl_quote_macro=r'\qq',
                 sgl_quote_macro=r'\q',
                 unprotect_full_last_names=False,
                 # obsolete:
                 fix_swedish_a=False):
        """
        Constructor method for FixesFilter

        Arguments:
          - fix_space_after_escape(bool): transform `\\AA berg' and `M\\o ller' into `\\AA{}berg',
               `M\\o{}ller' to avoid bibtex styles from wrongfully splitting these words.
          - encode_utf8_to_latex(bool): encode known non-ascii characters into latex escape sequences.
          - encode_latex_to_utf8(bool): encode known latex escape sequences to unicode text (utf-8).
          - remove_type_from_phd(bool): Removes any `type=' field from @phdthesis{..} bibtex entries.
          - remove_pages_from_book(bool): Removes the `pages=' field from @book{..} bibtex entries.
          - remove_full_braces(BoolOrFieldList): removes overprotective global braces in field values.
          - remove_full_braces_not_lang(CommaStrList): (in conjunction with --remove-full-braces) removes the
            overprotective global braces only if the language of the entry (as per language={..} bibtex field)
            is not in the given list (case insensitive).
          - protect_names(CommaStrList): list of names to protect from bibtex style casing.
          - remove_file_field(bool): removes file={...} fields from all entries.
          - remove_fields(CommaStrList): removes given fields from all entries.
          - remove_doi_prefix(bool): removes `doi:' prefix from all DOIs, if present
          - map_annote_to_note(bool): maps `annote' bibtex field to a `note' field
          - auto_urlify: automatically wrap URLs into `\\url{}' commands. True/False, or a comma-separated
                list of fields to act on
          - rename_language(ColonCommaStrDict): replace e.g. `de' by `Deutsch'. Use
                format `alias1:language1,alias2:language2...'.
          - fix_mendeley_bug_urls(BoolOrFieldList): fix the `url' field for Mendeley's
                buggy output. Pass on a list of fields (comma-separated) to specify
                which fields to act on; by default if enabled only 'url'.
          - protect_capital_letter_after_dot(BoolOrFieldList): place first (capital) letter after a full
                stop or colon in protective braces (for the the given bibtex fields). Pass
                true or false here, or a list of fields on which to act (by default only 'title')
          - protect_capital_letter_at_begin(BoolOrFieldList): place first (capital) letter of a field in
                protective braces (for the the given bibtex fields). Pass
                true or false here, or a list of fields on which to act (by default only 'title')
          - convert_dbl_quotes(BoolOrFieldList): detect & convert double-quoted expressions to
                invoke a LaTeX macro. Pass
                true or false here, or a list of fields on which to act (by default 'title,abstract,booktitle,series')
          - dbl_quote_macro: the macro to use for double-quotes when convert_dbl_quotes is set
          - convert_sgl_quotes(BoolOrFieldList): detect & convert single-quoted expressions to
                invoke a LaTeX macro. Pass
                true or false here, or a list of fields on which to act (by default 'title,abstract,booktitle,series')
          - sgl_quote_macro: the macro to use for single-quotes when convert_sgl_quotes is set
          - unprotect_full_last_names(bool): remove curly braces around complete last names
          - fix_swedish_a(bool): (OBSOLETE, use -dFixSpaceAfterEscape instead.) 
                transform `\\AA berg' into `\\AA{}berg' for `\\AA' and `\\o' (this
                problem occurs in files generated e.g. by Mendeley); revtex tends to
                insert a blank after the `\\AA' or `\\o' otherwise.
        """
        
        super(FixesFilter, self).__init__()

        self.fix_space_after_escape = butils.getbool(fix_space_after_escape)
        self.fix_swedish_a = butils.getbool(fix_swedish_a); # OBSOLETE

        if (self.fix_swedish_a):
            logger.warning("Fixes Filter: option -dFixSwedishA is now obsolete, in favor of the more"
                           " general and better option -dFixSpaceAfterEscape. The old option will"
                           " still work for backwards compatibility, but please consider changing to"
                           " the new option.")

        self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
        self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)

        if (self.encode_utf8_to_latex and self.encode_latex_to_utf8):
            raise BibFilterError(self.name(),
                                 "Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'.")

        self.remove_type_from_phd = butils.getbool(remove_type_from_phd)

        self.remove_pages_from_book = butils.getbool(remove_pages_from_book)

        remove_full_braces = BoolOrFieldList(remove_full_braces)
        if remove_full_braces.valuetype is bool:
            self.remove_full_braces = remove_full_braces.value
            self.remove_full_braces_fieldlist = None
        else:
            self.remove_full_braces = bool(len(remove_full_braces.value))
            self.remove_full_braces_fieldlist = [ x.strip().lower() for x in remove_full_braces.value ]

        if self.remove_full_braces:
            if not remove_full_braces_not_lang:
                self.remove_full_braces_not_lang = []
            else:
                self.remove_full_braces_not_lang = [
                    x.lower()
                    for x in CommaStrList(remove_full_braces_not_lang)
                    ]
        else:
            self.remove_full_braces_not_lang = None

        if protect_names is not None:
            def mkpatternrx(x):
                x = x.strip()
                if not x:
                    return tuple()
                # x may be a name, e.g. 'Bell', but it may also be a more complex string, e.g. 'i.i.d.'.
                #
                pattern = re.escape(x)

                # We need to make sure that a match doesn't begin or end in the
                # middle of a word. (e.g., "Bell" shouldn't match in "doorbell")
                if x[0].isalpha():
                    pattern = r'\b' + pattern
                if x[-1].isalpha():
                    pattern = pattern + r'\b'

                return (x, re.compile(pattern, re.IGNORECASE),)
                    
            self.protect_names = [ t for t in [ mkpatternrx(x) for x in protect_names ]
                                   if len(t) ]
        else:
            self.protect_names = None

        self.remove_file_field = butils.getbool(remove_file_field)
        self.remove_fields = CommaStrList(remove_fields)
        self.remove_doi_prefix = butils.getbool(remove_doi_prefix)

        self.map_annote_to_note = butils.getbool(map_annote_to_note)
        
        try:
            auto_urlify_bool = butils.getbool(auto_urlify) # raises ValueError if not a boolean
            self.auto_urlify = [ "note" ] if auto_urlify_bool else []
        except ValueError:
            self.auto_urlify = CommaStrList(auto_urlify)

        # make sure key (language alias) is made lower-case
        self.rename_language = dict([ (k.lower(), v)
                                      for k, v in iteritems(ColonCommaStrDict(rename_language)) ])
        self.rename_language_rx = None
        if self.rename_language:
            # e.g. with rename_language={'en':'english','de':'deutsch',
            # 'german':'deutsch', 'french':'francais'}, prepare the regexp
            # '^en|de|german|french$'. Case INsensitive.
            self.rename_language_rx = re.compile(
                r'^\s*(?P<lang>' +
                "|".join([re.escape(k.strip()) for k in self.rename_language]) +
                r'\s*)$',
                flags=re.IGNORECASE
                )

        fix_mendeley_bug_urls = BoolOrFieldList(fix_mendeley_bug_urls)
        if fix_mendeley_bug_urls.valuetype is bool:
            self.fix_mendeley_bug_urls = ['url'] if fix_mendeley_bug_urls.value else []
        else:
            self.fix_mendeley_bug_urls = fix_mendeley_bug_urls.value

        protect_capital_letter_after_dot = BoolOrFieldList(protect_capital_letter_after_dot)
        if protect_capital_letter_after_dot.valuetype is bool:
            self.protect_capital_letter_after_dot = ['title'] if protect_capital_letter_after_dot.value else []
        else:
            self.protect_capital_letter_after_dot = protect_capital_letter_after_dot.value

        protect_capital_letter_at_begin = BoolOrFieldList(protect_capital_letter_at_begin)
        if protect_capital_letter_at_begin.valuetype is bool:
            self.protect_capital_letter_at_begin = ['title'] if protect_capital_letter_at_begin.value else []
        else:
            self.protect_capital_letter_at_begin = protect_capital_letter_at_begin.value

        self.dbl_quote_macro = dbl_quote_macro
        self.sgl_quote_macro = sgl_quote_macro

        convert_dbl_quotes = BoolOrFieldList(convert_dbl_quotes)
        if convert_dbl_quotes.valuetype is CommaStrList:
            self.convert_dbl_quotes = convert_dbl_quotes.value
        else:
            # just passed a bool, e.g. 'True'
            self.convert_dbl_quotes = ['title','abstract','booktitle','series'] if convert_dbl_quotes.value else []
            
        convert_sgl_quotes = BoolOrFieldList(convert_sgl_quotes)
        if convert_sgl_quotes.valuetype is CommaStrList:
            self.convert_sgl_quotes = convert_sgl_quotes.value
        else:
            # just passed a bool, e.g. 'True'
            self.convert_sgl_quotes = ['title','abstract','booktitle','series'] if convert_sgl_quotes.value else []
        
        self.unprotect_full_last_names = unprotect_full_last_names

        logger.debug(('fixes filter: fix_space_after_escape=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
                      'remove_type_from_phd=%r; '
                      'remove_pages_from_book=%r; '
                      'remove_full_braces=%r [fieldlist=%r, not lang=%r], '
                      'protect_names=%r, remove_file_field=%r, '
                      'remove_fields=%r, remove_doi_prefix=%r, fix_swedish_a=%r, '
                      'map_annote_to_note=%r, auto_urlify=%r, rename_language=%r, rename_language_rx=%r, '
                      'fix_mendeley_bug_urls=%r,'
                      'protect_capital_letter_after_dot=%r,protect_capital_letter_at_begin=%r,'
                      'convert_dbl_quotes=%r,dbl_quote_macro=%r,convert_sgl_quotes=%r,sgl_quote_macro=%r,'
                      'unprotect_full_last_names=%r')
                     % (self.fix_space_after_escape, self.encode_utf8_to_latex, self.encode_latex_to_utf8,
                        self.remove_type_from_phd, self.remove_pages_from_book,
                        self.remove_full_braces, self.remove_full_braces_fieldlist,
                        self.remove_full_braces_not_lang,
                        self.protect_names,
                        self.remove_file_field,
                        self.remove_fields, self.remove_doi_prefix, self.fix_swedish_a,
                        self.map_annote_to_note,
                        self.auto_urlify,
                        self.rename_language,
                        (self.rename_language_rx.pattern if self.rename_language_rx else None),
                        self.fix_mendeley_bug_urls,
                        self.protect_capital_letter_after_dot, self.protect_capital_letter_at_begin,
                        self.convert_dbl_quotes,self.dbl_quote_macro,self.convert_sgl_quotes,self.sgl_quote_macro,
                        self.unprotect_full_last_names
                        ))
Example #17
0
    def __init__(self,
                 mode="eprint",
                 unpublished_mode=None,
                 arxiv_journal_name="ArXiv e-prints",
                 strip_unpublished_fields=[],
                 note_string="",
                 note_string_fmt="",
                 no_archive_prefix=False,
                 default_archive_prefix="arXiv",
                 no_primary_class_for_old_ids=False,
                 no_primary_class=False,
                 theses_count_as_published=False,
                 warn_journal_ref=True):
        """
        Constructor method for ArxivNormalizeFilter

        Arguments:
          - mode(Mode):  the behavior to adopt for published articles which also have an arxiv ID
          - unpublished_mode(Mode): the behavior to adopt for unpublished articles who have an arxiv
                   ID (if None, use the same mode as `mode').
          - strip_unpublished_fields(CommaStrList): (all modes): a list of bibtex fields to remove
                   from all unpublished entries.
          - arxiv_journal_name: (in eprint mode): the string to set the journal={} entry to for
                   unpublished entries
          - note_string: (obsolete, prefer -sNoteStringFmt) the string to insert in the `note' field
                   (for modes 'unpublished-note', 'note', and 'unpublished-note-notitle'). Use
                   `%(arxivid)s' to include the ArXiv ID itself in the string. Default:
                   '{arXiv:%(arxivid)s}'. Possible substitutions keys are
                   'arxivid','primaryclass','published','doi'. You can't specify both (-sNoteString
                   and -sNoteStringFmt).
          - note_string_fmt: the string to insert in the `note' field for modes 'unpublished-note',
                   'note' and 'unpublished-note-notitle'. This field uses Python's new advanced
                   formatting mini-language (see `string.Formatter`). The available fields and
                   formats are documented below in the filter documentation.
          - no_archive_prefix(bool): If set, then removes the 'archiveprefix' key entirely.
          - default_archive_prefix: In `eprint' mode, entries which don't have an archive prefix are
                   given this one. Additionally, other entries whose archive prefix match this one
                   up to letter casing are adjusted to this one. (Default: "arXiv")
          - no_primary_class_for_old_ids(bool): if True, then in `eprint' mode no 'primaryclass' field
                   is set if the entry has an "old" arXiv ID identifier already containing the
                   primary-class, e.g. "quant-ph/YYYYZZZ".
          - no_primary_class(bool): if True, then the `primaryclass' field is always stripped.
          - theses_count_as_published(bool): if True, then entries of type @phdthesis and
                   @mastersthesis count as published entries, otherwise not (the default).
          - warn_journal_ref(bool): if True, then for all articles that look unpublished in our
                   database, but for which the arXiv.org API reports a published version, we produce
                   a warning (this is the default; set this option to false to suppress these
                   warnings).
        """

        BibFilter.__init__(self)

        self.mode = Mode(mode)
        # Unpublished entries use the same mode as published ones unless a
        # dedicated mode was given.
        if unpublished_mode is not None:
            self.unpublished_mode = Mode(unpublished_mode)
        else:
            self.unpublished_mode = self.mode

        # The `[]` default in the signature is only handed to CommaStrList(...)
        # here; this constructor itself never modifies it.
        self.strip_unpublished_fields = CommaStrList(strip_unpublished_fields)
        self.arxiv_journal_name = arxiv_journal_name

        # The two note-format options are mutually exclusive.
        self.note_string = note_string
        self.note_string_fmt = note_string_fmt
        if self.note_string and self.note_string_fmt:
            raise BibFilterError('arXiv', "Can't give both -sNoteString and -sNoteStringFmt !")
        if not self.note_string and not self.note_string_fmt:
            # nothing given, defaults to:
            self.note_string_fmt = "{{arXiv:{arxiv.arxivid}{if:(arxiv.isnewarxivid)( [{arxiv.primaryclass}])}}"

        # Parse this flag like every other boolean option of this filter, so
        # that a textual value such as "False" is not mistaken for a truthy
        # (non-empty) string.
        self.no_archive_prefix = butils.getbool(no_archive_prefix)
        self.default_archive_prefix = default_archive_prefix
        self.no_primary_class_for_old_ids = butils.getbool(no_primary_class_for_old_ids)
        self.no_primary_class = butils.getbool(no_primary_class)
        self.theses_count_as_published = butils.getbool(theses_count_as_published)

        self.warn_journal_ref = butils.getbool(warn_journal_ref)

        # Lazy %-arguments: the message is only formatted if debug logging is on.
        logger.debug('arxiv filter constructor: mode=%s; unpublished_mode=%s',
                     self.mode, self.unpublished_mode)
Example #18
0
    def __init__(self,
                 fix_space_after_escape=False,
                 encode_utf8_to_latex=False,
                 encode_latex_to_utf8=False,
                 remove_type_from_phd=False,
                 remove_full_braces=False,
                 remove_full_braces_not_lang=[],
                 protect_names=None,
                 remove_file_field=False,
                 remove_fields=[],
                 remove_doi_prefix=False,
                 map_annote_to_note=False,
                 auto_urlify=False,
                 rename_language={},
                 fix_mendeley_bug_urls=False,
                 fix_swedish_a=False):
        """
        Constructor method for FixesFilter

        Filter Arguments:
          - fix_space_after_escape(bool): transform `\\AA berg' and `M\\o ller' into `\\AA{}berg',
               `M\\o{}ller' to avoid bibtex styles from wrongfully splitting these words.
          - encode_utf8_to_latex(bool): encode known non-ascii characters into latex escape sequences.
          - encode_latex_to_utf8(bool): encode known latex escape sequences to unicode text (utf-8).
          - remove_type_from_phd(bool): Removes any `type=' field from @phdthesis{..} bibtex entries.
          - remove_full_braces: removes overprotective global braces in field values.
          - remove_full_braces_not_lang(CommaStrList): (in conjunction with --remove-full-braces) removes the
            overprotective global braces only if the language of the entry (as per language={..} bibtex field)
            is not in the given list (case insensitive).
          - protect_names: list of names to protect from bibtex style casing.
          - remove_file_field(bool): removes file={...} fields from all entries.
          - remove_fields(CommaStrList): removes given fields from all entries.
          - remove_doi_prefix(bool): removes `doi:' prefix from all DOIs, if present
          - map_annote_to_note(bool): maps `annote' bibtex field to a `note' field
          - auto_urlify: automatically wrap URLs into `\\url{}' commands
          - rename_language(ColonCommaStrDict): replace e.g. `de' by `Deutsch'. Use
                format `alias1:language1,alias2:language2...'.
          - fix_mendeley_bug_urls(bool): fix the `url' field for Mendeley's
                buggy output. Pass on a list of fields (comma-separated) to specify
                which fields to act on; by default if enabled only 'url'.
          - fix_swedish_a(bool): (OBSOLETE, use -dFixSpaceAfterEscape instead.) 
                transform `\\AA berg' into `\\AA{}berg' for `\\AA' and `\\o' (this
                problem occurs in files generated e.g. by Mendeley); revtex tends to
                insert a blank after the `\\AA' or `\\o' otherwise.
        """

        BibFilter.__init__(self)

        self.fix_space_after_escape = butils.getbool(fix_space_after_escape)
        self.fix_swedish_a = butils.getbool(fix_swedish_a)  # OBSOLETE

        if self.fix_swedish_a:
            logger.warning("Fixes Filter: option -dFixSwedishA is now obsolete, in favor of the more"
                           " general and better option -dFixSpaceAfterEscape. The old option will"
                           " still work for backwards compatibility, but please consider changing to"
                           " the new option.")

        self.encode_utf8_to_latex = butils.getbool(encode_utf8_to_latex)
        self.encode_latex_to_utf8 = butils.getbool(encode_latex_to_utf8)

        if self.encode_utf8_to_latex and self.encode_latex_to_utf8:
            # BibFilterError is raised elsewhere in this file as
            # (filter name, message); pass the filter name here as well.
            raise BibFilterError(
                'fixes',
                "Conflicting options: `encode_utf8_to_latex' and `encode_latex_to_utf8'."
            )

        self.remove_type_from_phd = butils.getbool(remove_type_from_phd)

        # `remove_full_braces` is either a boolean (act on all fields) or a
        # comma-separated list of field names (act on those fields only).
        try:
            self.remove_full_braces = butils.getbool(remove_full_braces)
            self.remove_full_braces_fieldlist = None  # all fields
        except ValueError:
            # not boolean, we have provided a field list.
            self.remove_full_braces = True
            self.remove_full_braces_fieldlist = [
                x.strip().lower() for x in remove_full_braces.split(',')
            ]

        if self.remove_full_braces:
            if not remove_full_braces_not_lang:
                self.remove_full_braces_not_lang = []
            else:
                self.remove_full_braces_not_lang = [
                    x.lower()
                    for x in CommaStrList(remove_full_braces_not_lang)
                ]
        else:
            self.remove_full_braces_not_lang = None

        # Map each protected name to a case-insensitive regexp matching it.
        if protect_names is not None:
            self.protect_names = {}
            for name in protect_names.split(u','):
                name = name.strip()
                if not name:
                    # Skip empty entries (e.g. from a trailing comma); they
                    # would otherwise yield a pattern that matches at every
                    # word boundary.
                    continue

                pattern = re.escape(name)
                # A match must not begin or end in the middle of a word (e.g.
                # "Bell" shouldn't match in "doorbell"). `\b` is only added
                # next to a word character: next to punctuation (as in
                # "i.i.d.") it would instead *require* an adjacent word
                # character and prevent legitimate matches.
                # (u'\\b' rather than ur'\b': the `ur` prefix is not valid
                # syntax on Python 3.)
                if name[0].isalnum() or name[0] == u'_':
                    pattern = u'\\b' + pattern
                if name[-1].isalnum() or name[-1] == u'_':
                    pattern = pattern + u'\\b'

                self.protect_names[name] = re.compile(pattern, re.IGNORECASE)
        else:
            self.protect_names = None

        self.remove_file_field = butils.getbool(remove_file_field)
        self.remove_fields = CommaStrList(remove_fields)
        self.remove_doi_prefix = butils.getbool(remove_doi_prefix)

        self.map_annote_to_note = butils.getbool(map_annote_to_note)

        # `auto_urlify` is either a boolean (act on the `note' field) or a
        # comma-separated list of fields.
        try:
            auto_urlify_bool = butils.getbool(auto_urlify)  # raises ValueError if not a boolean
            self.auto_urlify = ["note"] if auto_urlify_bool else []
        except ValueError:
            self.auto_urlify = CommaStrList(auto_urlify)

        # make sure key (language alias) is made lower-case
        # (`.items()` works on both Python 2 and 3, unlike `.iteritems()`.)
        self.rename_language = dict(
            (k.lower(), v)
            for k, v in ColonCommaStrDict(rename_language).items()
        )
        self.rename_language_rx = None
        if self.rename_language:
            # e.g. with rename_language={'en':'english','de':'deutsch',
            # 'german':'deutsch', 'french':'francais'}, prepare the regexp
            # '^\s*(en|de|german|french)\s*$'. Case INsensitive.
            #
            # The trailing `\s*` is kept OUTSIDE the `lang` group: inside it,
            # it would bind to the last alternative only and would leak
            # trailing whitespace into the captured alias.
            self.rename_language_rx = re.compile(
                r'^\s*(?P<lang>' +
                "|".join([re.escape(k.strip()) for k in self.rename_language]) +
                r')\s*$',
                flags=re.IGNORECASE
            )

        # `fix_mendeley_bug_urls` is either a boolean (act on the `url' field)
        # or a comma-separated list of fields.  Try the boolean reading first
        # so that textual booleans such as "True"/"False" are not mistaken for
        # field names.
        if not fix_mendeley_bug_urls:
            self.fix_mendeley_bug_urls = []
        else:
            try:
                mendeley_enabled = butils.getbool(fix_mendeley_bug_urls)
            except (TypeError, ValueError):
                # not a boolean: a list of fields was given
                self.fix_mendeley_bug_urls = CommaStrList(fix_mendeley_bug_urls)
            else:
                self.fix_mendeley_bug_urls = ['url'] if mendeley_enabled else []

        # Lazy %-arguments: the message is only formatted if debug logging is on.
        logger.debug(
            ('fixes filter: fix_space_after_escape=%r; encode_utf8_to_latex=%r; encode_latex_to_utf8=%r; '
             'remove_type_from_phd=%r; '
             'remove_full_braces=%r [fieldlist=%r, not lang=%r], '
             'protect_names=%r, remove_file_field=%r, '
             'remove_fields=%r, remove_doi_prefix=%r, fix_swedish_a=%r, '
             'map_annote_to_note=%r, auto_urlify=%r, rename_language=%r, rename_language_rx=%r, '
             'fix_mendeley_bug_urls=%r'),
            self.fix_space_after_escape, self.encode_utf8_to_latex, self.encode_latex_to_utf8,
            self.remove_type_from_phd,
            self.remove_full_braces, self.remove_full_braces_fieldlist,
            self.remove_full_braces_not_lang,
            self.protect_names,
            self.remove_file_field,
            self.remove_fields, self.remove_doi_prefix, self.fix_swedish_a,
            self.map_annote_to_note,
            self.auto_urlify,
            self.rename_language,
            (self.rename_language_rx.pattern if self.rename_language_rx else None),
            self.fix_mendeley_bug_urls
        )