예제 #1
0
def download():
    """
    Fetch BibTeX records for the phrase "eelbrain" (2012 onwards, patents
    excluded) page by page and write the merged bibliography to CACHE.

    Paging stops at the first result page that contains no articles.  An
    entry that lacks a 'url' field receives the first URL attribute of its
    article when that value is truthy.
    """
    querier = ScholarQuerier()
    scholar_settings = ScholarSettings()
    scholar_settings.set_citation_format(ScholarSettings.CITFORM_BIBTEX)
    querier.apply_settings(scholar_settings)

    search = SearchScholarQuery()
    search.set_phrase("eelbrain")
    search.set_timeframe(2012, None)
    search.set_include_patents(False)

    collected = BibliographyData()
    offset = 0
    while True:
        querier.send_query(search)
        if len(querier.articles) == 0:
            break

        for hit in querier.articles:
            # Populate hit.citation_data, then parse it as BibTeX.
            querier.get_citation_data(hit)
            parsed = parse_bytes(hit.citation_data, 'bibtex')
            assert len(parsed.entries) == 1
            for cite_key, record in parsed.entries.items():
                if 'url' not in record.fields:
                    fallback_url = hit.attrs['url'][0]
                    if fallback_url:
                        record.fields['url'] = fallback_url
                collected.add_entry(cite_key, record)

        # Advance to the next result page (step of 10).
        offset += 10
        search.set_start(offset)

    # Collapse the doubly escaped ampersand before writing the file.
    serialized = collected.to_bytes('bibtex')
    CACHE.write_bytes(serialized.replace(br'\\&', br'\&'))
예제 #2
0
    def createMaterialize(self, parent, token, page):
        """
        Build the bibliography list via createHTML and append to every list
        item a "[BibTeX]" link plus a modal holding that entry's BibTeX text.

        Returns the element produced by createHTML, or None when createHTML
        itself returned None.
        """
        listing = self.createHTML(parent, token, page)
        if listing is None:
            return None

        for item in listing.children:
            cite_key = item['id']

            # A throw-away database with a single entry is used to serialize
            # just this citation.
            single = BibliographyData()
            single.add_entry(cite_key, self.extension.database().entries[cite_key])
            bibtex_source = single.to_string("bibtex")

            modal_id = uuid.uuid4()
            link_attrs = dict(style="padding-left:10px;",
                              class_='modal-trigger moose-bibtex-modal',
                              href="#" + str(modal_id),
                              string='[BibTeX]')
            html.Tag(item, 'a', **link_attrs)

            modal_div = html.Tag(item, 'div', class_='modal', id_=modal_id)
            content_div = html.Tag(modal_div, 'div', class_='modal-content')
            pre_tag = html.Tag(content_div, 'pre', style="line-height:1.25;")
            html.Tag(pre_tag, 'code', class_='language-latex', string=bibtex_source)

        return listing
예제 #3
0
def main():
    """
    Resolve the identifier given on the command line to a DOI and print the
    requested citation.

    With ``args.bibtex`` set, the BibTeX record is fetched, re-keyed as
    ``<last name>_<year>_<short doi>`` and printed; if a required key is
    missing (KeyError) the raw response is printed instead.  Otherwise the
    record is fetched in ``args.format`` and printed, falling back to
    printing the DOI on an HTTP error.
    """
    doi = _extract_doi(args.identifier[0])

    if doi is None:
        # NOTE(review): `item` is not defined inside this function; unless it
        # is a module-level name this branch raises NameError -- confirm.
        print(item)
    elif args.bibtex:
        result = cn.content_negotiation(doi, format="bibtex")
        bibtex = parse_string(result, "bibtex")
        try:
            # Mapping views are not subscriptable on Python 3, so materialize
            # them before taking the first element.
            first_entry = list(bibtex.entries.values())[0]
            first_role_persons = list(first_entry.persons.values())[0]
            name = "".join(first_role_persons[0].last_names)
            name = name.replace("ä", "ae").replace("ö",
                                                   "oe").replace("ü", "ue")
            name = unidecode(name)
            shortdoi = _short_doi(doi)[3:]
            year = first_entry.fields["year"]
            key = "{}_{}_{}".format(name, year, shortdoi)
            new = BibliographyData()
            new.add_entry(key, first_entry)
            print(new.to_string("bibtex"))
        except KeyError:
            print(result)
    else:
        try:
            result = cn.content_negotiation(doi, format=args.format)
            print(result)
        except requests.exceptions.HTTPError:
            print(doi)
    print()
예제 #4
0
파일: bibtex.py 프로젝트: aashiquear/moose
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Merges the entries of every '.bib' source found in the content into one
    BibliographyData object and registers the citation command, the inline
    reference component and the renderers.
    """

    @staticmethod
    def defaultConfig():
        """Return the base configuration plus the 'duplicate_warning' option."""
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def preExecute(self, content):
        """
        Rebuild the bibliography database from the '.bib' nodes in *content*.

        A file that fails to parse because of an undefined macro is reported
        and skipped.  A key that is already in the database is not added
        again; a warning is logged for it when 'duplicate_warning' is enabled.
        """
        self.__database = BibliographyData()

        bib_files = [node.source for node in content if node.source.endswith('.bib')]

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # Nothing was parsed for this file: without skipping, `db`
                # would be unbound (first file) or still hold the previous
                # file's entries.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key in self.__database.entries:
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged bibliography database (None until preExecute has run)."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX command, inline component and renderers."""
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        # NOTE(review): 'BibtexBiliography' looks like a misspelling of
        # 'BibtexBibliography'; confirm against the token name before changing.
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
예제 #5
0
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Gathers the '.bib' sources of the content into a single
    BibliographyData object and wires up the command, inline component and
    renderers that use it.
    """
    @staticmethod
    def defaultConfig():
        """Return the inherited configuration extended with 'duplicate_warning'."""
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def preExecute(self, content):
        """
        Create a fresh database and fill it from each '.bib' node of *content*.

        Files raising UndefinedMacro are logged and skipped.  Keys already
        present are left untouched and, when 'duplicate_warning' is enabled,
        reported with a warning.
        """
        self.__database = BibliographyData()

        bib_files = []
        for node in content:
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # Skip the merge below: `db` is either unbound or left over
                # from the previously parsed file.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                if key not in self.__database.entries:
                    self.__database.add_entry(key, db.entries[key])
                elif warn:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)

    @property
    def database(self):
        """The merged bibliography database (None before preExecute runs)."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX command, inline component and renderers."""
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        # NOTE(review): 'BibtexBiliography' looks like a misspelling of
        # 'BibtexBibliography'; confirm against the token name before changing.
        renderer.add('BibtexBiliography', RenderBibtexBibliography())
예제 #6
0
파일: bibtex.py 프로젝트: FHilty/moose
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    On init, every '.bib' source in the translator's page tree is parsed and
    merged into one BibliographyData object.
    """

    @staticmethod
    def defaultConfig():
        """Return the inherited configuration unchanged."""
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = BibliographyData()
        self.__citations = set()

    def init(self, translator):
        """
        Initialize the extension and load all '.bib' files under the
        translator root into the database.

        A file raising UndefinedMacro is logged and skipped.  A key that is
        already in the database is reported with a warning and not replaced.
        """
        command.CommandExtension.init(self, translator)

        bib_files = [node.source for node in anytree.PreOrderIter(self.translator.root)
                     if node.source.endswith('.bib')]

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # Nothing was parsed for this file: without skipping, `db`
                # would be unbound (first file) or still hold the previous
                # file's entries.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    @property
    def database(self):
        """The merged bibliography database."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX command, inline component and renderers."""
        self.requires(command)

        self.addCommand(BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>Format')

        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
예제 #7
0
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Parses the '.bib' sources reachable from the translator root during
    init() and exposes the merged entries through the `database` property.
    """
    @staticmethod
    def defaultConfig():
        """Return the inherited configuration unchanged."""
        config = command.CommandExtension.defaultConfig()
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = BibliographyData()
        self.__citations = set()

    def init(self, translator):
        """
        Run the base-class init, then merge every '.bib' file found under the
        translator root into the database.

        Files raising UndefinedMacro are logged and skipped; keys that already
        exist are reported with a warning and kept as first defined.
        """
        command.CommandExtension.init(self, translator)

        bib_files = []
        for node in anytree.PreOrderIter(self.translator.root):
            if node.source.endswith('.bib'):
                bib_files.append(node.source)

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # Skip the merge below: `db` is either unbound or left over
                # from the previously parsed file.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key not in self.__database.entries:
                    self.__database.add_entry(key, db.entries[key])
                else:
                    msg = "The BibTeX entry '%s' defined in %s already exists."
                    LOG.warning(msg, key, bfile)

    @property
    def database(self):
        """The merged bibliography database."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX command, inline component and renderers."""
        self.requires(command)

        self.addCommand(BibtexCommand())

        reader.addInline(BibtexReferenceComponent(), location='>Format')

        renderer.add(BibtexCite, RenderBibtexCite())
        renderer.add(BibtexBibliography, RenderBibtexBibliography())
예제 #8
0
def make_bibliography(table):  # pragma: nocover
    """
    Convert every row of *table* to a bibliography entry and collect the
    results in a new BibliographyData object.

    A row whose conversion raises ValueError is reported on stderr together
    with its 'Reference_ID' and left out of the result.
    """
    bibliography = BibliographyData()
    for record in table:
        try:
            bib_entry = row_to_bibentry(record)
        except ValueError as err:
            print('Reference', record.get('Reference_ID'), 'dropped:', str(err),
                  file=sys.stderr)
        else:
            bibliography.add_entry(bib_entry.key, bib_entry)
    return bibliography
예제 #9
0
파일: bibmod.py 프로젝트: mfa/webbib
    def load_bib(self, filename='IMSfull.bib'):
        """
        Parse *filename* and rebuild the publication list and its indexes.

        For each entry the pybtex field mapping is enriched in place with the
        rendered BibTeX source ('bibtex'), a SHA-1 digest ('key'), parsed
        authors, the original citation key ('bibkey'), a keyword list, a
        rendered reference and a default empty 'year'.  The results are
        stored on self.pubs, self.index_keys (digest -> position) and
        self.index_bibkeys (citation key -> position).
        """
        bib_data = bibtex.Parser().parse_file(filename)
        self.lastload = os.path.getmtime(filename)
        self.filename = filename

        publications = []
        by_digest = {}
        by_bibkey = {}
        for cite_key, element in bib_data.entries.iteritems():
            record = element.fields

            # Render this single entry back to BibTeX text through an
            # in-memory stream.
            single = BibliographyData()
            single.add_entry(cite_key, element)
            stream = StringIO.StringIO()
            Writer().write_stream(single, stream)
            record['bibtex'] = stream.getvalue()

            # The digest is taken over the fields as they are at this point
            # (including 'bibtex', before any of the keys added below).
            digest = hashlib.sha1(simplejson.dumps(record))
            record['key'] = digest.hexdigest()
            record['authors'] = self.parse_authors(element.persons)
            record['bibkey'] = element.key

            # Split the comma separated 'keyword' field into a list.
            record['keywords'] = []
            if record.get('keyword'):
                record['keywords'].extend(
                    part.strip() for part in record['keyword'].split(','))
            record['reference'] = self.render_references(element.type, record)

            position = len(publications)
            publications.append(record)
            by_digest[digest.hexdigest()] = position
            by_bibkey[element.key] = position
            if 'year' not in record:
                record['year'] = ''

        # set at end -> less time for threading problems
        self.index_keys = by_digest
        self.index_bibkeys = by_bibkey
        self.pubs = publications
예제 #10
0
파일: bibmod.py 프로젝트: mfa/webbib
    def load_bib(self, filename="IMSfull.bib"):
        """
        Load the bibliography in *filename* and refresh the cached
        publication list together with both lookup indexes.

        Every entry's field mapping is extended in place with: its BibTeX
        source ("bibtex"), a SHA-1 hex digest ("key"), the parsed authors,
        the citation key ("bibkey"), the list of keywords, the rendered
        reference and, when absent, an empty "year".
        """
        bib_data = bibtex.Parser().parse_file(filename)
        self.lastload = os.path.getmtime(filename)
        self.filename = filename

        collected = []
        digest_index = {}
        bibkey_index = {}
        for cite_key, element in bib_data.entries.iteritems():
            fields = element.fields

            # Write a one-entry database to an in-memory stream to recover
            # the BibTeX text of this entry.
            one_entry_db = BibliographyData()
            one_entry_db.add_entry(cite_key, element)
            buf = StringIO.StringIO()
            Writer().write_stream(one_entry_db, buf)
            fields["bibtex"] = buf.getvalue()

            # Hash the fields before the derived keys below are added.
            sha = hashlib.sha1(simplejson.dumps(fields))
            fields["key"] = sha.hexdigest()
            fields["authors"] = self.parse_authors(element.persons)
            fields["bibkey"] = element.key

            fields["keywords"] = []
            if fields.get("keyword"):
                for raw_keyword in fields["keyword"].split(","):
                    fields["keywords"].append(raw_keyword.strip())
            fields["reference"] = self.render_references(element.type, fields)

            slot = len(collected)
            collected.append(fields)
            digest_index[sha.hexdigest()] = slot
            bibkey_index[element.key] = slot
            if "year" not in fields:
                fields["year"] = ""

        # set at end -> less time for threading problems
        self.index_keys = digest_index
        self.index_bibkeys = bibkey_index
        self.pubs = collected
예제 #11
0
파일: bibtex.py 프로젝트: FHilty/moose
    def createMaterialize(self, token, parent):
        """
        Render the bibliography list with createHTML and give each list item
        a "[BibTeX]" link that opens a modal showing that entry's BibTeX.
        """
        listing = self.createHTML(token, parent)

        for item in listing.children:
            cite_key = item['id']

            # Serialize only this citation by wrapping it in its own database.
            single = BibliographyData()
            single.add_entry(cite_key, self.extension.database.entries[cite_key])
            bibtex_source = single.to_string("bibtex")

            modal_id = uuid.uuid4()
            link_attrs = dict(style="padding-left:10px;",
                              class_='modal-trigger moose-bibtex-modal',
                              href="#" + str(modal_id),
                              string=u'[BibTeX]')
            html.Tag(item, 'a', **link_attrs)

            modal_div = html.Tag(item, 'div', class_='modal', id_=modal_id)
            content_div = html.Tag(modal_div, 'div', class_='modal-content')
            pre_tag = html.Tag(content_div, 'pre', style="line-height:1.25;")
            html.Tag(pre_tag, 'code', class_='language-latex', string=bibtex_source)
예제 #12
0
파일: documents.py 프로젝트: evdcush/dochub
def make_bib_entry(info, style='bibtex'):
    """ Makes a bibliography entry from the processed api info

    Uses pybtex to output a valid bibliography entry.
    style='bibtex' --> "standard" bibtex format
    style='yaml'   --> yaml format (easily convertible to bibtex)

    Only the keys listed in FIELD_NAMES below that are present in *info* are
    copied.  List values are joined with ', ' and every value is converted
    with str().  The entry is stored under the key ``info.identifier``.

    Returns the serialized entry, with backslash-escaped underscores in the
    output replaced by plain underscores.
    """
    FIELD_NAMES = ('year', 'title', 'author', 'arxivId', 'DOI', 'keywords',
                   'abstract', 'URL', 'pdf', 'filename')

    # create instances
    bib_entry = BibliographyData()
    entry = Entry('article')
    fields = type(entry.fields)()  # pybtex.utils.OrderedCaseInsensitiveDict

    #==== add fields (in the fixed order above)
    for name in FIELD_NAMES:
        if name in info:
            value = info[name]
            if isinstance(value, list):
                value = ', '.join(value)
            fields[name] = str(value)

    #==== update instances
    entry.fields = fields
    bib_entry.add_entry(info.identifier, entry)
    # The backslash is doubled so the literal is a valid escape sequence; the
    # searched text is still a backslash followed by an underscore.
    return bib_entry.to_string(style).replace('\\_', '_')
예제 #13
0
def main_cli():
    """
    Command-line entry point: print the entries of a BibTeX file whose
    'keywords' field contains at least one of the requested keywords.

    The filtered bibliography is written to stdout in BibTeX format with the
    double-quote field delimiters rewritten to braces.
    """
    import argparse

    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('bib_path',
                        metavar='BIB_PATH',
                        type=arg_is_file,
                        help=('Path to bibtex-formatted file.'))
    parser.add_argument('-k',
                        '--keywords',
                        nargs='+',
                        type=str,
                        default=["OaksPeerReviewed", "OaksCVPreprint"],
                        help=('Keywords for reference filter.'))

    args = parser.parse_args()

    bib_parser = bibtex.Parser()
    bib_data = bib_parser.parse_file(args.bib_path)

    filtered_bib_data = BibliographyData()
    for entry in bib_data.entries.values():
        kwords = {
            x.strip() for x in entry.fields.get('keywords', '').split(',')
        }
        # Add the entry once even when several requested keywords match;
        # adding the same key again is rejected as a repeated entry.
        if any(kw in kwords for kw in args.keywords):
            filtered_bib_data.add_entry(entry.key, entry)

    # Rewrite the "..." field delimiters as {...}.
    s = filtered_bib_data.to_string("bibtex")
    s = s.replace("= \"", "= {")
    s = s.replace("\",\n", "},\n")
    s = s.replace("\"\n", "}\n")
    sys.stdout.write(s)
예제 #14
0
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Keeps one merged BibliographyData object for all '.bib' pages plus the
    parsed database of each individual file.
    """
    @staticmethod
    def defaultConfig():
        """Return the base configuration plus the duplicate-handling options."""
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (
            True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(),
                                "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__bib_files = list()
        self.__bib_file_database = dict()

    def preExecute(self):
        """
        Re-read every '.bib' page and rebuild the merged database.

        A file raising UndefinedMacro is logged and skipped.  A key that is
        already in the merged database is not added again; it is reported
        with a warning when 'duplicate_warning' is enabled and the key is not
        listed in 'duplicates'.
        """
        set_strict_mode(
            False)  # allow incorrectly formatted author/editor names

        # If this is invoked during a live serve, we need to recompile the list of '.bib' files and
        # read them again, otherwise there's no way to distinguish existing entries from duplicates
        self.__bib_files = []
        for node in self.translator.findPages(
                lambda p: p.source.endswith('.bib')):
            self.__bib_files.append(node.source)

        self.__database = BibliographyData()
        for bfile in self.__bib_files:
            try:
                db = parse_file(bfile)
                self.__bib_file_database[bfile] = db
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)
                # Nothing was parsed for this file: without skipping, `db`
                # would be unbound (first file) or still hold the previous
                # file's entries.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                if key in self.__database.entries:
                    if self.get('duplicate_warning') and (
                            key not in self.get('duplicates')):
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                else:
                    self.__database.add_entry(key, db.entries[key])

    def preRead(self, page):
        """Initialize the page citations list."""
        page['citations'] = list()

    def postTokenize(self, page, ast):
        """
        Append a 'References' heading and a bibliography token to *ast* when
        the page has citations but no BibtexBibliography node yet.
        """
        if page['citations']:
            has_bib = False
            for node in moosetree.iterate(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break

            if not has_bib:
                core.Heading(ast, level=2, string='References')
                BibtexBibliography(ast, bib_style='plain')

    def database(self, bibfile=None):
        """
        Return the merged database, or the database parsed from *bibfile*
        when one is given (KeyError if that file was never parsed).
        """
        if bibfile is None:
            return self.__database
        else:
            return self.__bib_file_database[bibfile]

    def bibfiles(self):
        """Return the list of '.bib' sources collected by preExecute."""
        return self.__bib_files

    def extend(self, reader, renderer):
        """Register the BibTeX commands and renderers."""
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexListCommand())
        self.addCommand(reader, BibtexReferenceComponent())

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexList', RenderBibtexList())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
예제 #15
0
파일: bibtex.py 프로젝트: jwpeterson/moose
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Merges the '.bib' sources of the content into one BibliographyData
    object and tracks the citations made while tokenizing a page.
    """

    @staticmethod
    def defaultConfig():
        """Return the base configuration plus the duplicate-handling options."""
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None
        self.__citations = set()

    def initMetaData(self, page, meta):
        """Create the per-page 'citations' set in *meta*."""
        meta.initData('citations', set())

    def addCitations(self, *args):
        """Record the given citation keys for the page being tokenized."""
        self.__citations.update(args)

    def preExecute(self, content):
        """
        Rebuild the bibliography database from the '.bib' nodes in *content*.

        A file raising UndefinedMacro is logged and skipped.  A key already
        in the database is never added again; unless it is listed in
        'duplicates', it is reported with a warning when 'duplicate_warning'
        is enabled.
        """
        duplicates = self.get('duplicates', list())
        self.__database = BibliographyData()

        bib_files = [node.source for node in content if node.source.endswith('.bib')]

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e.message)
                # Nothing was parsed for this file: without skipping, `db`
                # would be unbound (first file) or still hold the previous
                # file's entries.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            warn = self.get('duplicate_warning')
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def postTokenize(self, ast, page, meta, reader):
        """
        Move the citations gathered during tokenization into the page meta
        data and append a bibliography token to *ast* if it has none.
        """
        if self.__citations:
            meta.getData('citations').update(self.__citations)
            self.__citations.clear()

            has_bib = False
            for node in anytree.PreOrderIter(ast):
                if node.name == 'BibtexBibliography':
                    has_bib = True
                    break

            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        """The merged bibliography database (None until preExecute has run)."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX commands, inline component and renderers."""
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())
        reader.addInline(BibtexReferenceComponentDeprecated(), location='>FormatInline')

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
예제 #16
0
# Formatting style and HTML backend used to render each bibliography below.
style = MyStyle()
back = HtmlBackend()
# Parse the bibliography file and keep handles on its entries and their keys.
bib_data = parse_file(betterbibfile)
entries = bib_data.entries
keys = entries.keys()

# Entry types that are rendered as a whole, one bibliography per type.
cite_type = ["article", "book"]

#### presort
# For each type: collect the matching entries, render them to "out.html"
# and read the file back, so `contents` holds one HTML string per type.
contents = []
for c in cite_type:
    bib = BibliographyData()
    for k in keys:
        etype = entries[k].original_type
        if etype == c:
            bib.add_entry(entry=entries[k], key=k)
    formatbib = style.format_bibliography(bib)
    # The same file name is reused (overwritten) on every iteration.
    outfile = "out.html"
    back.write_to_file(formatbib, outfile)
    with open(outfile, "r") as f:
        contents.append(f.read())

# Narrow the working set to the "inproceedings" entries; from here on
# `entries` and `keys` refer only to those.
bib = BibliographyData()
for k in keys:
    etype = entries[k].original_type
    if etype == "inproceedings":
        bib.add_entry(entry=entries[k], key=k)
entries = bib.entries
keys = entries.keys()

for t in ["lecture comittee", "conference", "france"]:
예제 #17
0
    def filter_bibolamazifile(self, bibolamazifile):
        """
        Detect duplicate entries in `bibolamazifile` and merge them.

        When neither `self.dupfile` nor `self.warn` is set, a warning is
        logged and nothing else happens.  Otherwise each entry is compared,
        with `self.compare_entries_same()`, against the entries kept so far;
        a duplicate is merged into the entry it matches through
        `self.update_entry_with_duplicate()` and, when that original is in
        the kept set, recorded as an (alias, original) pair.

        If `self.dupfile` is set, the pairs are written to that file as
        bibalias definitions, sorted by (alias, original).  If `self.warn` is
        set and duplicates were found, a formatted summary is logged.  At the
        end the entries of `bibolamazifile` are replaced by the kept entries.
        """
        #
        # bibdata is a pybtex.database.BibliographyData object
        #

        if (not self.dupfile and not self.warn):
            logger.warning("duplicates filter: No action is being taken because neither "
                           "-sDupfile= nor -dWarn have been requested.")
            return

        bibdata = bibolamazifile.bibliographyData();

        # Stays None unless a jobname AND a dupfile were given; None means
        # "keep every entry" in the main loop below.
        used_citations = None
        
        if self.keep_only_used_in_jobname:
            if not self.dupfile:
                logger.warning("Option -sKeepOnlyUsedInJobname has no effect without -sDupfile=... !")
            else:
                logger.debug("Getting list of used citations from %s.aux." %(self.keep_only_used_in_jobname))
                used_citations = auxfile.get_all_auxfile_citations(
                    self.keep_only_used_in_jobname, bibolamazifile, self.name(),
                    self.jobname_search_dirs, return_set=True
                )

        # List of (alias key, original key) pairs found below.
        duplicates = [];

        arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

        dupl_entryinfo_cache_accessor = self.cacheAccessor(DuplicatesEntryInfoCacheAccessor)

        # Make sure the per-entry comparison cache is ready for every entry
        # before any comparison is made.
        for (key, entry) in bibdata.entries.iteritems():
            #cache_entries[key] = {}
            dupl_entryinfo_cache_accessor.prepare_entry_cache(key, entry, arxivaccess)


        # `newbibdata` collects the entries that are kept; `unused` holds new
        # entries that are not cited (see the strategy comment below).
        newbibdata = BibliographyData();
        unused = BibliographyData();
        #unused_respawned = set() # because del unused.entries[key] is not implemented ... :(

        # NOTE(review): copy_entry is not called anywhere in this method.
        def copy_entry(entry):
            #return copy.deepcopy(entry) # too deep ...
            newpers = {}
            for role, plist in entry.persons.iteritems():
                newpers[role] = [copy.deepcopy(p) for p in plist]
            return Entry(type_=entry.type,
                         fields=entry.fields.items(), # will create own Fielddict
                         persons=newpers,
                         collection=entry.collection
                         )

        # Strategy: go through the list of entries, and each time keeping it if it is new,
        # or updating the original and registering the alias if it is a duplicate.
        #
        # With only_used, the situation is a little trickier as we cannot just discard the
        # entries as they are filtered: indeed, they might be duplicates of a used entry,
        # with which one should merge the bib information.
        #
        # So the full algorithm does not immediately discard the unused keys, but rather
        # keeps them in an `unused` list. If they are later required, they are respawned
        # into the actual new list.
        #

        for (key, entry) in bibdata.entries.iteritems():
            #
            # search the newbibdata object, in case this entry already exists.
            #
            #logger.longdebug('inspecting new entry %s ...', key);
            is_duplicate_of = None
            duplicate_original_is_unused = False
            for (nkey, nentry) in newbibdata.entries.iteritems():
                if self.compare_entries_same(entry, nentry, dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                             dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                    logger.longdebug('    ... matches existing entry %s!', nkey);
                    is_duplicate_of = nkey;
                    break
            # NOTE(review): this second search runs even when a match was
            # already found in `newbibdata`; a match here overrides it.
            # Confirm that this precedence is intended.
            for (nkey, nentry) in unused.entries.iteritems():
                #if nkey in unused_respawned:
                #    continue
                if self.compare_entries_same(entry, nentry, dupl_entryinfo_cache_accessor.get_entry_cache(key),
                                             dupl_entryinfo_cache_accessor.get_entry_cache(nkey)):
                    logger.longdebug('    ... matches existing entry %s!', nkey);
                    is_duplicate_of = nkey;
                    duplicate_original_is_unused = True
                    break

            #
            # if it's a duplicate
            #
            if is_duplicate_of is not None:
                dup = (key, is_duplicate_of)
                if duplicate_original_is_unused:
                    self.update_entry_with_duplicate(is_duplicate_of, unused.entries[is_duplicate_of],
                                                     key, entry)
                else:
                    # a duplicate of a key we have used. So update the original ...
                    self.update_entry_with_duplicate(is_duplicate_of, newbibdata.entries[is_duplicate_of],
                                                     key, entry)
                    # ... and register the alias.
                    duplicates.append(dup);

                if duplicate_original_is_unused and used_citations and key in used_citations:
                    # if we had set the original in the unused list, but we need the
                    # alias, then respawn the original to the newbibdata so we can refer
                    # to it. Bonus: use the name with which we have referred to it, so we
                    # don't need to register any duplicate.
                    newbibdata.add_entry(key, unused.entries[is_duplicate_of])
                    #unused_respawned.add(is_duplicate_of)
                    del unused.entries[is_duplicate_of]
            else:
                if used_citations is not None and key not in used_citations:
                    # new entry, but we don't want it. So add it to the unused list.
                    unused.add_entry(key, entry)
                else:
                    # new entry and we want it. So add it to the main newbibdata list.
                    newbibdata.add_entry(key, entry)

        # output duplicates to the duplicates file

        if (self.dupfile):
            # and write definitions to the dupfile
            dupfilepath = os.path.join(bibolamazifile.fdir(), self.dupfile);
            check_overwrite_dupfile(dupfilepath);
            dupstrlist = [];
            
            with codecs.open(dupfilepath, 'w', 'utf-8') as dupf:
                
                dupf.write(BIBALIAS_HEADER.replace('####DUP_FILE_NAME####', self.dupfile));
                
                if not self.custom_bibalias:
                    dupf.write(BIBALIAS_LATEX_DEFINITIONS)
                    
                # Note: Sort entries in some way (e.g. alphabetically according to
                # (alias, original)), to avoid diffs in VCS's
                for (dupalias, duporiginal) in sorted(duplicates, key=lambda x: (x[0],x[1])):
                    dupf.write((r'\bibalias{%s}{%s}' % (dupalias, duporiginal)) + "\n");
                    dupstrlist.append("\t%s is an alias of %s" % (dupalias,duporiginal)) ;

                dupf.write('\n\n');

            # issue debug message
            logger.debug("wrote duplicates to file: \n" + "\n".join(dupstrlist));

        if (self.warn and duplicates):
            # Build the two-column (alias | original) warning text for one pair.
            def warnline(dupalias, duporiginal):
                # One-line description of an entry, assembled from its cache
                # record and its fields.
                def fmt(key, entry, cache_entry):
                    s = ", ".join(string.capwords('%s, %s' % (x[0], "".join(x[1]))) for x in cache_entry['pers']);
                    if 'title_clean' in cache_entry and cache_entry['title_clean']:
                        s += ', "' + (cache_entry['title_clean']).capitalize() + '"'
                    if 'j_abbrev' in cache_entry and cache_entry['j_abbrev']:
                        s += ', ' + cache_entry['j_abbrev']

                    f = entry.fields
                    if f.get('month',None) and f.get('year',None):
                        s += ', ' + f['month'] + ' ' + f['year']
                    elif f.get('month', None):
                        s += ', ' + f['month'] + ' <unknown year>'
                    elif f.get('year', None):
                        s += ', ' + f['year']
                        
                    if 'doi' in entry.fields and entry.fields['doi']:
                        s += ', doi:'+entry.fields['doi']
                    if 'arxivinfo' in cache_entry and cache_entry['arxivinfo']:
                        s += ', arXiv:'+cache_entry['arxivinfo']['arxivid']
                    if 'note_cleaned' in cache_entry and cache_entry['note_cleaned']:
                        s += '; ' + cache_entry['note_cleaned']

                    return s

                tw = textwrap.TextWrapper(width=DUPL_WARN_ENTRY_COLWIDTH)

                fmtalias = fmt(dupalias, bibdata.entries[dupalias],
                               dupl_entryinfo_cache_accessor.get_entry_cache(dupalias))
                fmtorig = fmt(duporiginal, bibdata.entries[duporiginal],
                              dupl_entryinfo_cache_accessor.get_entry_cache(duporiginal))
                linesalias = tw.wrap(fmtalias)
                linesorig = tw.wrap(fmtorig)
                # Pad the shorter column with empty lines so both columns can
                # be zipped row by row.
                maxlines = max(len(linesalias), len(linesorig))
                return (DUPL_WARN_ENTRY % { 'alias': dupalias,
                                            'orig': duporiginal
                                            }
                        + "\n".join( ('%s%s%s%s' %(' '*DUPL_WARN_ENTRY_BEGCOL,
                                                   linealias + ' '*(DUPL_WARN_ENTRY_COLWIDTH-len(linealias)),
                                                   ' '*DUPL_WARN_ENTRY_COLSEP,
                                                   lineorig)
                                      for (linealias, lineorig) in
                                      zip(linesalias + ['']*(maxlines-len(linesalias)),
                                          linesorig + ['']*(maxlines-len(linesorig)))) )
                        + "\n\n"
                        )
            logger.warning(DUPL_WARN_TOP  +
                           "".join([ warnline(dupalias, duporiginal)
                                     for (dupalias, duporiginal) in duplicates
                                     ])  +
                           DUPL_WARN_BOTTOM % {'num_dupl': len(duplicates)});

        # ### TODO: do this not only if we are given a dupfile?
        #if self.dupfile:
        # ### --> Bibolamazi v3: also set this if no dupfile was given. This is because we
        # ###     are moving entries themselves around and modifying them anyway
        #
        # set the new bibdata, without the duplicates
        # DON'T DO THIS, BECAUSE CACHES MAY HAVE KEPT A POINTER TO THE BIBDATA.
        #bibolamazifile.setBibliographyData(newbibdata);
        #
        # Instead, update bibolamazifile's bibliographyData() object itself.
        #
        bibolamazifile.setEntries(newbibdata.entries.iteritems())
        
        return
예제 #18
0
    tags = json.load(tag_file)

tagged = list(tags.keys())


from pybtex.database import BibliographyData, Entry

# Start the merged bibliography from one hard-coded placeholder article.
master_data = BibliographyData({
    "article-minimal": Entry(
        "article",
        [
            ("author", "Leslie B. Lamport"),
            ("title", "blah blah blah"),
            ("journal", "Some outlet"),
            ("year", "1986"),
        ],
    ),
})

# Fold every subpackage's references.bib into master_data. A key that is
# already present keeps its first definition; later ones are skipped.
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        package_bib = "tmp/{subpackage}/doc/_static/references.bib".format(subpackage=subpackage)
        if not os.path.isfile(package_bib):
            # This subpackage ships no bibliography.
            continue
        local = pybtex.database.parse_file(package_bib)
        for entry in local.entries:
            if entry in master_data.entries:
                continue
            master_data.add_entry(entry, local.entries[entry])
            print("adding", entry)

# Serialise the merged database as BibTeX.
with open("doc/_static/references.bib", "w") as master_bib:
    master_bib.write(master_data.to_string("bibtex"))
예제 #19
0
def create_overleaf_files(overleaf):
    """
    Download mapped Figshare files and generate the Overleaf sources for them.

    The file list of every article in the Figshare project is merged with
    ``overleaf`` on ``article_id`` and ``name``. For each merged row with a
    non-empty ``overleaf`` label:

    * the file is downloaded; files whose name contains ``.pkl`` are
      unpickled and exported with ``to_latex``, all others are stored as-is
      under their original extension;
    * an ``article`` BibTeX entry keyed by the label is added;
    * a table or figure float with caption and ``\\cite`` is appended to the
      ``supplementary`` LaTeX document, and the caption is also written to a
      separate ``*_caption.tex`` file.

    Every generated file is staged with ``repo.git.add``. The function relies
    on the module-level names ``head``, ``repo``, ``FIGSHARE_PROJECT_ID``,
    ``get_project_articles``, ``get_files_of_article`` and
    ``raw_issue_request``.

    Args:
        overleaf: pandas DataFrame (it is merged with the Figshare file
            table) providing the columns ``article_id``, ``name``,
            ``overleaf`` and ``caption``.

    Returns:
        None.
    """
    files = []

    articles = get_project_articles(FIGSHARE_PROJECT_ID)
    for article in articles:
        newfiles = get_files_of_article(article['id'])
        # Tag each file record with its article so it can be joined later.
        for newfile in newfiles:
            newfile['article_id'] = article['id']
            newfile['article_name'] = article['title']
        files += newfiles

    fdf = pd.DataFrame(files)
    # sort_values returns a new frame; the result must be kept, otherwise the
    # sort is silently discarded.
    fdf = fdf.sort_values(by=['article_id', 'article_name', 'name'])

    for_download = overleaf.merge(fdf[['article_id', 'name', 'download_url']],
                                  on=['article_id', 'name'])

    # Output roots; the path layout is identical to the literal paths
    # previously repeated at every call site.
    tmp_dir = '/mnt/labbook/output/untracked/tmp_overleaf-{}'.format(head)
    out_dir = '/mnt/labbook/output/untracked/overleaf-{}'.format(head)

    # create individual files
    for _, row in for_download.iterrows():
        if len(row['overleaf']) == 0:
            continue
        payload = raw_issue_request('GET', row['download_url'], binary=True)
        if '.pkl' in row['name']:
            pickle_path = '{}/{}'.format(tmp_dir, row['name'])
            with open(pickle_path, 'wb') as f:
                f.write(payload)
            # SECURITY: read_pickle executes arbitrary code embedded in the
            # file. Only safe while the Figshare project content is trusted.
            df = pd.read_pickle(pickle_path)
            df.to_latex('{}/figshare/{}.tex'.format(out_dir, row['overleaf']))
            repo.git.add('figshare/{}.tex'.format(row['overleaf']))
        else:
            extension = row['name'].split('.')[-1]
            target = '{}/figshare/{}.{}'.format(out_dir, row['overleaf'],
                                                extension)
            with open(target, 'wb') as f:
                f.write(payload)
            # Stage only after the file has been closed, so that git sees the
            # flushed content rather than an empty/partial file.
            repo.git.add('figshare/{}.{}'.format(row['overleaf'], extension))

    # create bibliography file
    adf = pd.DataFrame(articles)
    bib_data = BibliographyData()

    for _, row in for_download.iterrows():
        if len(row['overleaf']) == 0:
            continue
        # Index label of the first article row matching this file.
        idx = adf[adf['id'] == row['article_id']].index[0]
        bib_data.add_entry(key=row['overleaf'],
                           entry=Entry('article', [
                               ('title', adf.at[idx, 'title']),
                               ('journal', "figshare"),
                               ('doi', adf.at[idx, 'doi']),
                           ]))

    bib_data.to_file('{}/figures_tables.bib'.format(out_dir))
    repo.git.add('figures_tables.bib')

    # write supplementary tex
    geometry_options = {"tmargin": "1cm", "lmargin": "1cm"}
    doc = ltx.Document(geometry_options=geometry_options)
    doc.preamble.append(ltx.Package('biblatex', options=['sorting=none']))
    doc.preamble.append(
        ltx.Command('addbibresource',
                    arguments=[ltx.NoEscape("figures_tables.bib")]))
    doc.preamble.append(ltx.Package('booktabs'))
    doc.preamble.append(ltx.Package('longtable'))

    with doc.create(ltx.Subsection('images and tables supplementary file')):
        for _, row in for_download.iterrows():
            if len(row['overleaf']) == 0:
                continue
            idx = adf[adf['id'] == row['article_id']].index[0]
            caption_path = '{}/figshare/{}_caption.tex'.format(
                out_dir, row['overleaf'])
            cite = ltx.Command('cite',
                               arguments=[ltx.NoEscape(row['overleaf'])])

            # NOTE(review): a missing caption that arrives as NaN instead of
            # None is treated as a real caption below -- confirm upstream.
            if '.pkl' in row['name']:
                with doc.create(ltx.Table(position='hbt')) as table_holder:
                    table_holder.append(
                        ltx.Command('input',
                                    arguments=[
                                        ltx.NoEscape("figshare/{}.tex".format(
                                            row['overleaf']))
                                    ]))
                    # Fall back to the article title when no caption is set.
                    if row['caption'] is not None:
                        caption = row['caption']
                    else:
                        caption = adf.at[idx, 'title']
                    table_holder.add_caption(caption)
                    # NOTE(review): table captions are written unescaped while
                    # figure captions are escaped -- kept as in the original.
                    with open(caption_path, "w") as text_file:
                        text_file.write(caption)
                    repo.git.add('figshare/{}_caption.tex'.format(
                        row['overleaf']))
                    table_holder.append(cite)
            else:
                with doc.create(ltx.Figure(position='hbt')) as image_holder:
                    image_holder.add_image('figshare/{}'.format(
                        row['overleaf']))
                    if row['caption'] is not None:
                        # The float receives the raw caption; only the
                        # caption file gets the escaped text.
                        image_holder.add_caption(row['caption'])
                        caption_text = ltx.utils.escape_latex(row['caption'])
                    else:
                        caption_text = ltx.utils.escape_latex(
                            adf.at[idx, 'title'])
                        image_holder.add_caption(caption_text)
                    with open(caption_path, "w") as text_file:
                        text_file.write(caption_text)
                    repo.git.add('figshare/{}_caption.tex'.format(
                        row['overleaf']))
                    image_holder.append(cite)

    doc.append(ltx.Command('printbibliography'))

    doc.generate_tex('{}/supplementary'.format(out_dir))
    repo.git.add('supplementary.tex')
예제 #20
0
from pybtex.database import BibliographyData, Entry

# Merged bibliography, pre-populated with a single placeholder article.
master_data = BibliographyData({
    'article-minimal': Entry('article', [
        ('author', 'Leslie B. Lamport'),
        ('title', 'blah blah blah'),
        ('journal', 'Some outlet'),
        ('year', '1986'),
    ]),
})

# Collect the entries of each subpackage's references.bib, keeping the first
# definition of any key that occurs more than once.
for package in packages:
    subpackages = packages[package].split()
    for subpackage in subpackages:
        package_bib = 'tmp/{subpackage}/doc/_static/references.bib'.format(
            subpackage=subpackage)
        if os.path.isfile(package_bib):
            local = pybtex.database.parse_file(package_bib)
            for entry, record in local.entries.items():
                if entry not in master_data.entries:
                    master_data.add_entry(entry, record)
                    print('adding', entry)

# Write the combined database out in BibTeX format.
with open('doc/_static/references.bib', 'w') as master_bib:
    master_bib.write(master_data.to_string('bibtex'))
예제 #21
0
파일: bibtex.py 프로젝트: amcheniour/moose
class BibtexExtension(command.CommandExtension):
    """
    Extension for BibTeX citations and bibliography.

    Before execution, every page whose source ends in ``.bib`` is parsed and
    merged into a single ``BibliographyData`` object, exposed through the
    ``database`` property. Citations registered with ``addCitations`` are
    attached to the page being tokenized, and a ``BibtexBibliography`` token
    is added to that page's AST if it does not contain one already.
    """

    @staticmethod
    def defaultConfig():
        """Return the base configuration plus the duplicate-handling options."""
        config = command.CommandExtension.defaultConfig()
        config['duplicate_warning'] = (True, "Show a warning when duplicate entries detected.")
        config['duplicates'] = (list(), "A list of duplicates that are allowed.")
        return config

    def __init__(self, *args, **kwargs):
        command.CommandExtension.__init__(self, *args, **kwargs)

        self.__database = None        # merged BibliographyData, built in preExecute
        self.__citations = list()     # citations collected since the last postTokenize

    def addCitations(self, *args):
        """Register the given citations; they are moved to the page in postTokenize."""
        self.__citations.extend(args)

    def preExecute(self):
        """
        Build the merged bibliography database from all ``.bib`` pages.

        A file that fails to parse because of an undefined macro is reported
        and skipped. For a key that is already in the database the first
        definition is kept; a warning is logged unless the key is listed in
        the ``duplicates`` option or ``duplicate_warning`` is disabled.
        """
        duplicates = self.get('duplicates', list())
        warn = self.get('duplicate_warning')
        self.__database = BibliographyData()

        bib_files = [node.source for node in self.translator.getPages()
                     if node.source.endswith('.bib')]

        for bfile in bib_files:
            try:
                db = parse_file(bfile)
            except UndefinedMacro as e:
                msg = "The BibTeX file %s has an undefined macro:\n%s"
                LOG.warning(msg, bfile, e)
                # Nothing was parsed for this file. Without skipping, 'db'
                # would be unbound (first file) or still hold the previous
                # file's entries.
                continue

            #TODO: https://bitbucket.org/pybtex-devs/pybtex/issues/93/
            #      databaseadd_entries-method-not-considering
            for key in db.entries:
                duplicate_key = key in self.__database.entries
                duplicate_key_allowed = key in duplicates
                if duplicate_key and (not duplicate_key_allowed):
                    if warn:
                        msg = "The BibTeX entry '%s' defined in %s already exists."
                        LOG.warning(msg, key, bfile)
                elif not duplicate_key:
                    self.__database.add_entry(key, db.entries[key])

    def preTokenize(self, page, ast):
        """Reset the citation list stored on the page."""
        page['citations'] = list()

    def postTokenize(self, page, ast):
        """
        Move the collected citations onto the page and make sure the AST
        contains a ``BibtexBibliography`` token when there are any.
        """
        if self.__citations:
            page['citations'].extend(self.__citations)
            self.__citations.clear()

            has_bib = any(node.name == 'BibtexBibliography'
                          for node in moosetree.iterate(ast))
            if not has_bib:
                BibtexBibliography(ast)

    @property
    def database(self):
        """The merged ``BibliographyData`` (``None`` until ``preExecute`` has run)."""
        return self.__database

    def extend(self, reader, renderer):
        """Register the BibTeX commands and renderer components."""
        self.requires(core, command)

        self.addCommand(reader, BibtexCommand())
        self.addCommand(reader, BibtexReferenceComponent())

        renderer.add('BibtexCite', RenderBibtexCite())
        renderer.add('BibtexBibliography', RenderBibtexBibliography())

        if isinstance(renderer, LatexRenderer):
            renderer.addPackage('natbib', 'round')
예제 #22
0
    def filter_bibolamazifile(self, bibolamazifile):
        """
        Rebuild the bibliography with citation keys generated from `self.fmt`.

        `self.fmt` is a ``%``-style template whose ``%(field)s`` placeholders
        are filled from the functions in `fld_fn` below. Entries excluded by
        `self.if_published` or `self.if_type` keep their original key. If a
        key is already taken in the new database, ``.1``, ``.2``, ... is
        appended and a warning is logged. The resulting database replaces the
        one held by `bibolamazifile`.

        Raises:
            BibFilterError: if `self.fmt` references an unknown field, or if
                substituting the fields raises ``ValueError``.
        """
        #
        # bibdata is a pybtex.database.BibliographyData object
        #
        bibdata = bibolamazifile.bibliographyData()

        arxivaccess = arxivutil.setup_and_get_arxiv_accessor(bibolamazifile)

        # first, find required fields and apply possible "filters"

        _rx_short_journal_known = re.compile(r'\b(?P<word>' + r'|'.join(KNOWN_ABBREV.keys()) + r')\b',
                                             re.IGNORECASE)

        def abbreviate(x):
            # Words listed in NO_ABBREV are kept whole; others are cut to
            # their first three characters plus a dot.
            if x.lower() in NO_ABBREV:
                return x
            return x[0:3]+'.'

        def short_journal(x):
            # Known journals map directly to their short form.
            if x.strip().lower() in KNOWN_JOURNALS:
                return KNOWN_JOURNALS[x.strip().lower()]
            # Otherwise: substitute known abbreviations, drop boring words,
            # abbreviate the remaining words, strip everything that is not a
            # word character or dot, and cap the length at 20 characters.
            x = _rx_short_journal_known.sub(lambda m: KNOWN_ABBREV[m.group('word').lower()], x)
            x = re.sub(r'\b(' + r'|'.join(BORING_WORDS) + r')\b(?!\s*($|[-:;\.]))', '', x, flags=re.IGNORECASE)
            x = re.sub(r'\b(?P<word>\w+)\b([^\.]|$)',
                       lambda m: abbreviate(m.group('word')), x)
            x = re.sub(r'[^\w.]+', '', x)
            if (len(x)>20):
                x = x[0:18]+'..'
            return x

        def arxivInfo(entry, field):
            inf = arxivaccess.getArXivInfo(entry.key)
            if inf is None:
                return ''
            return inf[field]

        # Maps each placeholder name usable in self.fmt to a function that
        # computes its value for an entry.
        fld_fn = {
            'author': lambda entry: getlast(entry.persons['author'][0], lower=False)[0],
            'authors': lambda entry: "".join([getlast(a, lower=False)[0] for a in entry.persons['author']])[0:25],
            'year': lambda entry: entry.fields.get('year', ''),
            'year2': lambda entry: '%02d' % (int(entry.fields.get('year', '')) % 100),
            'journal_abb': lambda entry: fmtjournal(entry.fields.get('journal', '')),
            'journal': lambda entry: short_journal(normstr(delatex(entry.fields.get('journal', '')),lower=False)),
            'title_word': lambda entry: next(
                (word for word in re.sub(r'[^\w\s]', '', delatex(entry.fields.get('title', ''))).split()
                 if word.lower() not in BORING_TITLE_WORDS),
                ''
                 ),
            'doi': lambda entry: entry.fields.get('doi', ''),
            'arxivid': lambda entry: arxivInfo(entry, 'arxivid'),
            'primaryclass': lambda entry: arxivInfo(entry, 'primaryclass'),
            }
        # used fields ("%%" is an escaped percent sign, not a placeholder)
        fld = {m.group('field') for m in re.finditer(r'(^|[^%])(%%)*%\((?P<field>\w+)\)', self.fmt)}
        # check all valid fields
        for f in fld:
            if f not in fld_fn:
                # The field name has to be interpolated into the message.
                raise BibFilterError('citekey', "Invalid field `%s' for citekey filter" % (f,))

        logger.debug('Used fields are %r', fld)

        newbibdata = BibliographyData()

        def wants_new_key(key, entry):
            # True if the entry passes the if_published / if_type filters and
            # should therefore receive a generated key.
            ainfo = arxivaccess.getArXivInfo(key)
            if (self.if_published is not None):
                if (not self.if_published and (ainfo is None or ainfo['published'])):
                    logger.longdebug('Skipping published entry %s (filter: unpublished)', key)
                    return False
                if (self.if_published and (ainfo is not None and not ainfo['published'])):
                    logger.longdebug('Skipping unpublished entry %s (filter: published)', key)
                    return False
            if self.if_type is not None:
                if entry.type not in self.if_type:
                    logger.longdebug('Skipping entry %s of different type %s (filter: %r)',
                                     key, entry.type, self.if_type)
                    return False
            return True

        # items() instead of iteritems(): available on both Python 2 and 3.
        for (keyorig, entry) in bibdata.entries.items():

            # Entries that are filtered out keep their original key.
            key = keyorig

            if wants_new_key(keyorig, entry):
                repldic = {f: fld_fn[f](entry) for f in fld}
                try:
                    key = self.fmt % repldic
                except ValueError as e:
                    raise BibFilterError('citekey', "Error replacing fields: %s" % (e))

            # avoid duplicate keys
            newkey = key
            count = 0
            while newkey in newbibdata.entries:
                count += 1
                newkey = key + '.%d'%(count)
            if count:
                logger.warning("`%s': Citation key `%s' already used: using `%s' instead.",
                               keyorig, key, newkey)
            # add the entry
            newbibdata.add_entry(newkey, entry)

        bibolamazifile.setBibliographyData(newbibdata)

        return