Exemple #1
0
    def post(self, request, *args, **kwargs):
        """Look up the posted ``unique_id`` and render the matching publication.

        Every ``PublicationSource`` named ``doi`` or ``adsabs`` is tried in
        turn; the first one that yields a parseable BibTeX entry wins. When
        no source produces an entry the template is rendered with an empty
        context. Otherwise the context carries the cleaned ``author`` and
        ``title``, the ``year``, the ``unique_id``, the matching source
        object and the project's primary key.

        Raises ``Http404`` (via ``get_object_or_404``) when the project does
        not exist, and ``KeyError`` when the entry lacks ``year``, ``author``
        or ``title``.
        """
        unique_id = request.POST.get('unique_id')
        project_pk = self.kwargs.get('project_pk')
        project_obj = get_object_or_404(Project, pk=project_pk)

        # LaTeX macros replaced by plain-text stand-ins, applied in order.
        latex_replacements = (
            ('{\\textquotesingle}', "'"),
            ('{\\textendash}', '-'),
            ('{\\textemdash}', '-'),
            ('{\\textasciigrave}', ' '),
            ('{\\textdaggerdbl}', ' '),
            ('{\\textdagger}', ' '),
        )

        def _clean(value):
            # Substitute the known macros, then drop any remaining braces.
            text = as_text(value)
            for macro, plain in latex_replacements:
                text = text.replace(macro, plain)
            return re.sub("{|}", "", text)

        matching_source_obj = None
        bib_json = None
        for source in PublicationSource.objects.all():
            try:
                if source.name == 'doi':
                    _, bib_str = crossref.get_bib(unique_id)
                elif source.name == 'adsabs':
                    url = 'http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX'.format(
                        unique_id)
                    bib_str = requests.get(url, timeout=5).text
                else:
                    continue
                bp = BibTexParser(interpolate_strings=False)
                bib_json = bp.parse(bib_str).entries[0]
            except Exception:
                # Best effort: any lookup or parse failure just means this
                # source has no usable entry. ``Exception`` (not a bare
                # ``except``) lets KeyboardInterrupt/SystemExit propagate.
                continue
            matching_source_obj = source
            break

        if not matching_source_obj:
            return render(request, self.template_name, {})

        year = as_text(bib_json['year'])
        author = _clean(bib_json['author'])
        title = _clean(bib_json['title'])

        context = {
            'author': author,
            'year': year,
            'title': title,
            'unique_id': unique_id,
            'source': matching_source_obj,
            'project_pk': project_obj.pk,
        }
        return render(request, self.template_name, context)
Exemple #2
0
    def _search_id(self, unique_id):
        """Resolve ``unique_id`` to publication metadata.

        Every ``PublicationSource`` named ``doi`` or ``adsabs`` is tried in
        turn; the first one that yields a parseable BibTeX entry wins.

        :param unique_id: identifier passed to the crossref lookup or
            substituted into the ADS query URL.
        :return: ``False`` when no source yields an entry, otherwise a dict
            with the keys ``author``, ``year``, ``title``, ``journal``,
            ``unique_id`` and ``source_pk``.
        :raises KeyError: if the entry lacks ``year``, ``author`` or
            ``title``.
        """
        # LaTeX macros replaced by plain-text stand-ins, applied in order.
        latex_replacements = (
            ('{\\textquotesingle}', "'"),
            ('{\\textendash}', '-'),
            ('{\\textemdash}', '-'),
            ('{\\textasciigrave}', ' '),
            ('{\\textdaggerdbl}', ' '),
            ('{\\textdagger}', ' '),
        )

        def _clean(value):
            # Substitute the known macros, then drop any remaining braces.
            text = as_text(value)
            for macro, plain in latex_replacements:
                text = text.replace(macro, plain)
            return re.sub("{|}", "", text)

        matching_source_obj = None
        bib_json = None
        for source in PublicationSource.objects.all():
            try:
                if source.name == 'doi':
                    _, bib_str = crossref.get_bib(unique_id)
                elif source.name == 'adsabs':
                    url = 'http://adsabs.harvard.edu/cgi-bin/nph-bib_query?bibcode={}&data_type=BIBTEX'.format(
                        unique_id)
                    bib_str = requests.get(url, timeout=5).text
                else:
                    continue
                bp = BibTexParser(interpolate_strings=False)
                bib_json = bp.parse(bib_str).entries[0]
            except Exception:
                # Best effort: any lookup or parse failure just means this
                # source has no usable entry. ``Exception`` (not a bare
                # ``except``) lets KeyboardInterrupt/SystemExit propagate.
                continue
            matching_source_obj = source
            break

        if not matching_source_obj:
            return False

        year = as_text(bib_json['year'])
        author = _clean(bib_json['author'])
        title = _clean(bib_json['title'])

        # not all bibtex entries will have a journal field
        if 'journal' in bib_json:
            journal = _clean(bib_json['journal'])
        else:
            # fallback: clearly indicate that data was absent
            source_name = matching_source_obj.name
            journal = '[no journal info from {}]'.format(source_name.upper())

        return {
            'author': author,
            'year': year,
            'title': title,
            'journal': journal,
            'unique_id': unique_id,
            'source_pk': matching_source_obj.pk,
        }
Exemple #3
0
    def pull(self):
        """Fetch BibTeX search results for every query and merge them.

        One GET request is issued per item in ``self.queries``; cookies
        returned by each response are folded back into ``self.cookies`` so
        that later requests reuse them.

        :return: a ``BibDatabase`` whose entry list holds the entries parsed
            from all responses, in query order.
        """
        user_agents = [
            'Mozilla/5.0 (Windows NT 6.1; WOW64)',
            'AppleWebKit/537.36 (KHTML, like Gecko)',
            'Chrome/35.0.1916.114 Safari/537.36'
        ]

        headers = {'User-Agent': " ".join(user_agents)}
        # These values are substituted as *arguments* of the %-format below,
        # so a doubled "%%" would never be collapsed and would reach the
        # server verbatim. Use the plain percent-encoded form instead.
        within = 'owners%2Eowner%3DHOSTED'
        sort = '%5Fscore'
        export_format = 'bibtex'
        url_template = 'https://dl.acm.org/exportformats_search.cfm?query=%s&within=%s&srt=%s&expformat=%s'

        result = BibDatabase()

        for query in self.queries:
            url = url_template % (query, within, sort, export_format)
            response = requests.get(url, cookies=self.cookies, headers=headers)
            self.cookies.update(response.cookies)
            bibtex_parser = BibTexParser(customization=convert_to_unicode)
            parsed = bibtex_parser.parse(response.text)

            # extend, not append: each parsed entry must become its own item
            # rather than the whole list being nested as a single element.
            result.get_entry_list().extend(parsed.get_entry_list())

        return result
Exemple #4
0
def read_bibtex(bibtex_str):
    """Parse *bibtex_str* and return its entries as a list.

    Each entry is passed through the ``keyword`` and ``convert_to_unicode``
    customizations; once all entries have been converted, the ``author``
    customization is applied to each of them.
    """
    bib_parser = BibTexParser(common_strings=True)
    bib_parser.ignore_nonstandard_types = False
    # NOTE(review): bibtexparser 1.x appears to read the attribute spelled
    # ``homogenise_fields``; confirm that this spelling actually takes effect.
    bib_parser.homogenize_fields = True

    customization = bibtexparser.customization
    entries = bib_parser.parse(bibtex_str).entries
    unicode_entries = [
        customization.convert_to_unicode(customization.keyword(entry))
        for entry in entries
    ]
    return [customization.author(entry) for entry in unicode_entries]
Exemple #5
0
def parse_bibtex(reference, bibtex_parser=None):
    """Return the last entry parsed from *reference*, or ``None``.

    :param reference: BibTeX text handed to the parser.
    :param bibtex_parser: parser to use; a fresh ``BibTexParser`` is created
        when omitted.
    :return: the final item of the parser's entry list, or ``None`` when an
        ``IndexError`` is raised (e.g. the entry list is empty).
    """
    parser = BibTexParser() if bibtex_parser is None else bibtex_parser
    try:
        return parser.parse(reference).get_entry_list()[-1]
    except IndexError:
        # Nothing could be parsed out of the reference.
        return None
Exemple #6
0
    def add_entry_by_string(self,
                            bib_string,
                            file_name=None,
                            skip_if_file_exists=True,
                            skip_if_doi_exists=False,
                            parser=None):
        """
        Parse a BibTex string and append its first entry to this database.
        :param bib_string: the BibTex text for a single reference.
        :param file_name: optional name of a local file; when given it is stored in the entry's 'file' field.
        :param skip_if_file_exists: boolean, default True. When True and file_name is already present in self.files,
        nothing is added.
        :param skip_if_doi_exists: boolean, default False. When True and the parsed entry has a 'doi' that is already
        present in self.dois, nothing is added.
        :param parser: a BibTexParser instance used to parse bib_string. When omitted, a parser is created with:
            * ignore_nonstandard_types = False
            * homogenise_fields = True
            * customization = a lambda that calls self.format_entry on each entry
        :return: none, the entry is appended to self.entries in place.
        """
        has_file = file_name is not None
        if skip_if_file_exists and has_file and file_name in self.files:
            root_logger.info(
                'Not adding {}, entry for that file already in .bib file'.
                format(file_name))
            return

        # Round-trip the string through a parser so that the stored entry has
        # the standard parsed form.
        if parser is None:
            parser = BibTexParser()
            parser.ignore_nonstandard_types = False
            parser.homogenise_fields = True
            # The lambda closes over this instance, so formatting sees the
            # database as it is when the entry is parsed.
            parser.customization = lambda entry: self.format_entry(entry)

        new_entry = parser.parse(bib_string).entries[0]

        if (skip_if_doi_exists and 'doi' in new_entry
                and new_entry['doi'] in self.dois):
            root_logger.info(
                'Not adding {}, entry with DOI "{}" already in bib file'.
                format(file_name, new_entry['doi']))
            return

        if has_file:
            new_entry['file'] = file_name

        # Appending to the entries list is all that is needed here.
        self.entries.append(new_entry)
Exemple #7
0
 def test_does_not_fail_on_non_bibtex_with_partial(self):
     """Malformed input parsed with ``partial=False`` ends up as one comment.

     The parser is expected to report no entries, preambles or strings and
     to expose the stripped raw text as the single comment.
     """
     # Spelled out line by line so the whitespace inside the fixture is
     # explicit and agrees with the expected comment below; a triple-quoted
     # literal silently changes when this method is re-indented.
     bibraw = ('@misc{this looks,\n'
               '          like = a = bibtex file but\n'
               '              , is not a real one!\n'
               '     ')
     parser = BibTexParser()
     bib = parser.parse(bibraw, partial=False)
     self.assertEqual(bib.entries, [])
     self.assertEqual(bib.preambles, [])
     self.assertEqual(bib.strings, {})
     self.assertEqual(bib.comments, [
         '@misc{this looks,\n'
         '          like = a = bibtex file but\n'
         '              , is not a real one!'
     ])
Exemple #8
0
    def post(self, request, *args, **kwargs):
        """Export the selected publications of a project as a BibTeX download.

        The posted formset is validated against the project's exportable
        publications. For every form marked ``selected`` the matching
        ``Publication`` is looked up and its BibTeX text is fetched through
        ``crossref.get_bib``; the texts are concatenated and returned as a
        ``refs.bib`` attachment. When the formset is invalid, each error is
        reported through ``messages`` and the user is redirected to the
        project detail page.
        """
        project_obj = get_object_or_404(Project,
                                        pk=self.kwargs.get('project_pk'))

        publications_do_export = self.get_publications_to_export(project_obj)

        formset = formset_factory(PublicationExportForm,
                                  max_num=len(publications_do_export))
        formset = formset(request.POST,
                          initial=publications_do_export,
                          prefix='publicationform')

        if not formset.is_valid():
            for error in formset.errors:
                messages.error(request, error)
            return HttpResponseRedirect(
                reverse('project-detail', kwargs={'pk': project_obj.pk}))

        bib_chunks = []
        for form in formset:
            publication_form_data = form.cleaned_data
            if not publication_form_data['selected']:
                continue

            publication_obj = Publication.objects.get(
                project=project_obj,
                title=publication_form_data.get('title'),
                year=publication_form_data.get('year'),
                unique_id=publication_form_data.get('unique_id'),
            )
            # The status value is not used; only the BibTeX text is exported.
            _, bib_str = crossref.get_bib(publication_obj.display_uid())
            bib_chunks.append(bib_str)

        response = HttpResponse(content_type='text/plain')
        response['Content-Disposition'] = 'attachment; filename=refs.bib'
        response.write(''.join(bib_chunks))
        return response
Exemple #9
0
def new_publication():
    """Create a publication from the manual form or from pasted BibTeX.

    When the manual form validates, a ``Publication`` is built from its
    fields. Otherwise, when the BibTeX form validates, its text is parsed
    and a ``Publication`` is built from the first entry, provided that entry
    has all required fields. On success the user is redirected to the new
    publication's page; in every other case the form page is rendered again.
    """
    form = PublicationForm()
    bibForm = BibtexPublicationForm()

    if form.validate_on_submit():
        publication = Publication(title=form.title.data,
                                  doi=form.doi.data,
                                  year=form.year.data,
                                  journal=form.journal.data,
                                  type=form.type.data,
                                  status=form.status.data,
                                  primary_user=current_user.id)
        db.session.add(publication)
        db.session.commit()
        return redirect(url_for("view_publication", id=publication.id))

    elif bibForm.validate_on_submit():

        bp = BibTexParser(interpolate_strings=False)
        entries = bp.parse(bibForm.parse.data).entries

        required = {"author", "title", "doi", "year", "ID", "journal",
                    "status"}
        # Text that yields no entry at all falls through to re-rendering the
        # form instead of failing with an IndexError.
        if entries and required.issubset(entries[0]):
            entry = entries[0]
            publication = Publication(title=entry["title"],
                                      doi=entry["doi"],
                                      year=entry["year"],
                                      journal=entry["journal"],
                                      type=entry["ENTRYTYPE"],
                                      status=entry["status"],
                                      primary_user=current_user.id)
            db.session.add(publication)
            db.session.commit()
            return redirect(url_for("view_publication", id=publication.id))

    return render_template("publication.html", form=form, bibForm=bibForm)
Exemple #10
0
class LazyBibDatabase(BibDatabase):
    """Lazy-loading subclass of bibtexparser.bibdatabase.BibDatabase.

    To improve performance on large files, this class indexes (:func:`_index`)
    the start and end locations of each entry in the file, but does not read or
    parse them. When :func:`get_entry` is called, only the single entry is read
    and parsed.

    The database file stays open for the lifetime of the object so that
    entries can be read on demand; call :func:`close` to release it.

    This functionality should be pushed upstream to bibtexparser.

    """

    # Matches "@<type>{<id>" at the start of a line; group 1 is the entry
    # type, group 2 the entry ID (possibly empty).
    entry_re = re.compile(rb"^\s*@([^{]*){([^,}]*)", re.MULTILINE)

    def __init__(self, path, config):
        """Open *path*, index its entries and prepare the entry parser.

        *config* is passed on to every ``BibItem`` created while parsing.
        """
        super().__init__()

        # Database file
        self._file = open(path, "rb")

        # Keywords index
        self.keywords = set()

        # Index the database
        self._all_loaded = False
        self._index()

        # Set up a parser to be used by _read_entry
        self._parser = BibTexParser(
            homogenize_fields=False,
            ignore_nonstandard_types=False,
            customization=lambda r: BibItem(r, self.keywords.update, config),
        )

    def close(self):
        """Close the underlying database file."""
        self._file.close()

    def _index(self):
        """Index the database.

        Builds ``self._entries_index``, mapping each entry ID to a
        ``(start, length)`` pair of byte offsets; ``length`` is -1 for the
        last entry in the file.
        """
        self._entries_index = {}

        fileno = self._file.fileno()
        try:
            # Use a mmap to avoid loading the entire file into memory
            m = mmap.mmap(fileno, 0, access=mmap.ACCESS_READ)
        except ValueError:
            # mmap refuses to map a zero-length file; an empty database
            # simply has no entries.
            return

        # Iterate through matches of the regular expression for entries
        breaks = []
        for match in self.entry_re.finditer(m):
            start = match.start()
            entrytype, entry_id = (group.decode() for group in match.groups())
            if entry_id == "":
                entry_id = "<entry without ID at {0}>".format(start)
            # Store (start, entry type, entry ID)
            breaks.append((start, entrytype, entry_id))

        del m

        # Convert the breaks to an index
        last = len(breaks) - 1
        for idx, (start, entrytype, entry_id) in enumerate(breaks):
            # Compare for equality: a substring test against the string
            # "comment" would also skip types such as "" or "c".
            if entrytype.strip().lower() == "comment":
                # Don't index comments
                continue

            if idx < last:
                # Current entry extends to the start of the next
                length = breaks[idx + 1][0] - start
            else:
                # Last entry in file, length of -1 will make read() gobble
                length = -1
            self._entries_index[entry_id] = (start, length)

    def _read_entry(self, key):
        """Actually read and parse the entry with ID *key*.

        Raises ``KeyError`` if *key* is not in the index.
        """
        # Locate the start of the entry
        start, length = self._entries_index[key]
        self._file.seek(start)

        # Parse the entry
        self._parser.parse(self._file.read(length))

        # bibtexparser.bparser.BibTexParser uses an internal BibDatabase that
        # is not emptied for successive calls to parse(). Empty it.
        entry = self._parser.bib_database.entries.pop()
        assert len(self._parser.bib_database.entries) == 0

        # Store for later access
        self._entries_dict[entry["ID"]] = entry

        return entry

    def iter_entries(self, progress=False):
        """Return an iterator over all entries.

        With *progress* set, the iterator is wrapped in a ``tqdm`` progress
        bar sized to the number of indexed entries.
        """
        if progress:
            return tqdm(self._generate_entries(),
                        total=len(self._entries_index),
                        leave=False)
        else:
            return iter(self._generate_entries())

    def _generate_entries(self):
        """Yield every entry, parsing those not yet loaded."""
        if self._all_loaded:
            yield from self._entries_dict.values()
        else:
            for key in self.iter_entry_keys():
                yield self.get_entry(key)
            # Only reached when the generator has been fully consumed.
            self._all_loaded = True

    def get_entry(self, key):
        """Retrieve the entry with ID *key*."""
        try:
            return self._entries_dict[key]
        except KeyError:
            return self._read_entry(key)

    def iter_entry_keys(self):
        """Return an iterator over entry IDs.

        This is much faster than in BibDatabase, because the entries are not
        fully parsed. Use :func:`get_entry` to retrieve the actual entry.
        """
        return self._entries_index.keys()
 def test_parse_bom_bytes(self):
     """Parsing the raw bytes of the BOM input file yields the expected entries."""
     bom_parser = BibTexParser()
     with open(self.input_bom_file_path, 'rb') as handle:
         raw_bytes = handle.read()
     parsed = bom_parser.parse(raw_bytes)
     self.assertEqual(parsed.entries, self.entries_expected)
Exemple #12
0
 def pull(self):
     """Rewind ``self.input_file``, read all of it and return the parse result."""
     parser = BibTexParser(common_strings=True, interpolate_strings=False)
     source = self.input_file
     source.seek(0)
     contents = source.read()
     return parser.parse(contents)
Exemple #13
0
def import_command(ctx, paths):
    """Read new entries into the database.

    PATHS may be zero or more .bib files or directories containing .bib files
    to import.

    Configuration file keys

    \b
    import:
      path: a default path to check for .bib files to import, if no PATHS are
            given.
    """
    # Local import: only the file-move step below needs it.
    import shutil

    # If no files
    if not paths:
        # Directory from which to import entries
        paths = [ctx.cmd_config("import").get("path", ".")]

    # A directory stands for every .bib file directly inside it
    paths = [
        os.path.join(p, "*.bib") if os.path.isdir(p) else p for p in paths
    ]

    # A parser for reading entries
    parser = BibTexParser()
    parser.homogenise_fields = False
    parser.customization = _add_clean

    # Iterate over files in the add_dir
    for fn in chain(*map(iglob, paths)):
        os.system("clear")
        print("Importing", fn, end=":\n\n")

        # Read and parse the file
        with open(fn, "r") as f:
            s = f.read()
            print(s, end="\n\n")

        try:
            e = parser.parse(clean_str(s)).entries[-1]
        except ParseException:
            # Show the cleaned text that failed to parse, then propagate
            print(clean_str(s))
            raise

        abstract = e.pop("abstract", None)

        print("Parsed entry:", to_string(e), sep="\n\n")

        if abstract is not None:
            print("Abstract:", abstract, sep="\n\n")

        # Ask user for a key; repeat until the key is not already in the
        # database
        while True:
            key = input_with_prefill(
                "\nEnter key for imported entry "
                "([] Skip, [D]elete without importing, [Q]uit): ",
                guess_key(e),
            )
            try:
                ctx.db.get_entry(key)
                print("Key already exists.")
            except KeyError:
                break

        if key == "":
            continue
        elif key.lower() == "d":
            os.remove(fn)
            continue
        elif key.lower() == "q":
            break
        else:
            # Change the entry key
            e["ID"] = key

        # Add a custom field with the current date
        e["entrydate"] = datetime.now().isoformat(timespec="minutes")

        # Select a full text file to go with the entry
        fn_local = _select_file(e["ID"],
                                ctx.cmd_config("import").get("path", "."))
        if fn_local:
            e["localfile"] = os.path.basename(fn_local)

        # Append the entry to the database
        with open(ctx.config["database"], "a") as f_db:
            f_db.write("\n")
            f_db.write(to_string(e))

        # Write the abstract; mode "x" refuses to overwrite an existing file
        if abstract:
            fn_abstract = ctx.config["path"] / "abstracts" / ("%s.tex" % key)
            with open(fn_abstract, "x") as f_abstract:
                f_abstract.write(abstract)

        # Move the full text file. Done without a shell so that file names
        # containing quotes or other shell metacharacters are handled
        # safely; like `mv -n`, an existing destination is never overwritten.
        if fn_local:
            destination = str(ctx.config["path"] / e["localfile"])
            if not os.path.exists(destination):
                shutil.move(str(fn_local), destination)

        # Remove the imported entry file
        remove = input("\nRemove imported file %s ([Y]es, [enter] to "
                       "keep)? " % fn)
        if remove.lower() == "y":
            os.remove(fn)