Example #1
@classmethod
def match(cls, uri: str) -> Optional[papis.importer.Importer]:
    """Return an Importer when *uri* is a valid DOI, otherwise None."""
    try:
        doi.validate_doi(uri)
    except ValueError:
        return None
    else:
        return Importer(uri=uri)
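
A minimal usage sketch (the probing helper below is an assumption, not part of the source): since match returns None for anything that is not a valid DOI, a caller can try several importer classes in turn and keep the first hit.

# Hypothetical helper; assumes each class exposes the `match` classmethod above.
def find_importer(uri: str, importer_classes):
    for importer_cls in importer_classes:
        imp = importer_cls.match(uri)  # None when uri is not a valid DOI
        if imp is not None:
            return imp
    return None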
Example #2
@classmethod
def match(cls, uri):
    # Same matcher without type hints: a valid DOI yields an Importer.
    try:
        doi.validate_doi(uri)
    except ValueError:
        return None
    else:
        return Importer(uri=uri)
Example #3
    def index(self):
        """
                    By Default Flask will come into this when we run the file.

        * The can be accessed by 125.0.0.5000/api?doi=
        * User can either search for a particular doi or can get all the data available in the database

        """
        if 'doi' in request.args:
            global doi_s
            doi_s = str(request.args['doi'])
        else:
            return 'Error: no doi parameter was provided'
        global out_db
        if doi_s == 'all':
            out_db = database.read_all()
            return redirect(url_for('ApiView:display_all'))
        try:
            doi.validate_doi(doi_s)
            domain = doi.get_real_url_from_doi(doi_s)
        except ValueError:
            return 'Invalid doi'
        doi_temp = database.check([doi_s])
        if doi_temp:
            scrap = scrape.Scrape()
            scrap.scrape([domain], current_app.config['DICT_OF_SPIDERS'])
        out_db = database.read([doi_s])
        return redirect(url_for('ApiView:display_all'))
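
A hedged client-side sketch of exercising this endpoint (the host, port, and /api route are assumptions taken from the docstring, not confirmed elsewhere):

import requests

# Query one DOI; passing doi=all would return every record in the database.
resp = requests.get('http://127.0.0.1:5000/api',
                    params={'doi': '10.1063/1.5081715'})
print(resp.status_code, resp.url)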
Example #4
    def search_doi(self):
        """
            Get Input from the user , validate and return the bibliographical details

        After clicking the submit button or search button , flask comes here.
        The values of DOI 's are obtained from the user either as string separated by comma or as a json or csv file.
        Uploaded files are saved in the Upload folder.
        The DOI 's are parsed and saved as a list , removed the duplicate ones.
        Validated the DOI 's by checking the correct format of DOI provided by DOI.org .
        The url link is obtained from doi.get_url_from_doi(doi).
        Check the database for the details for each doi.
        If DOI 's are not present in the database, the domains are saved as a list and Scrape object is called.
        The data corresponds to the DOI 's are obtained.

        :return: html page containing the bibliographical data

        """
        from project_doi import database

        global out_db, doi_s
        list_doi = []
        if request.method == 'POST':
            if 'doi' in request.form:
                list_doi = request.form['doi'].split(',')
            if 'file' in request.files:
                file = request.files['file']
                if file and self.allowed_file(file.filename):
                    filename = secure_filename(file.filename)
                    extension = file.filename.rsplit('.', 1)[1].lower()
                    path = os.path.join(current_app.config['UPLOAD_FOLDER'],
                                        filename)
                    file.save(path)
                    list_doi = self.upload_contents(extension, path)
                else:
                    flash('Please upload only csv and json formats')
            list_doi = list(dict.fromkeys(list_doi))
            doi_s = list_doi.copy()
            domain = {}
            for i in list_doi:
                try:
                    doi.validate_doi(i)
                    domain[i] = doi.get_real_url_from_doi(i)
                except ValueError:
                    flash(f'{i} is not valid, please try again')
                    doi_s.remove(i)
            if not doi_s:
                return redirect(url_for('DOIView:index'))
            doi_temp = database.check(doi_s)
            if doi_temp:
                doi_ = doi_temp
                domains = [domain[i] for i in doi_ if i in domain]
                doi_temp.clear()
                scrap = scrape.Scrape()
                scrap.scrape(domains, current_app.config['DICT_OF_SPIDERS'])
            out_db = database.read(doi_s)
        return render_template("search/search_doi.html", context=out_db)
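
The snippet relies on allowed_file and upload_contents helpers that are not shown. A minimal sketch of allowed_file, assuming the conventional Flask extension whitelist (the csv/json set is inferred from the flash message above):

ALLOWED_EXTENSIONS = {'csv', 'json'}  # assumed from the flash message

def allowed_file(filename):
    # Accept only filenames whose extension is in the whitelist.
    return '.' in filename and \
        filename.rsplit('.', 1)[1].lower() in ALLOWED_EXTENSIONS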
Example #5
 def index(self):
     if 'doi' in request.args:
         global doi_s
         doi_s = str(request.args['doi'])
     else:
         return 'Error: no doi parameter was provided'
     global out_db
     if doi_s == 'all':
         out_db = database.read_all()
         return redirect(url_for('ApiView:display_all'))
     try:
         doi.validate_doi(doi_s)
         domain = doi.get_real_url_from_doi(doi_s)
     except ValueError:
         return 'Invalid doi'
     doi_temp = database.check([doi_s])
     if doi_temp:
         scrap = scrape.Scrape()
         scrap.scrape([domain], current_app.config['DICT_OF_SPIDERS'])
     out_db = database.read([doi_s])
     return redirect(url_for('ApiView:display_all'))
Example #6
 def search_doi(self):
     global out_db, doi_s
     list_doi = []
     if request.method == 'POST':
         if 'doi' in request.form:
             list_doi = request.form['doi'].split(',')
         if 'file' in request.files:
             file = request.files['file']
             if file and self.allowed_file(file.filename):
                 filename = secure_filename(file.filename)
                 extension = file.filename.rsplit('.', 1)[1].lower()
                 path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
                 file.save(path)
                 list_doi = self.upload_contents(extension, path)
             else:
                 flash('Please upload only csv and json formats')
         list_doi = list(dict.fromkeys(list_doi))
         doi_s = list_doi.copy()  # copy, so removals below do not mutate the list being iterated
         domain = {}
         for i in list_doi:
             try:
                 doi.validate_doi(i)
                 domain[i] = doi.get_real_url_from_doi(i)
             except ValueError:
                 flash(f'{i} is not valid, please try again')
                 doi_s.remove(i)
         if not doi_s:
             return redirect(url_for('DOIView:index'))
         doi_temp = database.check(doi_s)
         if doi_temp:
             doi_ = doi_temp
             domains = [domain[i] for i in doi_ if i in domain]
             doi_temp.clear()
             scrap = scrape.Scrape()
             success = scrap.scrape(domains, current_app.config['DICT_OF_SPIDERS'])
             if success:
                 for i in success:
                     print('scraped successfully:', i)
         out_db = database.read(doi_s)
     return render_template("search/search_doi.html", context=out_db)
Example #7
from doi import validate_doi  # assumed import; the snippet does not show it


def test_validate_doi() -> None:
    data = [
        ('10.1063/1.5081715',
         'http://aip.scitation.org/doi/10.1063/1.5081715'),
        ('10.1007%2FBF01451751',
         'http://link.springer.com/10.1007/BF01451751'),
        ('10.1103/PhysRevLett.49.57',
         'https://link.aps.org/doi/10.1103/PhysRevLett.49.57'),
        ('10.1080/14786442408634457',
         'https://www.tandfonline.com/doi/full/10.1080/14786442408634457'),
        ('10.1021/jp003647e', 'https://pubs.acs.org/doi/10.1021/jp003647e'),
        ('10.1016/S0009-2614(97)04014-1',
         'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'),
    ]
    for doi, url in data:
        assert url == validate_doi(doi)

    for doi in ['', 'asdf']:
        try:
            validate_doi(doi)
        except ValueError as e:
            assert str(e) == 'HTTP 404: DOI not found'
        else:
            raise AssertionError(f'expected ValueError for invalid doi: {doi!r}')
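
An equivalent, more idiomatic way to assert the failure cases with pytest (a sketch; pytest as the test runner is an assumption):

import pytest

def test_invalid_doi_raises() -> None:
    # pytest.raises fails the test if no ValueError is raised at all.
    for bad in ['', 'asdf']:
        with pytest.raises(ValueError, match='HTTP 404: DOI not found'):
            validate_doi(bad)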