@classmethod
def match(cls, uri: str) -> Optional[papis.importer.Importer]:
    # Accept the URI only if it resolves as a DOI; otherwise signal
    # "no match" by returning None so other importers can try.
    try:
        doi.validate_doi(uri)
    except ValueError:
        return None
    else:
        return Importer(uri=uri)

@classmethod
def match(cls, uri):
    # Untyped variant of the same hook.
    try:
        doi.validate_doi(uri)
    except ValueError:
        return None
    else:
        return Importer(uri=uri)

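# A minimal usage sketch for the match() hook above (the DOI values are
# illustrative, and network access to doi.org is assumed): a resolvable
# DOI yields an Importer, anything else yields None.
importer = Importer.match('10.1103/PhysRevLett.49.57')
assert importer is not None

assert Importer.match('not-a-doi') is None
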
def index(self): """ By Default Flask will come into this when we run the file. * The can be accessed by 125.0.0.5000/api?doi= * User can either search for a particular doi or can get all the data available in the database """ if 'doi' in request.args: global doi_s doi_s = str(request.args['doi']) else: return 'Error:' global out_db if doi_s == 'all': out_db = database.read_all() return redirect(url_for('ApiView:display_all')) try: doi.validate_doi(doi_s) domain = doi.get_real_url_from_doi(doi_s) except ValueError: return 'Invalid doi' doi_temp = database.check([doi_s]) if doi_temp: scrap = scrape.Scrape() scrap.scrape([domain], current_app.config['DICT_OF_SPIDERS']) out_db = database.read([doi_s]) return redirect(url_for('ApiView:display_all'))
def search_doi(self): """ Get Input from the user , validate and return the bibliographical details After clicking the submit button or search button , flask comes here. The values of DOI 's are obtained from the user either as string separated by comma or as a json or csv file. Uploaded files are saved in the Upload folder. The DOI 's are parsed and saved as a list , removed the duplicate ones. Validated the DOI 's by checking the correct format of DOI provided by DOI.org . The url link is obtained from doi.get_url_from_doi(doi). Check the database for the details for each doi. If DOI 's are not present in the database, the domains are saved as a list and Scrape object is called. The data corresponds to the DOI 's are obtained. :return: html page containing the bibliographical data """ from project_doi import database global out_db, doi_s list_doi = [] if request.method == 'POST': if 'doi' in request.form: list_doi = request.form['doi'].split(',') if 'file' in request.files: file = request.files['file'] if file and self.allowed_file(file.filename): filename = secure_filename(file.filename) extension = file.filename.rsplit('.', 1)[1].lower() path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename) file.save(path) list_doi = self.upload_contents(extension, path) else: flash('Please upload only csv and json formats') list_doi = list(dict.fromkeys(list_doi)) doi_s = list_doi.copy() domain = {} for i in list_doi: try: doi.validate_doi(i) domain[i] = doi.get_real_url_from_doi(i) except ValueError: flash(f'{i} : is not valid , please try again') doi_s.remove(i) if doi_s is None: return redirect(url_for('DOIView:index')) doi_temp = database.check(doi_s) if doi_temp: doi_ = doi_temp domains = [domain[i] for i in doi_ if i in domain] doi_temp.clear() scrap = scrape.Scrape() scrap.scrape(domains, app.config['DICT_OF_SPIDERS']) out_db = database.read(doi_s) return render_template("search/search_doi.html", context=out_db)
def index(self):
    if 'doi' in request.args:
        global doi_s
        doi_s = str(request.args['doi'])
    else:
        return 'Error: no doi parameter supplied'
    global out_db
    if doi_s == 'all':
        out_db = database.read_all()
        return redirect(url_for('ApiView:display_all'))
    try:
        doi.validate_doi(doi_s)
        domain = doi.get_real_url_from_doi(doi_s)
    except ValueError:
        return 'Invalid doi'
    doi_temp = database.check([doi_s])
    if doi_temp:
        scrap = scrape.Scrape()
        scrap.scrape([domain], current_app.config['DICT_OF_SPIDERS'])
    out_db = database.read([doi_s])
    return redirect(url_for('ApiView:display_all'))

def search_doi(self):
    global out_db, doi_s
    list_doi = []
    if request.method == 'POST':
        if 'doi' in request.form:
            list_doi = request.form['doi'].split(',')
        if 'file' in request.files:
            file = request.files['file']
            if file and self.allowed_file(file.filename):
                filename = secure_filename(file.filename)
                extension = file.filename.rsplit('.', 1)[1].lower()
                path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
                file.save(path)
                list_doi = self.upload_contents(extension, path)
            else:
                flash('Please upload only csv and json formats')
    list_doi = list(dict.fromkeys(list_doi))
    # Copy so that removing invalid DOIs below does not mutate list_doi
    # while it is being iterated.
    doi_s = list_doi.copy()
    domain = {}
    for i in list_doi:
        try:
            doi.validate_doi(i)
            domain[i] = doi.get_real_url_from_doi(i)
        except ValueError:
            flash(f'{i} is not valid, please try again')
            doi_s.remove(i)
    if not doi_s:
        return redirect(url_for('DOIView:index'))
    doi_temp = database.check(doi_s)
    if doi_temp:
        domains = [domain[i] for i in doi_temp if i in domain]
        scrap = scrape.Scrape()
        success = scrap.scrape(domains, current_app.config['DICT_OF_SPIDERS'])
        if success:
            for i in success:
                print('scraped:', i)
    out_db = database.read(doi_s)
    return render_template("search/search_doi.html", context=out_db)

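# Why doi_s must be a copy rather than the bare `doi_s = list_doi` from
# the original: with a bare assignment both names point at the same list,
# and remove() inside the loop mutates the sequence being iterated,
# silently skipping elements.
items = ['a', 'bad', 'also-bad', 'b']
alias = items                    # no copy: one shared list
for i in items:
    if i in ('bad', 'also-bad'):
        alias.remove(i)          # shifts the iterator past 'also-bad'
print(items)                     # ['a', 'also-bad', 'b']
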
def test_validate_doi() -> None:
    data = [
        ('10.1063/1.5081715',
         'http://aip.scitation.org/doi/10.1063/1.5081715'),
        ('10.1007%2FBF01451751',
         'http://link.springer.com/10.1007/BF01451751'),
        ('10.1103/PhysRevLett.49.57',
         'https://link.aps.org/doi/10.1103/PhysRevLett.49.57'),
        ('10.1080/14786442408634457',
         'https://www.tandfonline.com/doi/full/10.1080/14786442408634457'),
        ('10.1021/jp003647e',
         'https://pubs.acs.org/doi/10.1021/jp003647e'),
        ('10.1016/S0009-2614(97)04014-1',
         'https://linkinghub.elsevier.com/retrieve/pii/S0009261497040141'),
    ]
    for doi, url in data:
        assert url == validate_doi(doi)

    for doi in ['', 'asdf']:
        try:
            validate_doi(doi)
        except ValueError as e:
            assert str(e) == 'HTTP 404: DOI not found'
        else:
            raise AssertionError(f'expected ValueError for {doi!r}')

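# The same negative check written with pytest.raises, which fails loudly
# when validate_doi() does not raise at all (a sketch assuming pytest is
# the test runner).
import pytest

def test_validate_doi_invalid() -> None:
    for bad in ['', 'asdf']:
        with pytest.raises(ValueError, match='HTTP 404: DOI not found'):
            validate_doi(bad)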