Example #1
    def index(self):
        """
                    By Default Flask will come into this when we run the file.

        * The can be accessed by 125.0.0.5000/api?doi=
        * User can either search for a particular doi or can get all the data available in the database

        """
        if 'doi' in request.args:
            global doi_s
            doi_s = str(request.args['doi'])
        else:
            return 'Error: no doi parameter provided'
        global out_db
        # 'all' is a special value: return everything in the database.
        if doi_s == 'all':
            out_db = database.read_all()
            return redirect(url_for('ApiView:display_all'))
        # Validate the DOI and resolve it to the publisher's URL.
        try:
            doi.validate_doi(doi_s)
            domain = doi.get_real_url_from_doi(doi_s)
        except ValueError:
            return 'Invalid doi'
        # database.check returns the DOIs that are not yet stored;
        # scrape those before reading the result back.
        doi_temp = database.check([doi_s])
        if doi_temp:
            scrap = scrape.Scrape()
            scrap.scrape([domain], current_app.config['DICT_OF_SPIDERS'])
        out_db = database.read([doi_s])
        return redirect(url_for('ApiView:display_all'))
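For reference, a quick way to exercise this endpoint from a script (assuming a local Flask development server on the default 127.0.0.1:5000; the requests usage is only a sketch, and the DOI string is taken from Example #3 below):

import requests

# Look up one DOI.
resp = requests.get('http://127.0.0.1:5000/api',
                    params={'doi': '10.1016/S0009-2614(97)04014-1'})
print(resp.status_code, resp.url)

# Or fetch everything stored in the database.
resp = requests.get('http://127.0.0.1:5000/api', params={'doi': 'all'})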
Example #2
import logging
import time

import colorama
import doi
import papis.downloaders
import yaml


def main(f, failed_out, new_out, log_out):
    # Log to a file and mirror everything to the console.
    logging.basicConfig(filename=log_out, level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler())
    new_docs = []
    failed_docs = []
    try:
        with open(f) as fd:
            # safe_load_all avoids PyYAML's unsafe default loader.
            docs = list(yaml.safe_load_all(fd))
            N = len(docs)
            for K, d in enumerate(docs):
                affs = has_affiliation(d)
                # Percentage of documents processed without failure so far.
                success_rate = 100.0 - float(len(failed_docs)) / N * 100
                if affs:
                    new_docs.append(d)
                    continue
                try:
                    url = doi.get_real_url_from_doi(d['doi'])
                    logging.info(
                        "{c.Fore.CYAN}[{s:.1f}%]: {K}/{N}. {t}>>"
                        "{c.Fore.YELLOW}Trying {0}{c.Style.RESET_ALL}"
                        .format(url, s=success_rate,
                                K=K, N=N, t=time.ctime(), c=colorama))
                    # Use the first downloader that claims to match the URL;
                    # an empty list raises IndexError, caught below.
                    downs = papis.downloaders.get_matching_downloaders(url)
                    downs[0].fetch_data()
                    ctx = downs[0].ctx

                    affs = has_affiliation(ctx.data)
                    if affs:
                        d['author_list'] = ctx.data['author_list']
                    else:
                        raise Exception('No affiliations')
                except Exception as e:
                    d['_error_msg'] = str(e)
                    logging.info("{c.Fore.RED}\tFailed ({e}){c.Style.RESET_ALL}"
                                 .format(e=e, c=colorama))
                    failed_docs.append(d)
                else:
                    logging.info("{c.Fore.GREEN}\tsuccess "
                                 "{affs!s:.100}{c.Style.RESET_ALL}"
                                 .format(affs=affs, c=colorama))
                    new_docs.append(d)
    except Exception as e:
        logging.error(e)
    finally:
        with open(failed_out, 'w+') as fd:
            logging.info('writing ' + failed_out)
            yaml.dump_all(
                list(failed_docs),
                fd,
                allow_unicode=True,
                default_flow_style=False)

        with open(new_out, 'w+') as fd:
            logging.info('writing ' + new_out)
            yaml.dump_all(
                list(new_docs),
                fd,
                allow_unicode=True,
                default_flow_style=False)
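has_affiliation is not shown in this example; a minimal sketch of what it presumably checks, assuming papis-style documents whose author_list entries may carry an affiliation field:

def has_affiliation(d):
    # True when at least one author entry has a non-empty
    # 'affiliation' field (assumed document layout).
    return any(author.get('affiliation')
               for author in d.get('author_list') or [])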
Example #3
from doi import get_real_url_from_doi


def test_get_real_url_from_doi() -> None:
    # Each DOI should resolve to its publisher landing page.
    data = [
        ('10.1016/S0009-2614(97)04014-1',
         'https://www.sciencedirect.com/science/'
         'article/abs/pii/S0009261497040141'),
    ]
    for doi, url in data:
        assert url == get_real_url_from_doi(doi)
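The test pins down the contract of get_real_url_from_doi: a DOI in, the publisher's landing-page URL out. Conceptually this amounts to following the doi.org resolver; a rough sketch of the idea (not the library's actual implementation):

import requests

def resolve_doi(doi_string):
    # Follow the redirect chain from the doi.org resolver and
    # return the final landing-page URL.
    resp = requests.get('https://doi.org/' + doi_string,
                        allow_redirects=True, timeout=30)
    return resp.url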
Example #4
    def search_doi(self):
        """
            Get Input from the user , validate and return the bibliographical details

        After clicking the submit button or search button , flask comes here.
        The values of DOI 's are obtained from the user either as string separated by comma or as a json or csv file.
        Uploaded files are saved in the Upload folder.
        The DOI 's are parsed and saved as a list , removed the duplicate ones.
        Validated the DOI 's by checking the correct format of DOI provided by DOI.org .
        The url link is obtained from doi.get_url_from_doi(doi).
        Check the database for the details for each doi.
        If DOI 's are not present in the database, the domains are saved as a list and Scrape object is called.
        The data corresponds to the DOI 's are obtained.

        :return: html page containing the bibliographical data

        """
        from project_doi import database

        global out_db, doi_s
        list_doi = []
        if request.method == 'POST':
            if 'doi' in request.form:
                list_doi = request.form['doi'].split(',')
            if 'file' in request.files:
                file = request.files['file']
                if file and self.allowed_file(file.filename):
                    filename = secure_filename(file.filename)
                    extension = file.filename.rsplit('.', 1)[1].lower()
                    path = os.path.join(current_app.config['UPLOAD_FOLDER'],
                                        filename)
                    file.save(path)
                    list_doi = self.upload_contents(extension, path)
                else:
                    flash('Please upload only CSV or JSON files')
            list_doi = list(dict.fromkeys(list_doi))
            doi_s = list_doi.copy()
            domain = {}
            # Validate each DOI and resolve it to the publisher's URL;
            # invalid DOIs are dropped from the working list.
            for i in list_doi:
                try:
                    doi.validate_doi(i)
                    domain[i] = doi.get_real_url_from_doi(i)
                except ValueError:
                    flash(f'{i} is not valid, please try again')
                    doi_s.remove(i)
            if not doi_s:
                return redirect(url_for('DOIView:index'))
            # database.check returns the DOIs that are not stored yet;
            # scrape only those before reading everything back.
            doi_temp = database.check(doi_s)
            if doi_temp:
                domains = [domain[i] for i in doi_temp if i in domain]
                scrap = scrape.Scrape()
                scrap.scrape(domains, current_app.config['DICT_OF_SPIDERS'])
            out_db = database.read(doi_s)
        return render_template("search/search_doi.html", context=out_db)
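upload_contents is not shown here; a hypothetical sketch of such a helper (the assumed file layouts are a JSON list of DOI strings, or a CSV with one DOI per row in the first column):

import csv
import json

def upload_contents(self, extension, path):
    # Hypothetical helper: read DOIs from an uploaded file.
    if extension == 'json':
        with open(path) as fd:
            return list(json.load(fd))
    with open(path, newline='') as fd:
        return [row[0] for row in csv.reader(fd) if row]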
Example #5
    def search_doi(self):
        global out_db, doi_s
        list_doi = []
        if request.method == 'POST':
            if 'doi' in request.form:
                list_doi = request.form['doi'].split(',')
            if 'file' in request.files:
                file = request.files['file']
                if file and self.allowed_file(file.filename):
                    filename = secure_filename(file.filename)
                    extension = file.filename.rsplit('.', 1)[1].lower()
                    path = os.path.join(current_app.config['UPLOAD_FOLDER'], filename)
                    file.save(path)
                    list_doi = self.upload_contents(extension, path)
                else:
                    flash('Please upload only CSV or JSON files')
            # Drop duplicates while preserving order.
            list_doi = list(dict.fromkeys(list_doi))
            # Copy so that removing invalid DOIs below does not mutate
            # the list being iterated.
            doi_s = list_doi.copy()
            domain = {}
            for i in list_doi:
                try:
                    doi.validate_doi(i)
                    domain[i] = doi.get_real_url_from_doi(i)
                except ValueError:
                    flash(f'{i} is not valid, please try again')
                    doi_s.remove(i)
            if not doi_s:
                return redirect(url_for('DOIView:index'))
            # Scrape only the DOIs that are not in the database yet.
            doi_temp = database.check(doi_s)
            if doi_temp:
                domains = [domain[i] for i in doi_temp if i in domain]
                scrap = scrape.Scrape()
                success = scrap.scrape(domains, current_app.config['DICT_OF_SPIDERS'])
                for i in success or []:
                    print('scraped:', i)
            out_db = database.read(doi_s)
        return render_template("search/search_doi.html", context=out_db)
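Both views lean on the same database contract: check(dois) returns the subset that still needs scraping, and read(dois) returns the stored records. A hypothetical sketch of that interface (the sqlite backend, table and column names are all assumptions):

import sqlite3

def check(dois):
    # Return the DOIs that are not stored yet, i.e. the ones
    # the caller still has to scrape.
    with sqlite3.connect('doi.db') as con:
        stored = {row[0] for row in
                  con.execute('SELECT doi FROM bibliography')}
    return [d for d in dois if d not in stored]

def read(dois):
    # Return the stored records for the given DOIs.
    with sqlite3.connect('doi.db') as con:
        marks = ','.join('?' * len(dois))
        return con.execute(
            f'SELECT * FROM bibliography WHERE doi IN ({marks})',
            dois).fetchall()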