Example #1
0
    # The "member:78" filter in the search query below selects Elsevier.
    records = downloader.get_dict_from_search(
        'ascorbic acid+extraction+fruit&filter=member:78', 3000)
    for i, record in enumerate(records):
        print(i)
        cur_title = re.sub('[\[\]\'\.\/]', '', str(record['title']))
        replaced_doi = re.sub('[\[\]\'\.\/()]', '', str(record['doi']))

        print(replaced_doi)
        cur_filename = './elsevier/' + replaced_doi + '.' + filetype
        try:
            my_file = open(cur_filename, 'wb')  # Need to use 'wb' on Windows
        except Exception as e:
            print(str(e))
        #my_html_file = open('./crossref/'+str(i)+'.html', 'wb')
        ret = downloader.get_xml_from_doi(record['doi'], my_file, 'elsevier')
        #downloader.get_html_from_doi(doi, my_html_file, 'elsevier')

        my_file.close()

        if ret != True:
            continue
        if filetype == 'xml':
            rawtext = findText(cur_filename)
            print(rawtext)
            if rawtext == '':
                rawtext = 'empty'
            body = {
                'doi': replaced_doi,
                'text': rawtext,
                'url': str(record['url']),