from pyliterature import Pyliterature


# read urls from the bib file (expects lines of the form: url = {http://...},)
urls = []
with open('example.bib') as bibfile:
    for line in bibfile:
        if 'url' in line:
            url = line.split('=')[1].split('{')[1].split('}')[0]
            urls.append(url)
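# A more tolerant alternative (only a sketch, not part of pyliterature): a regex
# also matches bib entries written as url = "..." or URL={...} without spaces.
import re
url_pattern = re.compile(r'url\s*=\s*[{"]([^}"]+)[}"]', re.IGNORECASE)
with open('example.bib') as bibfile:
    urls = [m.group(1) for m in (url_pattern.search(line) for line in bibfile) if m]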

#----------------------------------------
# read old database file
keyword = 'catalytic'
liter = Pyliterature()
liter.read_database(keyword)
# find new urls that are not yet in the database
urls_new = []
for url in urls:
    if url not in liter.url_list:
        urls_new.append(url)

#-----------------------------------
# load text from the html of each new url
for url in urls_new:
    liter.url = url
    print(url + '\n\n')
    liter.parser()

Example #2
# parse keysents from the text
from pyliterature import Pyliterature

urls = []
with open('urls.dat') as urlfile:
    for line in urlfile:
        urls.append(line.strip())  # strip the trailing newline

keyword = 'DFT'
liter = Pyliterature()
for url in urls:
    liter.url = url
    print(url + '\n\n')
    liter.parser()
# now parse the combined text for sentences containing the keyword
liter.url = None
liter.keyword = keyword
# print(liter.text)
liter.parser()
print('===================================================')
for keysent in liter.keysents:
    print(keysent)
    print('\n')
Example #3
from pyliterature import Pyliterature

url = 'http://www.nature.com/nature/journal/v541/n7635/full/nature20782.html'
keyword = 'DFT'


liter = Pyliterature(url, keyword)
# load the text from the url's html and parse the sentences containing the keyword
liter.parser()

print('=================article text==================================')
print(liter.text)

print('=================key sentences==================================')
for keysent in liter.keysents:
    print(keysent)
    print('\n')
Example #4
from pyliterature import Pyliterature

urls = [
    'http://science.sciencemag.org/content/355/6320/49.full',
    'http://www.nature.com/nature/journal/v541/n7635/full/nature20782.html',
    'http://www.sciencedirect.com/science/article/pii/S1751616116301138',
    'http://pubs.acs.org/doi/full/10.1021/acscatal.6b02960',
]

keyword = 'DFT'
liter = Pyliterature()
for url in urls:
    print(url + '\n\n')
    liter.url = url
    liter.parser()
# now parse the combined text for sentences containing the keyword
liter.url = None
liter.keyword = keyword
# print(liter.text)
liter.parser()
print('===================================================')
for keysent in liter.keysents:
    print(keysent)
    print('\n')
Example #5
from pyliterature import Pyliterature
"""
load html every time is very slow, it's better to save the text we have into a database.
we can read text from the database next time.

"""
#----------------------------------------
# read old database file
keyword = 'DFT'
liter = Pyliterature()
liter.read_database(keyword)
for url in liter.url_list:
    print(url)

urls = [
    'http://science.sciencemag.org/content/355/6320/49.full',
    'http://www.nature.com/nature/journal/v541/n7635/full/nature20782.html',
    'http://www.sciencedirect.com/science/article/pii/S1751616116301138',
    'http://pubs.acs.org/doi/full/10.1021/acscatal.6b02960',
]
# find new urls that are not yet in the database
urls_new = []
for url in urls:
    if url not in liter.url_list:
        urls_new.append(url)

#-----------------------------------
# load text from the html of each new url
for url in urls_new:
    liter.url = url
    print(url + '\n\n')
    liter.parser()

Example #6
from pyliterature import Pyliterature
url = 'http://www.sciencedirect.com/science/article/pii/S1751616116301138'
keyword = 'CALPHAD'

liter = Pyliterature(url, keyword)
# load the text from the url's html and parse the sentences containing the keyword
liter.parser()

print('=================article text==================================')
print(liter.text)

print('=================key sentences==================================')
for keysent in liter.keysents:
    print(keysent)
    print('\n')