# NOTE(review): this fragment was collapsed onto one line, which turned the
# live statements below into comment text. Line structure restored here.
# The commented dict entries are the tail of a disabled `journals` config;
# imports (os, ace, Scraper) presumably appear earlier in the file — TODO confirm.
#     'delay': 10,
#     'search': 'fmri',
#     'mode': 'direct',  # PLoS sends nice usable XML directly
#     'min_pmid': None
# },
# 'Journal of Neuroscience': {
#     'delay': 20,
#     'mode': 'browser',
#     'search': 'fmri',
#     'min_pmid': None,
#     'limit': 100  # We can limit to only N new articles
# }
# }

# Verbose output
ace.set_logging_level('debug')

# Create temporary output dir
output_dir = './tmp/articles'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Initialize Scraper
scraper = Scraper(output_dir)

# PubMed IDs of the articles to retrieve.
ids = [
    "17427209", "21966352", "17475792", "21300162", "20188841",
    "21382560", "14960288", "21232548", "20809855", "19651151",
    "21686071", "19875675", "20816974", "21782354", "20933020",
    "18586110", "21499511", "22137505", "19766936", "19580877"
]
# NOTE(review): this script was collapsed onto one line (invalid syntax: bare
# dict items, code swallowed by inline comments). Reconstructed here. The
# opening of the `journals` dict was cut off; the first journal name is
# inferred from the "PLoS sends nice usable XML directly" comment — TODO confirm.
# Per-journal scraper settings: politeness delay, search term, retrieval mode.
journals = {
    'PLoS ONE': {
        'delay': 10,
        'search': 'fmri',
        'mode': 'direct',  # PLoS sends nice usable XML directly
        'min_pmid': None
    },
    'Journal of Neuroscience': {
        'delay': 20,
        'mode': 'browser',
        'search': 'fmri',
        'min_pmid': None,
        'limit': 100  # We can limit to only N new articles
    }
}

# Verbose output
ace.set_logging_level('debug')

# Create temporary output dir
output_dir = '/tmp/articles'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Initialize Scraper (reuse output_dir instead of repeating the literal path)
scraper = Scraper(output_dir)

# Loop through journals and retrieve articles with each journal's settings
for j, settings in journals.items():
    scraper.retrieve_journal_articles(j, **settings)
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.
# NOTE(review): script was collapsed onto one line (everything became one
# comment); line structure restored here.

import ace
from ace import database

# Uncomment the next line to see more information
ace.set_logging_level("info")

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "/Users/tal/tmp/html/*.html"

# Build the database, ingest the articles, and report summary statistics.
db = database.Database("example_db.sqlite")
db.add_articles(PATH_TO_FILES)
db.print_stats()
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.
# NOTE(review): script was collapsed onto one line (everything became one
# comment); line structure restored here. This is a near-duplicate of the
# previous example (single-quote variant).

import ace
from ace import database

# Uncomment the next line to see more information
ace.set_logging_level('info')

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "/Users/tal/tmp/html/*.html"

# Build the database, ingest the articles, and report summary statistics.
db = database.Database('example_db.sqlite')
db.add_articles(PATH_TO_FILES)
db.print_stats()
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.
# NOTE(review): script was collapsed onto one line (everything became one
# comment); line structure restored here.

import ace
from ace import database
from ace import export
from ace.database import Database, Article, Table, Activation

# Uncomment the next line to see more information
ace.set_logging_level('info')

# NOTE(review): the next line was a stray shell command pasted into the
# script — it is not valid Python, so it is disabled here:
# cd /home/jflournoy/Documents/NeuroDebian/code/metanal/ACE

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "./tmp/articles/html/ids/*.html"

db = database.Database(adapter='sqlite', db_name='sqlite:///example_db.sqlite')
# db.add_articles(PATH_TO_FILES)
db.print_stats()

# Dump activations to CSV, including group/size/statistic columns.
export.export_database(db, 'exported_data_2.csv', groups=True, size=True, statistic=True)

# Normalize each activation's `size` into a list on `size_tmp`.
# Only articles that have at least one table are considered.
for article in db.session.query(Article).filter(Article.tables.any()).all():
    for t in article.tables:
        for p in t.activations:
            if isinstance(p.size, basestring):  # NOTE: basestring is Python 2 only
                p.size_tmp = [p.size]
            elif p.size is None:
                p.size_tmp = []