def setUp(self):
    """Attach a database handle and a source manager to this instance.

    The database is built with the ``sqlite`` adapter and the
    ``sqlite:///ace_test_database.tmp`` name; the source manager is
    constructed from that same database object.
    """
    test_db = database.Database(
        adapter='sqlite',
        db_name='sqlite:///ace_test_database.tmp',
    )
    self.db = test_db
    # The manager receives the very same Database object stored on self.db.
    self.manager = sources.SourceManager(test_db)
# In this example we create a new DB file and process a bunch of
# articles. Note that due to copyright restrictions, articles can't
# be included in this package, so you'll need to replace PATH_TO_FILES
# with something that works.

import ace
from ace import database

# Set the logging level to 'info' to see more information while the
# articles are processed; comment this line out for less output.
ace.set_logging_level('info')

# Change this to a valid path to a set of html files.
PATH_TO_FILES = "/Users/tal/tmp/html/*.html"

# Use the explicit adapter/db_name keyword form, matching the other
# database.Database(...) calls in this file, rather than passing a bare
# file name positionally.
# NOTE(review): the 'sqlite:///' prefix looks like a SQLAlchemy-style URL
# (relative path) -- confirm against the Database constructor.
db = database.Database(adapter='sqlite', db_name='sqlite:///example_db.sqlite')
db.add_articles(PATH_TO_FILES)
db.print_stats()
] ids_designation = 'fixed_articles' scraper.retrieve_journal_articles_by_id(ids, mode='direct', ids_designation=ids_designation, delay=1) # Uncomment the next line to see more information ace.set_logging_level('debug') # Change this to a valid path to a set of html files. PATH_TO_FILES = output_dir + "/html/" + ids_designation + "/*.html" meta_dir = './tmp/meta' if not os.path.exists(meta_dir): os.makedirs(meta_dir) table_dir = './tmp/table' if not os.path.exists(table_dir): os.makedirs(table_dir) db = database.Database(adapter='sqlite', db_name='sqlite:///example_db_test.sqlite') db.add_articles(PATH_TO_FILES, pmid_filenames=True, metadata_dir=meta_dir, table_dir=table_dir) db.print_stats() db.save()
def setUp(self):
    """Attach a database handle and a source manager to this instance.

    The database is constructed with the explicit ``adapter``/``db_name``
    keyword form used by the other ``database.Database`` calls in this
    file, instead of passing a bare file name as the first positional
    argument. The file name itself (``ace_test_database.tmp``) is unchanged.
    """
    # NOTE(review): 'sqlite:///<path>' looks like a SQLAlchemy-style URL for
    # a relative file path -- confirm against the Database constructor.
    self.db = database.Database(
        adapter='sqlite',
        db_name='sqlite:///ace_test_database.tmp',
    )
    # The manager is built from the database object stored just above.
    self.manager = sources.SourceManager(self.db)