def run_test_data(data_csv):
    """Run SolrWikipediaAllAnswerThenQuestionPipeline over ``data_csv``.

    A timestamped directory ``../../../outputdata/test_<YYYYmmdd_HHMMSS>``
    (relative to this module's directory) is created and used both for the
    log configured by ``setup_log`` and for the results the pipeline writes
    to disk. The value returned by ``pipeline.score()`` is printed at the end.

    :param data_csv: handed unchanged to ``AristoData``; presumably the path
        of the CSV file to load -- confirm against ``AristoData``.
    :raises OSError: from ``os.makedirs`` if the output directory already
        exists or cannot be created.
    """
    # One output directory per run, named after the start time (to the second).
    run_stamp = time.strftime('%Y%m%d_%H%M%S')
    module_dir = os.path.dirname(__file__)
    out_dir = os.path.join(
        module_dir, "../../../outputdata/test_{}".format(run_stamp))
    os.makedirs(out_dir)

    logger = setup_log(out_dir)

    dataset = AristoData(data_csv)
    dataset.print_summary()

    runner = SolrWikipediaAllAnswerThenQuestionPipeline(
        data=dataset, logger=logger)
    runner.run_pipeline()
    runner.write_to_disk(out_dir)
    print(runner.score())
def __init__(self,userid, password, out_dir, logger=None):
    """Store the credentials, set up logging and start a Firefox driver.

    :param userid: stored on ``self.userid``.
    :param password: stored on ``self.password``.
    :param out_dir: directory created via ``Utilities.create_dir`` and set
        (as an absolute path) as the Firefox download directory.
    :param logger: logger to use. When ``None``, a timestamped
        ``../../../outputdata/Ck12CorpusCreater_<YYYYmmdd_HHMMSS>`` directory
        (relative to this module) is created and ``setup_log`` is called
        on it to obtain one.
    """
    self.userid = userid
    self.password = password

    self.logger = logger
    if logger is None:
        # No logger supplied: create a private, timestamped log directory.
        stamp = time.strftime('%Y%m%d_%H%M%S')
        log_dir = os.path.join(
            os.path.dirname(__file__),
            "../../../outputdata/Ck12CorpusCreater_{}".format(stamp))
        Utilities.create_dir(log_dir)
        self.logger = setup_log(log_dir, "Ck12CorpusCreater")

    Utilities.create_dir(out_dir)

    self.profile = webdriver.FirefoxProfile()
    # Applied in this exact order; kept as a tuple of pairs so the order is
    # explicit. Together they point downloads at out_dir and ask Firefox to
    # save PDFs to disk rather than prompting or opening a viewer.
    firefox_preferences = (
        ('browser.download.folderList', 2),  # custom location
        ('browser.download.dir', os.path.abspath(out_dir)),
        ('browser.download.manager.showWhenStarting', False),
        ("pdfjs.disabled", True),
        ('browser.helperApps.neverAsk.saveToDisk', "application/pdf"),
        ('plugin.scan.plid.all', False),
        ("plugin.scan.Acrobat", "99.0"),
    )
    for pref_name, pref_value in firefox_preferences:
        self.profile.set_preference(pref_name, pref_value)

    self.driver = webdriver.Firefox(self.profile)
def download(url, credentials, logger ): (userid, password) = credentials folder_name = urlparse(url).path.replace("/","").replace("%","") with Ck12CorpusCreater(userid, password, os.path.join(os.path.dirname(__file__), "../../../corpus/{}".format(folder_name)), logger=logger) as ck12CorpusCreater : ck12CorpusCreater.download_book(url) log_dir = os.path.join(os.path.dirname(__file__),"../../../outputdata/Ck12CorpusCreater_{}".format(time.strftime('%Y%m%d_%H%M%S'))) Utilities.create_dir(log_dir) log = setup_log(log_dir, "Ck12CorpusCreater") try: credentialsCk12 = Utilities.get_credentials("https://www.ck12.org/") # # download("https://www.ck12.org/book/CK-12-Life-Science-Concepts-For-Middle-School",credentialsCk12, log) # download("https://www.ck12.org/book/CK-12-Earth-Science-Concepts-For-High-School", credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Earth-Science-Concepts-For-Middle-School", credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Physical-Science-Concepts-For-Middle-School",credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Biology-Concepts",credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Chemistry-Basic",credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Chemistry-Concepts-Intermediate",credentialsCk12,log) # download("https://www.ck12.org/book/CK-12-Physics-Concepts---Intermediate",credentialsCk12,log) ####download("https://www.ck12.org/book/CK-12-Understanding-Biodiversity",credentialsCk12,log) #download("https://www.ck12.org/book/CK-12-Biology-Advanced-Concepts/",credentialsCk12,log)