def run_test_data(data_csv):
    """Run the Solr/Wikipedia answer-then-question pipeline and print its score.

    A new ``test_<YYYYmmdd_HHMMSS>`` directory is created under
    ``../../../outputdata`` (relative to this file). That directory is handed
    to ``setup_log`` and is also where the pipeline writes its results.

    :param data_csv: forwarded unchanged to ``AristoData``
        (presumably the path of a CSV file -- confirm against ``AristoData``).
    """
    module_dir = os.path.dirname(__file__)
    timestamp = time.strftime('%Y%m%d_%H%M%S')
    out_dir = os.path.join(module_dir, "../../../outputdata/test_{}".format(timestamp))
    # Plain makedirs: this raises if the directory is already there.
    os.makedirs(out_dir)

    run_logger = setup_log(out_dir)
    dataset = AristoData(data_csv)
    dataset.print_summary()

    pipeline = SolrWikipediaAllAnswerThenQuestionPipeline(data=dataset, logger=run_logger)
    pipeline.run_pipeline()
    pipeline.write_to_disk(out_dir)

    print(pipeline.score())
Example #2
0
    def __init__(self, userid, password, out_dir, logger=None):
        """Keep the credentials, pick a logger and launch a Firefox driver.

        :param userid: stored as ``self.userid``.
        :param password: stored as ``self.password``.
        :param out_dir: created through ``Utilities.create_dir``; its absolute
            path is configured as the Firefox download directory.
        :param logger: logger to use. When ``None``, a timestamped
            ``Ck12CorpusCreater_*`` directory is created under
            ``../../../outputdata`` (relative to this file) and a logger is
            obtained from ``setup_log`` for it.
        """
        self.userid = userid
        self.password = password
        self.logger = logger
        if logger is None:
            # No logger supplied: build a default one in its own log directory.
            stamp = time.strftime('%Y%m%d_%H%M%S')
            log_dir = os.path.join(
                os.path.dirname(__file__),
                "../../../outputdata/Ck12CorpusCreater_{}".format(stamp),
            )
            Utilities.create_dir(log_dir)
            self.logger = setup_log(log_dir, "Ck12CorpusCreater")

        Utilities.create_dir(out_dir)

        self.profile = webdriver.FirefoxProfile()
        # Preferences are applied in exactly this order.
        # NOTE(review): the PDF/plugin settings presumably make Firefox save
        # PDFs to disk without prompting -- confirm against Firefox docs.
        firefox_prefs = (
            ('browser.download.folderList', 2),  # custom location
            ('browser.download.dir', os.path.abspath(out_dir)),
            ('browser.download.manager.showWhenStarting', False),
            ("pdfjs.disabled", True),
            ('browser.helperApps.neverAsk.saveToDisk', "application/pdf"),
            ('plugin.scan.plid.all', False),
            ("plugin.scan.Acrobat", "99.0"),
        )
        for pref_name, pref_value in firefox_prefs:
            self.profile.set_preference(pref_name, pref_value)

        self.driver = webdriver.Firefox(self.profile)
Example #3
0

def download(url, credentials, logger):
    """Download the book at ``url`` into a corpus folder derived from the URL.

    The target folder is ``../../../corpus/<name>`` relative to this file,
    where ``<name>`` is the URL's path with every ``/`` and ``%`` removed.

    :param url: book URL, also passed to ``download_book``.
    :param credentials: two-item sequence unpacked as ``(userid, password)``.
    :param logger: forwarded to ``Ck12CorpusCreater``.
    """
    userid, password = credentials

    url_path = urlparse(url).path
    folder_name = url_path.replace("/", "").replace("%", "")
    corpus_dir = os.path.join(os.path.dirname(__file__), "../../../corpus/{}".format(folder_name))

    # The creator is used as a context manager for the duration of the download.
    with Ck12CorpusCreater(userid, password, corpus_dir, logger=logger) as creator:
        creator.download_book(url)





# Script-level logging: a timestamped Ck12CorpusCreater_* directory under
# ../../../outputdata (relative to this file) is created and handed to setup_log.
log_dir = os.path.join(
    os.path.dirname(__file__),
    "../../../outputdata/Ck12CorpusCreater_{}".format(time.strftime('%Y%m%d_%H%M%S')),
)
Utilities.create_dir(log_dir)
log = setup_log(log_dir, "Ck12CorpusCreater")

try:

    credentialsCk12 = Utilities.get_credentials("https://www.ck12.org/")
    #
    # download("https://www.ck12.org/book/CK-12-Life-Science-Concepts-For-Middle-School",credentialsCk12, log)
    # download("https://www.ck12.org/book/CK-12-Earth-Science-Concepts-For-High-School", credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Earth-Science-Concepts-For-Middle-School", credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Physical-Science-Concepts-For-Middle-School",credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Biology-Concepts",credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Chemistry-Basic",credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Chemistry-Concepts-Intermediate",credentialsCk12,log)
    # download("https://www.ck12.org/book/CK-12-Physics-Concepts---Intermediate",credentialsCk12,log)
    ####download("https://www.ck12.org/book/CK-12-Understanding-Biodiversity",credentialsCk12,log)
    #download("https://www.ck12.org/book/CK-12-Biology-Advanced-Concepts/",credentialsCk12,log)