# Entry script: build a Test over a fixed range of the 'eightTrain' level and
# run it.
#
# NOTE(review): this script had been collapsed onto a single physical line, so
# everything after the first '#' was parsed as one long comment and the
# statements in the "Main" section never executed. The line structure is
# restored here so they run again.
from Models import WordGraph, Test
# import pickle
# import copy
# import time
# import os
# import numpy as np

# Main
# Construct the test with start=0, end=2501 and N=6, then take it.
# NOTE(review): the meaning of `N` and whether `end` is inclusive are defined
# by Models.Test and are not visible here -- confirm before changing them.
eightGradeExam = Test(start=0, end=2501, level='eightTrain', N=6)
eightGradeExam.takeTest()
# Collects the second-order keywords of the validation test, saves them to the
# shared cache, and requests from Freebase every keyword that is not already
# present in the locally cached relations.
#
# NOTE(review): this script had been collapsed onto a single physical line,
# which is not valid Python; the statement structure is restored here.
import os
import pickle
import time

# NOTE(review): `utils`, `scraper` and `Test` are used below but are not
# imported in this section -- confirm they are brought into scope elsewhere.

# Shared cache directory. File names are appended by plain string
# concatenation, so the trailing slash is required.
cache = '../Dropbox/ScienceQASharedCache/'

# Get local copy of freebase relations, or start empty when none is cached.
if os.path.isfile(cache + 'FB_relations.p'):
    freebaseRelations = utils.loadData(cache + 'FB_relations.p')
else:
    freebaseRelations = {}

# Setup for worker pool (passed straight through to the scraper call below).
poolWorkerNum = 200
poolIterations = 2
poolRedundancies = False

# Get all keywords
eightGradeExam = Test(start=0, end=8132, dataType='val', N=6)
keywords = eightGradeExam.getSecondOrderKeywords()

# Save second order keywords
utils.saveData(keywords, cache + 'SecondOrderKeywords.p')
print('Keywords saved.')

# Filter keywords already in local freebaseRelations.
# NOTE(review): presumably freebaseRelations is keyed by keyword, so `in` is a
# key lookup -- verify against what utils.loadData returns.
keywords = [kw for kw in keywords if kw not in freebaseRelations]
# NOTE(review): the message says "first order" although the list comes from
# getSecondOrderKeywords(); the text is left unchanged -- confirm the wording.
print('Number of first order keywords left: {}'.format(len(keywords)))

# Timestamp taken immediately before the download starts.
start_download = time.time()

# Get keywords from Freebase
freebaseChunk2Mids, freebaseMid2Triples = scraper.getFreebaseCompendium(
    keywords,
    workerNum=poolWorkerNum,
    iterations=poolIterations,
    redundancies=poolRedundancies,
)