rel += 1 ap += rel / idx return ap / len(truth) FLAGS_USE_TYPE = True cur_dir = os.path.dirname(os.getcwd()) dataset = "wiki2" print('dataset:%s' % dataset) folder = cur_dir + '/data/{}/intermediate/'.format(dataset) start = time.time() print('data folder: {}'.format(folder)) print('loading eid and name maps') eid2ename, ename2eid = util.loadEidToEntityMap(folder + 'entity2id.txt') print('loading eid and skipgram maps') eid2patterns, pattern2eids = util.loadFeaturesAndEidMap( folder + 'reduced_eidSkipgramCounts.txt') print('loading skipgram strength maps') eidAndPattern2strength = util.loadWeightByEidAndFeatureMap( folder + 'setexpan_eidSkipgram2TFIDFStrength.txt', idx=-1) print('loading eid and type maps') eid2types, type2eids = util.loadFeaturesAndEidMap(folder + 'eidTypeCounts.txt') print('loading type strength maps') eidAndType2strength = util.loadWeightByEidAndFeatureMap( folder + 'eidType2TFIDFStrength.txt', idx=-1) end = time.time() print("Finish loading all dataset, using %s seconds" % (end - start)) good_gold_set = {} for filename in os.listdir('../data/eval/cleaned_set/'): with open('../data/eval/cleaned_set/' + filename, 'r') as fin: setname = filename.split('.')[0]
import random

# Directory containing this script; every data path below is built relative to it.
curr_dir = os.path.dirname(os.path.realpath(__file__))

## Setting global versions
# When False, the type-based feature maps (eid2types, type2eids,
# eidAndType2strength) are NOT loaded and those names stay undefined.
FLAGS_USE_TYPE = False

## Loading Corpus
data = "ap89"
print('dataset:%s' % data)
# Kept as a string with leading/trailing slashes because it is concatenated
# directly between curr_dir and each file name.
folder = '/../../data/' + data + '/intermediate/'

start = time.time()

print('loading eid and name maps')
# entity2id.txt
eid2ename, ename2eid = util.loadEidToEntityMap(
    curr_dir + folder + 'entity2id.txt')

print('loading eid and skipgram maps')
# eidSkipgramCount.txt
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(
    curr_dir + folder + 'eidSkipgramCounts.txt')

print('loading skipgram strength map')
# (eid, feature, weight) file
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    curr_dir + folder + 'eidSkipgram2TFIDFStrength.txt', idx=-1)

if FLAGS_USE_TYPE:
    print('loading eid and type maps')
    # eidTypeCount.txt
    eid2types, type2eids = util.loadFeaturesAndEidMap(
        curr_dir + folder + 'eidTypeCounts.txt')

    print('loading type strength map')
    # (eid, feature, weight) file
    eidAndType2strength = util.loadWeightByEidAndFeatureMap(
        curr_dir + folder + 'eidType2TFIDFStrength.txt', idx=-1)

end = time.time()
print("Finish loading all dataset, using %s seconds" % (end - start))

## Start set expansion
# read good_gold_set.json
# Use a context manager so the file handle is closed as soon as the JSON has
# been parsed (the previous bare open() never closed it). `fin` remains bound
# afterwards, as a closed file object.
with open(curr_dir + "/../../data/queries/good_gold_set.json") as fin:
    gold_sets = json.load(fin)