# Script fragment: corpus loading for the "wiki2" dataset, followed by the
# start of gold-set reading.
#
# NOTE(review): every statement below sits on one physical line, so the
# original newlines/indentation appear to have been lost -- restore the layout
# before running.
#
# What the visible statements do, in order:
#   * set FLAGS_USE_TYPE to True (the flag is not consulted in the visible
#     part; the type maps are loaded unconditionally);
#   * build `folder` as <parent of the current working directory> +
#     '/data/wiki2/intermediate/';
#   * load five lookups from files under `folder` via the project `util`
#     loaders: entity id/name maps, eid/skipgram maps, skipgram strengths,
#     eid/type maps and type strengths (return shapes are defined in `util`,
#     not shown here);
#   * print the elapsed wall-clock loading time measured with time.time();
#   * create an empty `good_gold_set` dict and iterate over the files in
#     '../data/eval/cleaned_set/' (relative to the working directory), taking
#     `setname` from the file name up to its first '.' and reading all lines.
#
# The body of the final `for line in data:` loop is not visible in this
# fragment, so how `ents` and `good_gold_set` get filled cannot be stated here.
FLAGS_USE_TYPE = True cur_dir = os.path.dirname(os.getcwd()) dataset = "wiki2" print('dataset:%s' % dataset) folder = cur_dir + '/data/{}/intermediate/'.format(dataset) start = time.time() print('data folder: {}'.format(folder)) print('loading eid and name maps') eid2ename, ename2eid = util.loadEidToEntityMap(folder + 'entity2id.txt') print('loading eid and skipgram maps') eid2patterns, pattern2eids = util.loadFeaturesAndEidMap( folder + 'reduced_eidSkipgramCounts.txt') print('loading skipgram strength maps') eidAndPattern2strength = util.loadWeightByEidAndFeatureMap( folder + 'setexpan_eidSkipgram2TFIDFStrength.txt', idx=-1) print('loading eid and type maps') eid2types, type2eids = util.loadFeaturesAndEidMap(folder + 'eidTypeCounts.txt') print('loading type strength maps') eidAndType2strength = util.loadWeightByEidAndFeatureMap( folder + 'eidType2TFIDFStrength.txt', idx=-1) end = time.time() print("Finish loading all dataset, using %s seconds" % (end - start)) good_gold_set = {} for filename in os.listdir('../data/eval/cleaned_set/'): with open('../data/eval/cleaned_set/' + filename, 'r') as fin: setname = filename.split('.')[0] data = fin.readlines() ents = [] for line in data:
# When true, the entity-type feature files are loaded alongside the skip-gram
# feature files.
FLAGS_USE_TYPE = True

## Loading Corpus
data = "bc5"
print('dataset:%s' % data)

# All corpus files for the selected dataset are read from this directory.
folder = '../../data/' + data + '/'

start = time.time()

# entity2id.txt
print('loading eid and name maps')
eid2ename, ename2eid = util.loadEidToEntityMap(
    folder + 'entity2id.txt'
)

# eidSkipgramCounts.txt
print('loading eid and skipgram maps')
eid2patterns, pattern2eids = util.loadFeaturesAndEidMap(
    folder + 'eidSkipgramCounts.txt'
)

# (eid, feature, weight) file
print('loading skipgram strength map')
eidAndPattern2strength = util.loadWeightByEidAndFeatureMap(
    folder + 'eidSkipgram2TFIDFStrength.txt',
    idx=-1,
)

if FLAGS_USE_TYPE:
    # eidTypeCounts.txt
    print('loading eid and type maps')
    eid2types, type2eids = util.loadFeaturesAndEidMap(
        folder + 'eidTypeCounts.txt'
    )

    # (eid, feature, weight) file
    print('loading type strength map')
    eidAndType2strength = util.loadWeightByEidAndFeatureMap(
        folder + 'eidType2TFIDFStrength.txt',
        idx=-1,
    )

end = time.time()
print("Finish loading all dataset, using %s seconds" % (end - start))

## Start set expansion
enttypes = ['CHEMICAL', 'DISEASE']