def process_dataset(self, name):
    """Normalize a raw dataset file to the three-column, tab-separated
    (word1, word2, score) format used by the lexical benchmarks."""
    data = FileIO.read_list_file(name)
    new_data = []
    for d in data:
        d_list = d.split("\t")
        # Keep only the first three tab-separated fields of each line.
        new_data.append("\t".join([d_list[0], d_list[1], d_list[2]]))
    # Note: the output path is hard-coded to dataset 7.
    FileIO.save_list_file("eval-datasets/lexical/7.txt", new_data)
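# A hedged sketch of the row format this expects, not taken from a real
# dataset file: each input line carries at least three tab-separated
# fields, and process_dataset() keeps exactly the first three.
#
#     raw line:   "car\tautomobile\t3.92\textra-annotation..."
#     kept line:  "car\tautomobile\t3.92"
#
# Usage (the input path and the owning class, here "DatasetProcessor",
# are assumptions for illustration):
#
#     DatasetProcessor().process_dataset("eval-datasets/lexical/raw.txt")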
def __init__(self):
    # Benchmark queries and gold-standard results.
    self.dataset = FileIO.read_json_file("sematch/benchmark-data/data.txt")
    self.results = FileIO.read_list_file("sematch/benchmark-data/result.txt")
    self.queries = [(d['query'], d['entity']) for d in self.dataset]
    self.relevants = [d['result'] for d in self.dataset]
    self.synsetExpansion = SynsetExpansion()
    self.engine = Engine()
    # WordNet similarity metrics, expansion thresholds, and graph pattern
    # configurations to evaluate.
    self.sims = ['wup', 'lch', 'res', 'jcn', 'lin']
    self.thresholds = [0.9, 1]
    self.gpcs = ['gpc1', 'gpc2', 'gpc3', 'gpc4', 'gpc5', 'gpc6']
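# Inferred from the field accesses above, each record in data.txt is a
# JSON object with at least 'query', 'entity' and 'result' keys. The
# values below are illustrative, not from the real benchmark file:
#
#     {"query": "films directed by Stanley Kubrick",
#      "entity": "Stanley_Kubrick",
#      "result": ["A_Clockwork_Orange_(film)", "The_Shining_(film)"]}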
def load_dataset(self, dataset_id):
    """Load word pairs and their human similarity ratings from a
    three-column, whitespace-separated benchmark file."""
    data = FileIO.read_list_file("eval-datasets/lexical/%s.txt" % dataset_id)
    # A list comprehension instead of map(), so this also returns a
    # concrete list (not a lazy iterator) under Python 3.
    word_pairs = [(d.split()[0], d.split()[1]) for d in data]
    human = [d.split()[2] for d in data]
    return word_pairs, human
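# Usage sketch (the owning class name "Evaluation" is an assumption for
# illustration):
#
#     word_pairs, human = Evaluation().load_dataset(7)
#     for (w1, w2), score in zip(word_pairs, human):
#         print(w1, w2, score)
#
# Note that the human scores come back as strings and typically need a
# float() conversion before computing correlations.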
def __init__(self):
    # Index the type linkings by WordNet synset offset for direct lookup.
    self.links = FileIO.read_json_file("db/type-linkings.txt")
    self.links = {data['offset']: data for data in self.links}
    self.query_processor = Query()
    self.expander = SynsetExpansion(0.95, 'wup')
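# After the dict comprehension, self.links maps a synset offset to its
# full linking record. A hedged lookup sketch (the offset value is
# hypothetical, not from the real type-linkings file):
#
#     record = self.links['02958343']
#     # record is the original JSON object, including its 'offset' key.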