def create_representation(args):
    """Build the representation selected by parsed command-line arguments.

    Args:
        args: Mapping that must contain the keys ``<representation>``
            (representation type), ``<representation_path>`` (path handed to
            the loader), ``--w+c`` (truthy to request the two-part ensemble)
            and ``--eig`` (value convertible to ``float``; only forwarded to
            ``SVDEmbedding``).

    Returns:
        * ``'PPMI'``: ``Explicit.load(path, True)``.
        * ``'SVD'``: an ``SVDEmbedding``, or with ``--w+c`` an
          ``EnsembleEmbedding`` of two ``SVDEmbedding`` objects that differ
          only in their last positional flag.
        * anything else: ``Embedding.load(path, True)``, or with ``--w+c`` an
          ``EnsembleEmbedding`` of the ``path + '.words'`` and
          ``path + '.contexts'`` embeddings.

    Raises:
        NotImplementedError: If ``--w+c`` is requested together with PPMI.
        KeyError: If one of the required keys is missing from ``args``.
        ValueError, TypeError: If ``--eig`` cannot be converted to ``float``
            (checked for every representation type).
    """
    rep_type = args['<representation>']
    path = args['<representation_path>']
    w_c = args['--w+c']
    # Converted up front, so a malformed --eig is reported even when the
    # selected representation does not use it.
    eig = float(args['--eig'])

    if rep_type == 'PPMI':
        if w_c:
            # NotImplementedError is a subclass of Exception, so callers that
            # catch Exception keep working while getting a more precise type.
            raise NotImplementedError('w+c is not implemented for PPMI.')
        return Explicit.load(path, True)

    if rep_type == 'SVD':
        if w_c:
            # NOTE(review): the positional booleans are passed through as in
            # the original call sites; their meaning is defined by
            # SVDEmbedding / EnsembleEmbedding, not visible here.
            return EnsembleEmbedding(
                SVDEmbedding(path, False, eig, False),
                SVDEmbedding(path, False, eig, True),
                True,
            )
        return SVDEmbedding(path, True, eig)

    # Any other representation type is treated as a plain embedding.
    if w_c:
        return EnsembleEmbedding(
            Embedding.load(path + '.words', False),
            Embedding.load(path + '.contexts', False),
            True,
        )
    return Embedding.load(path, True)
def create_representation(rep_type, path, *args, **kwargs):
    """Instantiate the representation named by ``rep_type`` from ``path``.

    Extra positional and keyword arguments are forwarded unchanged to the
    selected loader/constructor.

    Args:
        rep_type: ``'Explicit'`` or ``'PPMI'`` selects ``Explicit.load``;
            ``'SVD'`` selects ``SVDEmbedding``; ``'GIGA'`` selects
            ``GigaEmbedding``; any other truthy value selects
            ``Embedding.load``.
        path: First argument passed to the selected loader.

    Returns:
        The loaded representation, or ``None`` when ``rep_type`` is falsy.
    """
    # A falsy type matches no branch of the dispatch and yields None.
    if not rep_type:
        return None

    if rep_type in ('Explicit', 'PPMI'):
        return Explicit.load(path, *args, **kwargs)
    if rep_type == 'SVD':
        return SVDEmbedding(path, *args, **kwargs)
    if rep_type == 'GIGA':
        return GigaEmbedding(path, *args, **kwargs)

    # Every remaining truthy type falls back to a plain embedding.
    return Embedding.load(path, *args, **kwargs)
def create_representation(rep_type, path, *args, **kwargs):
    """Instantiate the representation named by ``rep_type`` from ``path``.

    Extra positional and keyword arguments are forwarded unchanged to the
    selected loader/constructor.

    Args:
        rep_type: One of ``'Explicit'``/``'PPMI'`` (``Explicit.load``),
            ``'SVD'`` (``SVDEmbedding``), ``'GIGA'`` (``GigaEmbedding``) or
            ``'Embedding'`` (``Embedding.load``).
        path: First argument passed to the selected loader.

    Returns:
        The loaded representation, or ``None`` for an unrecognised
        ``rep_type``.
    """
    if rep_type in ('Explicit', 'PPMI'):
        factory = Explicit.load
    elif rep_type == 'SVD':
        factory = SVDEmbedding
    elif rep_type == 'GIGA':
        factory = GigaEmbedding
    elif rep_type == 'Embedding':
        factory = Embedding.load
    else:
        # Unknown types are not an error here; the caller receives None.
        return None

    return factory(path, *args, **kwargs)
def _process_exp_sense(self, articles, which='test'):
    """Predict a sense for every explicit relation and store it in place.

    Steps:
      1. Write one feature line per explicit relation to ``tmp/exp.feat``
         via ``Explicit.print_features``.
      2. Run ``Corpus.test_with_opennlp`` with the parser's model file,
         producing ``tmp/exp.pred``.
      3. Take the last whitespace-separated token of each prediction line
         and assign it, wrapped in a list, to the matching ``rel.sense``.

    Args:
        articles: Re-iterable collection of objects exposing
            ``exp_relations``; it is traversed twice, and both passes must
            yield relations in the same order.
        which: Forwarded to ``print_features`` (defaults to ``'test'``).

    Raises:
        IndexError: If the prediction file has fewer lines than there are
            relations, or contains a blank line.
    """
    exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
    expParser = Explicit()

    # Context manager guarantees the feature file is flushed and closed even
    # if feature extraction raises part-way through.
    with codecs.open(exp_feat_name, 'w', 'utf-8') as exp_sense_file:
        for art in articles:
            for rel in art.exp_relations:
                # NOTE(review): 'xxxxx' looks like a placeholder label for
                # prediction time -- confirm against print_features.
                expParser.print_features(rel, ['xxxxx'], which, exp_sense_file)

    exp_pred = FILE_PATH + '/../tmp/exp.pred'
    Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)

    # Previously this handle was opened inside the comprehension and never
    # closed; read it under `with` instead.
    with codecs.open(exp_pred, 'r', 'utf-8') as pred_file:
        exp_res = [line.strip().split()[-1] for line in pred_file]

    # Predictions are positional: the n-th line belongs to the n-th relation
    # in traversal order.
    rid = 0
    for art in articles:
        for rel in art.exp_relations:
            rel.sense = [exp_res[rid]]
            rid += 1
def _process_exp_sense(self, articles, which='test'):
    """Classify the sense of each explicit relation and record it in place.

    Features for all explicit relations are written to ``tmp/exp.feat``,
    ``Corpus.test_with_opennlp`` is run with the parser's model file to
    produce ``tmp/exp.pred``, and the last whitespace-separated token of each
    prediction line becomes the single-element ``rel.sense`` list of the
    relation at the same position.

    Args:
        articles: Re-iterable collection of objects exposing
            ``exp_relations``; it is walked twice in the same order.
        which: Passed through to ``print_features`` (defaults to ``'test'``).

    Raises:
        IndexError: If there are fewer prediction lines than relations, or a
            prediction line is blank.
    """
    exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
    expParser = Explicit()

    # `with` closes the feature file on every exit path, including errors
    # raised while printing features.
    with codecs.open(exp_feat_name, 'w', 'utf-8') as exp_sense_file:
        for art in articles:
            for rel in art.exp_relations:
                # NOTE(review): 'xxxxx' appears to be a dummy label used when
                # the true sense is unknown -- confirm with print_features.
                expParser.print_features(rel, ['xxxxx'], which, exp_sense_file)

    exp_pred = FILE_PATH + '/../tmp/exp.pred'
    Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)

    # The original left this reader open; close it deterministically.
    with codecs.open(exp_pred, 'r', 'utf-8') as pred_file:
        exp_res = [line.strip().split()[-1] for line in pred_file]

    # Index (rather than zip) so that a short prediction file still raises
    # IndexError instead of silently leaving relations unlabelled.
    relations = (rel for art in articles for rel in art.exp_relations)
    for rid, rel in enumerate(relations):
        rel.sense = [exp_res[rid]]
def _process_exp_sense(self, articles, which='test'):
    """Predict a sense for every explicit relation and store it in place.

    Features for all explicit relations are written to ``tmp/exp.feat``,
    ``Corpus.test_with_opennlp`` is run with the parser's model file to
    produce ``tmp/exp.pred``, and the last whitespace-separated token of each
    prediction line is mapped through ``LABEL_SENSES_MAP`` and stored as the
    single-element ``rel.sense`` list of the relation at the same position.

    Args:
        articles: Re-iterable collection of objects exposing
            ``exp_relations``; it is traversed twice in the same order.
        which: Forwarded to ``print_features`` (defaults to ``'test'``).

    Raises:
        KeyError: If a predicted label is not a key of ``LABEL_SENSES_MAP``.
        IndexError: If there are fewer prediction lines than relations, or a
            prediction line is blank.
    """
    exp_feat_name = FILE_PATH + '/../tmp/exp.feat'
    expParser = Explicit()

    # `with` closes the feature file even if feature extraction raises.
    with open(exp_feat_name, 'w') as exp_sense_file:
        for art in articles:
            for rel in art.exp_relations:
                # NOTE(review): 'Conjunction' looks like a placeholder label
                # for prediction time -- confirm against print_features.
                expParser.print_features(rel, ['Conjunction'], which, exp_sense_file)

    exp_pred = FILE_PATH + '/../tmp/exp.pred'
    # NOTE: only the OpenNLP path is active; the disabled
    # Corpus.test_with_svm variant and its unused exp.vec path were removed.
    Corpus.test_with_opennlp(exp_feat_name, expParser.model_file, exp_pred)

    # Previously this handle was opened inside the comprehension and never
    # closed; read it under `with` instead.
    with open(exp_pred, 'r') as pred_file:
        exp_res = [LABEL_SENSES_MAP[line.strip().split()[-1]] for line in pred_file]

    # Predictions are positional: the n-th line belongs to the n-th relation
    # in traversal order.
    rid = 0
    for art in articles:
        for rel in art.exp_relations:
            rel.sense = [exp_res[rid]]
            rid += 1
def simple_create_representation(rep_type, path, restricted_context=None, thresh=None, normalize=True):
    """Load either an explicit PPMI representation or a plain embedding.

    Args:
        rep_type: ``'PPMI'`` selects ``Explicit.load``; any other value
            selects ``Embedding.load``.
        path: Location passed to the selected loader.
        restricted_context: Forwarded to ``Explicit.load`` (PPMI only).
        thresh: Forwarded to ``Explicit.load`` (PPMI only).
        normalize: Forwarded to ``Explicit.load`` (PPMI only).

    Returns:
        The object returned by the selected loader.
    """
    # Non-PPMI types ignore the keyword options entirely; the embedding is
    # always loaded with a literal True as its second argument.
    if rep_type != 'PPMI':
        return Embedding.load(path, True)

    return Explicit.load(
        path,
        normalize=normalize,
        restricted_context=restricted_context,
        thresh=thresh,
    )