def train_iteration(opts, assoc_model, trainer, do_sym, log_file):
    """
    Setup where association scores are learned, relation by relation.
    Based on `model.macro_loops()`.
    :return: full-graph iteration scores
    """
    ep_loss = 0.0
    # iterate over relations:
    graphs_order = list(assoc_model.graphs.items())
    # TODO maybe even randomize edge order across relations
    if opts.rand_nodes:
        dy.np.random.shuffle(graphs_order)
    for rel, g in graphs_order:
        # report
        if opts.v > 0:
            timeprint('starting loop over {}'.format(rel))
        if opts.rule_override and rel in SYMMETRIC_RELATIONS and not do_sym:
            timeprint('RELATION OVERRIDDEN')
            continue
        # iterate over nodes (each as source + as target):
        node_order = list(range(N))
        if opts.rand_nodes:
            dy.np.random.shuffle(node_order)
        for node in tqdm(node_order):
            if opts.debug and node % 100 != 0:
                continue
            ep_loss += node_iteration(rel, g, node, opts, assoc_model, trainer, log_file, is_source=True)
            ep_loss += node_iteration(rel, g, node, opts, assoc_model, trainer, log_file, is_source=False)
    return ep_loss
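# Illustrative driver (a sketch, not part of the original script): one way the per-epoch
# association training loop above might be invoked. `opts`, `assoc_model`, `trainer` and
# `log_file` are assumed to have been built earlier in the script (argparse options, an
# AssociationModel, a dynet trainer, and an open log file or None); the epoch count and
# the do_sym schedule used by the real script may differ.
def run_assoc_training(opts, assoc_model, trainer, log_file, epochs, do_sym=True):
    losses = []
    for ep in range(epochs):
        ep_loss = train_iteration(opts, assoc_model, trainer, do_sym, log_file)
        timeprint('epoch {}: total loss = {:.4f}'.format(ep, ep_loss))
        losses.append(ep_loss)
    return losses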
def __init__(self, graphs, embeddings, mode=TRANSLATIONAL_EMBED_MODE, dropout=0.0, model_path=None):
    """
    :param graphs: dictionary of <relation:CSR-format graph>s, node-aligned
    :param embeddings: list of numpy array embeddings, indices aligned to nodes
    :param mode: mode of calculating association score, options: {}
    """.format(MODES_STR)
    # input validation
    graph_sizes = list(set([g.shape[0] for g in list(graphs.values())]))
    assert len(graph_sizes) == 1
    assert len(embeddings) == graph_sizes[0], '{} != {}'.format(len(embeddings), graph_sizes[0])

    # raw members
    self.graphs = {canonicalize_name(k): g for k, g in list(graphs.items())}
    self.mode = mode

    # documentation-y members
    self.relation_names = sorted(self.graphs.keys())
    if 'co_hypernym' in self.relation_names:
        self.relation_names.remove('co_hypernym')
    self.vocab_size = graph_sizes[0]
    self.R = len(self.relation_names)
    self.emb_dim = len(embeddings[0])
    self.dropout = dropout

    # model members
    self.model = dy.Model()
    # TODO consider using no_update param for embeddings
    self.embeddings = self.model.add_lookup_parameters((self.vocab_size, self.emb_dim))
    self.embeddings.init_from_array(embeddings)

    # init association parameter
    self.no_assoc = False  # so it can be overridden in inheritors

    # first determine parameter dimensions
    if self.mode == BILINEAR_MODE:  # full-rank bilinear matrix
        assoc_dim = (self.emb_dim, self.emb_dim)
    elif self.mode == DIAG_RANK1_MODE:  # diagonal bilinear matrix + rank 1 matrix
        # first row = diagonal
        # second row = 'source factor'
        # third row = 'target factor'
        assoc_dim = (3, self.emb_dim)
    elif self.mode == TRANSLATIONAL_EMBED_MODE:  # additive relational vector
        assoc_dim = self.emb_dim
    elif self.mode == DISTMULT:  # diagonal bilinear matrix
        assoc_dim = self.emb_dim
    else:
        raise ValueError('unsupported mode: {}. allowed are {}'
                         .format(self.mode, ', '.join(MODES_STR)))

    # init actual parameter
    self.word_assoc_weights = {r: self.model.add_parameters(assoc_dim) for r in self.relation_names}
    if model_path is not None:
        self.model.populate(model_path + '.dyn')

    timeprint('finished initialization for association model.')
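# Orientation sketch (assumption-laden, not part of the class): the four association modes
# above follow standard knowledge-graph scoring functions; the exact expressions used by
# this model live in word_assoc_score(). The numpy versions below only illustrate how the
# parameter shapes chosen for assoc_dim are typically used. The diag_rank1 reading of the
# three rows (diagonal, source factor, target factor) is one plausible interpretation.
import numpy as np

def sketch_assoc_score(s, t, W, mode):
    if mode == 'bilinear':        # W: (d, d) full matrix
        return s @ W @ t
    if mode == 'diag_rank1':      # W: (3, d) = [diagonal; source factor; target factor]
        return s @ (np.diag(W[0]) + np.outer(W[1], W[2])) @ t
    if mode == 'translational':   # W: (d,) relation vector, TransE-style
        return -np.linalg.norm(s + W - t)
    if mode == 'distmult':        # W: (d,) diagonal bilinear
        return np.sum(s * W * t)
    raise ValueError(mode)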
def init_ergm_features(self, graphs=None):
    """
    Computes ERGM features from scratch, populates cache members and self.feature_vals
    :param graphs: if not None, changes underlying member and inits from it.
    """
    if graphs is not None:
        self.graphs = graphs
    self.feature_vals = {}
    self.init_ergm_cache()
    self.update_features()
    timeprint('initialized features from cache')
def node_iteration(rel, g, node, opts, assoc_model, trainer, log_file, is_source):
    """
    Perform one iteration of trying to score a node's neighbors above negative samples.
    """
    # true instances likelihood
    trues = targets(g, node) if is_source else sources(g, node)
    side = '->' if is_source else '<-'
    if len(trues) == 0:
        return 0.0

    if opts.debug:
        dy.renew_cg(immediate_compute=True, check_validity=True)
    else:
        dy.renew_cg()

    # compute association score as dynet expression (can't do this above due to staleness)
    true_scores = []
    for tr in trues:
        if is_source:
            j_assoc_score = assoc_model.word_assoc_score(node, tr, rel)
        else:
            j_assoc_score = assoc_model.word_assoc_score(tr, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tTRUE_{}\t{:.3e}\n'
                           .format(node, side, tr, j_assoc_score.scalar_value()))
        true_scores.append(j_assoc_score)

    # false targets likelihood - negative sampling (uniform)
    # collect negative samples
    if opts.nll:
        sample_scores = [[ts] for ts in true_scores]
    else:
        margins = []
    neg_samples = [np.random.choice(range(N)) for _ in range(opts.neg_samp * len(trues))]
    # remove source and true targets if applicable
    for t in [node] + trues:
        if t in neg_samples:
            neg_samples.remove(t)
            neg_samples.append(np.random.choice(range(N)))
    for (i, ns) in enumerate(neg_samples):
        # compute association score as dynet expression
        if is_source:
            ns_assoc_score = assoc_model.word_assoc_score(node, ns, rel)
        else:
            ns_assoc_score = assoc_model.word_assoc_score(ns, node, rel)
        if log_file is not None:
            log_file.write('{} {}\tNEG_{}\t{:.3e}\n'
                           .format(node, side, ns, ns_assoc_score.scalar_value()))
        corresponding_true = i // opts.neg_samp
        if opts.nll:
            sample_scores[corresponding_true].append(ns_assoc_score)
        else:
            # TODO maybe use dy.hinge()
            ctt_score = true_scores[corresponding_true]
            margin = ctt_score - ns_assoc_score
            margins.append(dy.rectify(dy.scalarInput(1.0) - margin))

    # compute overall loss
    if opts.nll:
        if len(sample_scores) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum([dy.pickneglogsoftmax(dy.concatenate(scrs), 0) for scrs in sample_scores])
    else:
        if len(margins) == 0:
            dy_loss = dy.scalarInput(0.0)
        else:
            dy_loss = dy.esum(margins)
    sc_loss = dy_loss.scalar_value()
    if log_file is not None:
        log_file.write('{}\tLOSS\t{:.3e}\n'.format(node, sc_loss))

    # backprop and recompute score
    if opts.v > 1:
        timeprint('overall loss for relation {}, node {} as {} = {:.6f}'
                  .format(rel, node, 'source' if is_source else 'target', sc_loss))
    dy_loss.backward()
    trainer.update()

    return sc_loss
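# Tiny numeric illustration (standalone, not used by the code): the two loss variants
# above on one true score and its negative-sample scores; numpy stands in for dynet.
import numpy as np

_true_score = 2.0
_neg_scores = np.array([1.5, 2.5, 0.0])

# margin (hinge) variant: sum of max(0, 1 - (true - neg))           -> 2.0 here
_margin_loss = np.maximum(0.0, 1.0 - (_true_score - _neg_scores)).sum()

# NLL variant: -log softmax of the true score against [true] + negs -> ~1.22 here
_scores = np.concatenate(([_true_score], _neg_scores))
_nll_loss = np.log(np.exp(_scores).sum()) - _true_score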
def remove_edge(self, src_idx, trg_idx, rel, update_feats=True, permanent=True, caches=None, report_feat_diff=False):
    """
    Removes edge from graph, updates cache and feature values
    :param src_idx: index of source node from edge to remove
    :param trg_idx: index of target node from edge to remove
    :param rel: relation type
    :param update_feats: flag for not updating all cache and features, to be deferred
    :returns: if permanent=False, returns ergm score of removed-edge graph
    """
    if permanent:
        self.score_is_stale = True
        cached_feats = None
        cached_cache = None
    else:
        if caches is not None:
            cached_cache = copy.deepcopy(caches[0])
            cached_feats = caches[1]
        else:
            cached_cache = copy.deepcopy(self.cache)
            cached_feats = copy.deepcopy(self.feature_vals)
        update_feats = True  # no other mode possible

    # update cache members
    # decrement edge count for rel
    self.edge_counts[rel] -= 1

    # pair cache members
    for r, g in list(self.graphs.items()):
        if rel == 'hypernym' and r == 'co_hypernym':
            continue
        # decrement mutual edge count for pairs with trg-src edges
        if g[trg_idx, src_idx] == 1:
            self.mutual_edge_counts[find_key(self.mutual_edge_counts, (rel, r))] -= 1
        # decrement two-paths for x-src-trg and src-trg-y
        self.two_path_counts[(r, rel)] -= self.in_degs[r][0, src_idx]
        self.two_path_counts[(rel, r)] -= self.out_degs[r][0, trg_idx]

        # triplet cache members
        for r2, g2 in list(self.graphs.items()):
            if rel == 'hypernym' and r2 == 'co_hypernym':
                continue
            # decrement transitive closures from two-paths src-x-trg
            if self.out_degs[r][0, src_idx] > 0 and self.in_degs[r2][0, trg_idx] > 0:
                r_r2_betweens = (g[src_idx] * g2[:, trg_idx]).sum()
                self.transitive_closure_counts[(r, r2, rel)] -= r_r2_betweens
            # decrement directed triangle count
            if self.out_degs[r2][0, trg_idx] > 0 and self.in_degs[r][0, src_idx] > 0:
                r_r2_cycles = (g2[trg_idx] * g[:, src_idx]).sum()
                rs_key = find_cyclic_key(self.directed_triangle_counts, (r, rel, r2))
                self.directed_triangle_counts[rs_key] -= r_r2_cycles

    # decrement src's out_degree and trg's in_degree in rel and update all related caches
    self.out_degs[rel][0, src_idx] -= 1
    self.in_degs[rel][0, trg_idx] -= 1

    if update_feats:
        # recompute heavy cache updates from raw counts
        self.update_stars_cache_from_edge(rel, src_idx, trg_idx, added=False)
        # update features from caches
        self.update_features()

    if not permanent and report_feat_diff:
        timeprint('changed feature values:')
        diff_keys = [k for k in self.feature_vals if self.feature_vals[k] != cached_feats[k]]
        if len(diff_keys) > 0:
            print('\n'.join(['{}: from {} to {}'
                             .format(k, cached_feats[k], self.feature_vals[k]) for k in diff_keys]))

    if permanent:
        # remove actual edge
        self.graphs[rel][src_idx, trg_idx] = 0
    else:
        if rel == 'hypernym':
            self.zero_all_feats('co_hypernym')
        # prepare return value
        ret = self.ergm_score()
        # revert everything
        self.reread_cache(cached_cache)
        self.feature_vals = cached_feats
        # return prepared score
        return ret
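# Usage sketch (illustrative): the permanent flag separates a real graph edit from a
# counterfactual query. With permanent=False the caches are backed up and restored before
# returning, so the call only reports the ERGM score of the graph with (u, v) removed.
# `ergm`, `u`, `v`, `rel` are assumed to come from the caller.
def counterfactual_removal_score(ergm, u, v, rel):
    return ergm.remove_edge(u, v, rel, permanent=False).scalar_value()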
parser.add_argument("--neg-samp", type=int, default=ASSOC_DEFAULT_NEGS, help="nodes for negative sample") parser.add_argument("--rand-nodes", action="store_false", help="randomize node order in training") parser.add_argument("--rule-override", action="store_false", help="rule-based override for symmetric relations") parser.add_argument("--eval-dev", action='store_true', help="evaluate on dev set (otherwise - test)") parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--nll", action='store_true', help="use negative log likelihood loss") parser.add_argument("--no-log", action='store_true') parser.add_argument("--early-stopping", action='store_true', help="stop if model hasn't improved in 3 epochs") parser.add_argument("--v", type=int, default=1, help="verbosity") parser.add_argument("--debug", action='store_true') opts = parser.parse_args() start_time = datetime.now() # reporting timeprint('graphs file = {}'.format(opts.input)) if opts.embeddings is not None: timeprint('embeddings file = {}'.format(opts.embeddings)) else: timeprint('embeddings size = {}'.format(opts.emb_size)) timeprint('association mode = {}'.format(opts.assoc_mode)) timeprint('negative samples = {}'.format(opts.neg_samp)) if opts.model is None: timeprint('model file = {}'.format(opts.model_out)) if opts.nll: timeprint('using negative log likelihood loss') else: timeprint('using margin loss') if opts.no_log: timeprint('no log file. timestamp for test: {}_{}' \ .format(start_time.date(), start_time.time()))
def init_ergm_cache(self):
    """
    Computes ERGM features from scratch, populates cache members
    """
    # edges
    for r in self.relation_names:
        edges = edge_count(self.graphs[r])
        self.edge_counts[r] = edges
    timeprint('populated edge cache')

    # mutual edges
    for i, n1 in enumerate(self.relation_names):
        r1 = self.graphs[n1]
        for j in range(i + 1):  # unordered, including self
            n2 = self.relation_names[j]
            r2 = self.graphs[n2]
            mut_edges = mutual_edges(r1, r2)
            self.mutual_edge_counts[(n1, n2)] = mut_edges
    timeprint('populated mutual edge cache')

    # directed triangles - iterate over R^2 + choose(r,3)/3 ordered relation triplets
    eye = csr_eye(self.vocab_size)
    for i, j, k in cyclic_triads(self.R):
        n1, n2, n3 = self.relation_names[i], self.relation_names[j], self.relation_names[k]
        r1, r2, r3 = self.graphs[n1], self.graphs[n2], self.graphs[n3]
        dir_triangles = (r1 * r2 * r3).multiply(eye).sum()
        if i == j and j == k:
            # each triangle was counted thrice, except self loops
            self_loops = r1.diagonal().sum()
            dir_triangles += 2 * self_loops
            dir_triangles /= 3
        self.directed_triangle_counts[(n1, n2, n3)] = dir_triangles
    timeprint('extracted directed triangle features')

    # transitive closures - iterate over ordered relation triplets
    # (also populate self.two_path_counts)
    for n1, r1 in list(self.graphs.items()):
        for n2, r2 in list(self.graphs.items()):
            two_paths = r1 * r2
            two_path_count = max([two_paths.sum(), sys.float_info.epsilon])
            self.two_path_counts[(n1, n2)] = two_path_count
            for n3, r3 in list(self.graphs.items()):
                closures = two_paths.multiply(r3).sum()  # pointwise
                self.transitive_closure_counts[(n1, n2, n3)] = closures
    timeprint('populated transitivity cache')

    # 1-star cache for updates + self-2,3-stars
    for n, g in list(self.graphs.items()):
        self.in_degs[n] = g.sum(0)  # numpy matrix
        self.out_degs[n] = g.sum(1).transpose()  # numpy matrix
        osc = one_rel_star_counts(self.in_degs[n], self.out_degs[n])
        self.in_one_star_counts[n] = osc['i1sc']
        self.out_one_star_counts[n] = osc['o1sc']
        self.in_two_star_counts[(n, n)] = osc['i2sc']
        self.out_two_star_counts[(n, n)] = osc['o2sc']
        self.in_three_star_counts[(n, n, n)] = osc['i3sc']
        self.out_three_star_counts[(n, n, n)] = osc['o3sc']
        self.in_one_p_star_counts[n] = osc['i1psc']
        self.out_one_p_star_counts[n] = osc['o1psc']
        self.in_two_p_star_counts[(n, n)] = osc['i2psc']
        self.out_two_p_star_counts[(n, n)] = osc['o2psc']
        self.in_three_p_star_counts[(n, n, n)] = osc['i3psc']
        self.out_three_p_star_counts[(n, n, n)] = osc['o3psc']
    timeprint('populated 1r-star cache')

    # 2-stars
    for n1, n2 in combinations(self.relation_names, 2):
        tsc = two_rel_star_counts(self.in_degs[n1], self.out_degs[n1],
                                  self.in_degs[n2], self.out_degs[n2])
        self.in_two_star_counts[(n1, n2)] = tsc['i2sc']
        self.out_two_star_counts[(n1, n2)] = tsc['o2sc']
        self.in_three_star_counts[(n1, n1, n2)] = tsc['i3sc112']
        self.out_three_star_counts[(n1, n1, n2)] = tsc['o3sc112']
        self.in_three_star_counts[(n1, n2, n2)] = tsc['i3sc122']
        self.out_three_star_counts[(n1, n2, n2)] = tsc['o3sc122']
        self.in_two_p_star_counts[(n1, n2)] = tsc['i2psc']
        self.out_two_p_star_counts[(n1, n2)] = tsc['o2psc']
        self.in_three_p_star_counts[(n1, n1, n2)] = tsc['i3psc112']
        self.out_three_p_star_counts[(n1, n1, n2)] = tsc['o3psc112']
        self.in_three_p_star_counts[(n1, n2, n2)] = tsc['i3psc122']
        self.out_three_p_star_counts[(n1, n2, n2)] = tsc['o3psc122']
    timeprint('populated 2r-star cache')

    # 3-stars
    for n1, n2, n3 in combinations(self.relation_names, 3):
        ttsc = three_rel_star_counts(self.in_degs[n1], self.out_degs[n1],
                                     self.in_degs[n2], self.out_degs[n2],
                                     self.in_degs[n3], self.out_degs[n3])
        self.in_three_star_counts[(n1, n2, n3)] = ttsc['i3sc']
        self.out_three_star_counts[(n1, n2, n3)] = ttsc['o3sc']
        self.in_three_p_star_counts[(n1, n2, n3)] = ttsc['i3psc']
        self.out_three_p_star_counts[(n1, n2, n3)] = ttsc['o3psc']
    timeprint('populated 3r-star cache')
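# Standalone sanity check (illustrative, not part of the class): the sparse-matrix
# identities the cache above relies on, demonstrated on a tiny 4-node, single-relation
# graph with edges 0->1, 0->2, 1->2, 2->3, 3->0.
import numpy as np
from scipy.sparse import csr_matrix, identity

_r = csr_matrix(np.array([[0, 1, 1, 0],
                          [0, 0, 1, 0],
                          [0, 0, 0, 1],
                          [1, 0, 0, 0]]))
_eye = identity(4, format='csr')

_two_paths = (_r * _r).sum()                    # directed length-2 paths: 6
_closures = (_r * _r).multiply(_r).sum()        # two-paths whose endpoints are also linked: 1
_triangles = (_r * _r * _r).multiply(_eye).sum() / 3  # directed 3-cycles (each counted thrice): 1
_mutual = _r.multiply(_r.T).sum() / 2           # reciprocated edge pairs (each counted twice here): 0
_in_degs, _out_degs = _r.sum(0), _r.sum(1).transpose()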
def __init__(self, graphs, embeddings, assoc_mode=BILINEAR_MODE, reg=0.0, dropout=0.0,
             no_assoc=False, model_path=None, ergm_path=None, path_only_init=False):
    """
    :param graphs: dictionary of {relation:CSR-format graph}s, node-aligned
    :param embeddings: list of numpy array embeddings, indices aligned to nodes
    :param model_path: optional path for files with pre-trained association model (read by super)
    :param ergm_path: optional path for files with pre-trained model
    :param path_only_init: model_path only used for initialization
    """
    # input validation
    AssociationModel.__init__(self, graphs, embeddings, assoc_mode, dropout, model_path=model_path)

    # raw members
    self.no_assoc = no_assoc
    self.regularize = reg

    # cache members
    self.cache = {}
    self.edge_counts = self.add_cache_dict('ec')                  # keys are single relations
    self.mutual_edge_counts = self.add_cache_dict('mec')          # keys are unordered relation pairs
    self.two_path_counts = self.add_cache_dict('tpc')             # keys are ordered relation pairs
    self.transitive_closure_counts = self.add_cache_dict('tcc')   # keys are ordered relation triplets
    self.directed_triangle_counts = self.add_cache_dict('dtc')    # keys are ordered relation triplets
    self.in_degs = self.add_cache_dict('ins')                     # keys are single relations, values are big lists
    self.out_degs = self.add_cache_dict('outs')                   # keys are single relations, values are big lists
    self.in_one_star_counts = self.add_cache_dict('i1sc')         # keys are single relations
    self.out_one_star_counts = self.add_cache_dict('o1sc')        # keys are single relations
    self.in_two_star_counts = self.add_cache_dict('i2sc')         # keys are unordered relation pairs
    self.out_two_star_counts = self.add_cache_dict('o2sc')        # keys are unordered relation pairs
    self.in_three_star_counts = self.add_cache_dict('i3sc')       # keys are unordered relation triplets
    self.out_three_star_counts = self.add_cache_dict('o3sc')      # keys are unordered relation triplets
    # 'at least k' stars - 'one/two/three plus'
    self.in_one_p_star_counts = self.add_cache_dict('i1psc')      # keys are single relations
    self.out_one_p_star_counts = self.add_cache_dict('o1psc')     # keys are single relations
    self.in_two_p_star_counts = self.add_cache_dict('i2psc')      # keys are unordered relation pairs
    self.out_two_p_star_counts = self.add_cache_dict('o2psc')     # keys are unordered relation pairs
    self.in_three_p_star_counts = self.add_cache_dict('i3psc')    # keys are unordered relation triplets
    self.out_three_p_star_counts = self.add_cache_dict('o3psc')   # keys are unordered relation triplets

    self.missing_node_indices = []  # updates during training (NOT SURE IF NEEDED)

    timeprint('computing ERGM features...')
    self.init_ergm_features()  # populates self.feature_vals
    timeprint('finished! computed {} features'.format(len(self.feature_vals)))
    timeprint('{} non-zero features'.format(np.count_nonzero(list(self.feature_vals.values()))))

    # documentationy again, for efficient updates
    encountered_features = list(self.feature_vals.keys())  # canonical ordering from now on
    if ergm_path is not None:
        ergm_model_path = ergm_path
    elif (model_path is not None) and (not path_only_init):
        ergm_model_path = model_path
    else:
        ergm_model_path = None
    if ergm_model_path is None:
        self.feature_set = encountered_features
    else:
        self.feature_set = pickle.load(open(ergm_model_path + '.feats'))
        assert sorted(self.feature_set) == sorted(encountered_features)

    if ergm_model_path is None:
        self.ergm_weights = self.model.add_parameters(len(self.feature_set))
    if model_path is None and ergm_model_path is None:
        # 'model_path is not None' is initialized in super()
        # TODO support other association modes (affects downstream)
        if self.no_assoc:
            self.word_assoc_weights = {r: self.model.add_parameters((self.emb_dim, self.emb_dim),
                                                                    init=dy.ConstInitializer(0.0))
                                       for r in self.relation_names}
        else:
            self.word_assoc_weights = {r: self.model.add_parameters((self.emb_dim, self.emb_dim))
                                       for r in self.relation_names}
    elif ergm_model_path is not None:
        pc = dy.ParameterCollection()
        dy.load(ergm_model_path + '.dyn', pc)
        pc_list = pc.parameters_list()
        i = 0
        self.ergm_weights = pc_list[i]
        if not path_only_init:
            self.word_assoc_weights = {}
            rel_order = self.relation_names
            for r in rel_order:
                i += 1
                self.word_assoc_weights[r] = pc_list[i]
        i += 1
        assert i == len(pc_list), \
            '{} relation params read but length is {}'.format(i, len(pc_list))

    self.dy_score = self.ergm_score()
    self.score = self.dy_score.scalar_value()
    self.score_is_stale = False
    timeprint('finished initialization. initial ERGM score = {}'.format(self.score))
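# Orientation sketch (under assumptions, not the class's actual method): an ERGM assigns
# a graph the log-linear score w . f(G), where f(G) is the vector of cached feature values
# above (edge counts, mutual edges, stars, two-paths, triangles, ...). self.ergm_score()
# is assumed to compute the dynet version of this dot product over self.feature_set.
import numpy as np

def sketch_ergm_score(weights, feature_vals, feature_set):
    # feature_vals: dict feature-key -> count; feature_set: canonical key ordering
    f = np.array([feature_vals[k] for k in feature_set], dtype=float)
    return float(np.dot(weights, f))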
LOWER = True
ALL_LEMMAS = True


def lemmas(s):
    if ALL_LEMMAS:
        name = '_'.join(s.lemma_names())
    else:
        name = s.lemma_names()[0]
    if LOWER:
        name = name.lower()
    return name.split('_')


if __name__ == '__main__':
    if len(sys.argv) < 3:
        timeprint('usage: embed_from_words.py input_embs output_embs [WN prediction dataset]')
        exit(1)
    in_file = sys.argv[1]

    # create target dataset
    if len(sys.argv) > 3:
        # third param is WN dataset
        wn_vocab = load_prediction_dataset(sys.argv[3])[-1]
        synsets = [wn.synset(w) for w in wn_vocab]
    else:
        synsets = list(wn.all_synsets())
    timeprint('read {} synsets'.format(len(synsets)))

    target_words = set()
    timeprint('preparing target word dataset')
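# Worked example (illustrative): with ALL_LEMMAS and LOWER set as above, a synset such as
# wn.synset('dog.n.01'), whose lemma names are ['dog', 'domestic_dog', 'Canis_familiaris'],
# yields the joined string 'dog_domestic_dog_canis_familiaris', so
# lemmas(wn.synset('dog.n.01')) returns ['dog', 'domestic', 'dog', 'canis', 'familiaris'].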
def macro_loops(opts, ep_idx, multi_graph, trainer, log_file, synsets, use_assoc=True):
    """
    Passing over graph node by node, relation by relation.
    Single update returned, based on importance sampling from entire graph.
    :param opts: parameter dictionary from calling model
    :param ep_idx: epoch index
    :param multi_graph: trained data structure
    :param trainer: dynet training module
    :param log_file: log file location
    :param synsets: synset name dictionary for reporting
    :param use_assoc: include association component in scores
    :return: node-iteration scores
    """
    iteration_scores = []
    iteration_scores.append(multi_graph.score)
    N = multi_graph.vocab_size
    timeprint('caching original graph features')

    # report
    if opts.v > 0:
        timeprint('starting epoch {}'.format(ep_idx))

    if not opts.rand_all:
        # iterate over relations
        graphs_order = list(multi_graph.graphs.keys())
        if opts.rand_nodes:
            dy.np.random.shuffle(graphs_order)
        for rel in graphs_order:
            # report
            if opts.v > 0:
                timeprint('starting loop over {}'.format(rel))
            if opts.skip_symmetrics and rel in SYMMETRIC_RELATIONS:
                timeprint('skipping symmetric relation {}'.format(rel))
                continue
            if rel == 'co_hypernym':
                timeprint('skipping auxiliary co_hypernym relation')
                continue

            # compute target-wide association cache (no backprop)
            if use_assoc:
                assoc_cache = multi_graph.source_ranker_cache(rel)
            else:
                assoc_cache = np.zeros((multi_graph.word_assoc_weights[rel].shape()[0],
                                        multi_graph.embeddings.shape()[1]))
            timeprint('calculated association cache for {}'.format(rel))

            # iterate over nodes:
            node_order = list(range(N))
            if opts.rand_nodes:
                dy.np.random.shuffle(node_order)
            for src_i in tqdm(node_order):
                assoc_cache = macro_node_iteration(opts, multi_graph, assoc_cache, trainer,
                                                   log_file, synsets, rel, src_i, use_assoc)

            multi_graph.rescore()
            # total score = sum over all nodes
            iteration_scores.append(multi_graph.score)
    else:
        # iterate randomly over <rel, node>-s iid
        # rand_nodes implied
        all_rels = list(multi_graph.graphs.keys())
        if opts.skip_symmetrics:
            rels = [r for r in all_rels if r not in SYMMETRIC_RELATIONS]
        else:
            rels = all_rels
        if 'co_hypernym' in rels:
            rels.remove('co_hypernym')
        if use_assoc:
            assoc_caches = {rel: multi_graph.source_ranker_cache(rel) for rel in rels}
        else:
            assoc_caches = {rel: np.zeros((multi_graph.word_assoc_weights[rel].shape()[0],
                                           multi_graph.embeddings.shape()[1]))
                            for rel in rels}
        relnode_order = list(range(N * len(rels)))
        dy.np.random.shuffle(relnode_order)
        for idx in tqdm(relnode_order):
            rel = rels[idx % len(rels)]
            src_i = idx % N
            assoc_caches[rel] = macro_node_iteration(opts, multi_graph, assoc_caches[rel], trainer,
                                                     log_file, synsets, rel, src_i, use_assoc)
        # only happens once in this setup
        multi_graph.rescore()
        # total score = sum over all nodes
        iteration_scores.append(multi_graph.score)

    return iteration_scores
parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--nll", action='store_true', help="use negative log likelihood loss") parser.add_argument("--no-log", action='store_true') parser.add_argument("--early-stopping", action='store_true', help="stop if model hasn't improved in 3 epochs") parser.add_argument("--v", type=int, default=1, help="verbosity") parser.add_argument("--debug", action='store_true') opts = parser.parse_args() start_time = datetime.now() # reporting timeprint('graphs file = {}'.format(opts.input)) if opts.embeddings is not None: timeprint('embeddings file = {}'.format(opts.embeddings)) else: timeprint('embeddings size = {}'.format(opts.emb_size)) timeprint('association mode = {}'.format(opts.assoc_mode)) timeprint('negative samples = {}'.format(opts.neg_samp)) if opts.model is None: timeprint('model file = {}'.format(opts.model_out)) if opts.nll: timeprint('using negative log likelihood loss') else: timeprint('using margin loss') if opts.no_log: timeprint('no log file. timestamp for test: {}_{}' \ .format(start_time.date(), start_time.time()))
def __init__(self, graphs, embeddings, assoc_mode=BILINEAR_MODE, reg=0.0, dropout=0.0, no_assoc=False, model_path=None, ergm_path=None, path_only_init=False): """ :param graphs: dictionary of {relation:CSR-format graph}s, node-aligned :param embeddings: list of numpy array embeddings, indices aligned to nodes :param model_path: optional path for files with pre-trained association model (read by super) :param ergm_path: optional path for files with pre-trained model :param path_only_init: model_path only used for initialization """ # input validation AssociationModel.__init__(self, graphs, embeddings, assoc_mode, dropout, model_path=model_path) # raw members self.no_assoc = no_assoc self.regularize = reg # cache members self.cache = {} self.edge_counts = self.add_cache_dict( 'ec') # keys are single relations self.mutual_edge_counts = self.add_cache_dict( 'mec') # keys are unordered relation pairs self.two_path_counts = self.add_cache_dict( 'tpc') # keys are ordered relation pairs self.transitive_closure_counts = self.add_cache_dict( 'tcc') # keys are ordered relation triplets self.directed_triangle_counts = self.add_cache_dict( 'dtc') # keys are ordered relation triplets self.in_degs = self.add_cache_dict( 'ins') # keys are single relations, values are big lists self.out_degs = self.add_cache_dict( 'outs') # keys are single relations, values are big lists self.in_one_star_counts = self.add_cache_dict( 'i1sc') # keys are single relations self.out_one_star_counts = self.add_cache_dict( 'o1sc') # keys are single relations self.in_two_star_counts = self.add_cache_dict( 'i2sc') # keys are unordered relation pairs self.out_two_star_counts = self.add_cache_dict( 'o2sc') # keys are unordered relation pairs self.in_three_star_counts = self.add_cache_dict( 'i3sc') # keys are unordered relation triplets self.out_three_star_counts = self.add_cache_dict( 'o3sc') # keys are unordered relation triplets # 'at least k' stars - 'one/two/three plus' self.in_one_p_star_counts = self.add_cache_dict( 'i1psc') # keys are single relations self.out_one_p_star_counts = self.add_cache_dict( 'o1psc') # keys are single relations self.in_two_p_star_counts = self.add_cache_dict( 'i2psc') # keys are unordered relation pairs self.out_two_p_star_counts = self.add_cache_dict( 'o2psc') # keys are unordered relation pairs self.in_three_p_star_counts = self.add_cache_dict( 'i3psc') # keys are unordered relation triplets self.out_three_p_star_counts = self.add_cache_dict( 'o3psc') # keys are unordered relation triplets self.missing_node_indices = [ ] # updates during training (NOT SURE IF NEEDED) timeprint('computing ERGM features...') self.init_ergm_features() # populates self.feature_vals timeprint('finished! 
computed {} features'.format( len(self.feature_vals))) timeprint('{} non-zero features'.format( np.count_nonzero(list(self.feature_vals.values())))) # documentationy again, for efficient updates encountered_features = list( self.feature_vals.keys()) # canonical ordering from now on if ergm_path is not None: ergm_model_path = ergm_path elif (model_path is not None) and (not path_only_init): ergm_model_path = model_path else: ergm_model_path = None if ergm_model_path is None: self.feature_set = encountered_features else: self.feature_set = pickle.load(open(ergm_model_path + '.feats')) assert sorted(self.feature_set) == sorted(encountered_features) if ergm_model_path is None: self.ergm_weights = self.model.add_parameters(len( self.feature_set)) if model_path is None and ergm_model_path is None: # 'model_path is not None' is initialized in super() # TODO support other association modes (affects downstream) if self.no_assoc: self.word_assoc_weights = { r: self.model.add_parameters((self.emb_dim, self.emb_dim), init=dy.ConstInitializer(0.0)) for r in self.relation_names } else: self.word_assoc_weights = { r: self.model.add_parameters((self.emb_dim, self.emb_dim)) for r in self.relation_names } elif ergm_model_path is not None: pc = dy.ParameterCollection() dy.load(ergm_model_path + '.dyn', pc) pc_list = pc.parameters_list() i = 0 self.ergm_weights = pc_list[i] if not path_only_init: self.word_assoc_weights = {} rel_order = self.relation_names for r in rel_order: i += 1 self.word_assoc_weights[r] = pc_list[i] i += 1 assert i == len(pc_list),\ '{} relation params read but length is {}'.format(i, len(pc_list)) self.dy_score = self.ergm_score() self.score = self.dy_score.scalar_value() self.score_is_stale = False timeprint('finished initialization. initial ERGM score = {}'.format( self.score))
def init_ergm_cache(self): """ Computes ERGM features from scratch, populates cache members """ # edges for r in self.relation_names: edges = edge_count(self.graphs[r]) self.edge_counts[r] = edges timeprint('populated edge cache') # mutual edges for i, n1 in enumerate(self.relation_names): r1 = self.graphs[n1] for j in range(i + 1): # unordered, including self n2 = self.relation_names[j] r2 = self.graphs[n2] mut_edges = mutual_edges(r1, r2) self.mutual_edge_counts[(n1, n2)] = mut_edges timeprint('populated mutual edge cache') # directed triangles - iterate over R^2 + choose(r,3)/3 ordered relation triplets eye = csr_eye(self.vocab_size) for i, j, k in cyclic_triads(self.R): n1, n2, n3 = self.relation_names[i], self.relation_names[ j], self.relation_names[k] r1, r2, r3 = self.graphs[n1], self.graphs[n2], self.graphs[n3] dir_triangles = (r1 * r2 * r3).multiply(eye).sum() if i == j and j == k: # each triangle was counted thrice, except self loops self_loops = r1.diagonal().sum() dir_triangles += 2 * self_loops dir_triangles /= 3 self.directed_triangle_counts[(n1, n2, n3)] = dir_triangles timeprint('extracted directed triangle features') # transitive closures - iterate over ordered relation triplets # (also populate self.two_path_counts) for n1, r1 in list(self.graphs.items()): for n2, r2 in list(self.graphs.items()): two_paths = r1 * r2 two_path_count = max([two_paths.sum(), sys.float_info.epsilon]) self.two_path_counts[(n1, n2)] = two_path_count for n3, r3 in list(self.graphs.items()): closures = two_paths.multiply(r3).sum() # pointwise self.transitive_closure_counts[(n1, n2, n3)] = closures timeprint('populated transitivity cache') # 1-star cache for updates + self-2,3-stars for n, g in list(self.graphs.items()): self.in_degs[n] = g.sum(0) # numpy matrix self.out_degs[n] = g.sum(1).transpose() # numpy matrix osc = one_rel_star_counts(self.in_degs[n], self.out_degs[n]) self.in_one_star_counts[n] = osc['i1sc'] self.out_one_star_counts[n] = osc['o1sc'] self.in_two_star_counts[(n, n)] = osc['i2sc'] self.out_two_star_counts[(n, n)] = osc['o2sc'] self.in_three_star_counts[(n, n, n)] = osc['i3sc'] self.out_three_star_counts[(n, n, n)] = osc['o3sc'] self.in_one_p_star_counts[n] = osc['i1psc'] self.out_one_p_star_counts[n] = osc['o1psc'] self.in_two_p_star_counts[(n, n)] = osc['i2psc'] self.out_two_p_star_counts[(n, n)] = osc['o2psc'] self.in_three_p_star_counts[(n, n, n)] = osc['i3psc'] self.out_three_p_star_counts[(n, n, n)] = osc['o3psc'] timeprint('populated 1r-star cache') # 2-stars for n1, n2 in combinations(self.relation_names, 2): tsc = two_rel_star_counts(self.in_degs[n1], self.out_degs[n1],\ self.in_degs[n2], self.out_degs[n2]) self.in_two_star_counts[(n1, n2)] = tsc['i2sc'] self.out_two_star_counts[(n1, n2)] = tsc['o2sc'] self.in_three_star_counts[(n1, n1, n2)] = tsc['i3sc112'] self.out_three_star_counts[(n1, n1, n2)] = tsc['o3sc112'] self.in_three_star_counts[(n1, n2, n2)] = tsc['i3sc122'] self.out_three_star_counts[(n1, n2, n2)] = tsc['o3sc122'] self.in_two_p_star_counts[(n1, n2)] = tsc['i2psc'] self.out_two_p_star_counts[(n1, n2)] = tsc['o2psc'] self.in_three_p_star_counts[(n1, n1, n2)] = tsc['i3psc112'] self.out_three_p_star_counts[(n1, n1, n2)] = tsc['o3psc112'] self.in_three_p_star_counts[(n1, n2, n2)] = tsc['i3psc122'] self.out_three_p_star_counts[(n1, n2, n2)] = tsc['o3psc122'] timeprint('populated 2r-star cache') # 3-stars for n1, n2, n3 in combinations(self.relation_names, 3): ttsc = three_rel_star_counts(self.in_degs[n1], self.out_degs[n1],\ self.in_degs[n2], 
self.out_degs[n2],\ self.in_degs[n3], self.out_degs[n3]) self.in_three_star_counts[(n1, n2, n3)] = ttsc['i3sc'] self.out_three_star_counts[(n1, n2, n3)] = ttsc['o3sc'] self.in_three_p_star_counts[(n1, n2, n3)] = ttsc['i3psc'] self.out_three_p_star_counts[(n1, n2, n3)] = ttsc['o3psc'] timeprint('populated 3r-star cache')
def remove_edge(self, src_idx, trg_idx, rel, update_feats=True, permanent=True, caches=None, report_feat_diff=False): """ Removes edge from graph, updates cache and feature values :param src_idx: index of source node from edge to remove :param trg_idx: index of target node from edge to remove :param rel: relation type :param update_feats: flag for not updating all cache and features, to be deferred :returns: if permanent=False, returns ergm score of removed-edge graph """ if permanent: self.score_is_stale = True cached_feats = None cached_cache = None else: if caches is not None: cached_cache = copy.deepcopy(caches[0]) cached_feats = caches[1] else: cached_cache = copy.deepcopy(self.cache) cached_feats = copy.deepcopy(self.feature_vals) update_feats=True # no other mode possible # update cache members # decrement edge count for rel self.edge_counts[rel] -= 1 # pair cache members for r,g in list(self.graphs.items()): if rel == 'hypernym' and r == 'co_hypernym': continue # decrement mutual edge count for pairs with trg-src edges if g[trg_idx, src_idx] == 1: self.mutual_edge_counts[find_key(self.mutual_edge_counts, (rel, r))] -= 1 # decrement two-paths for x-src-trg and src-trg-y self.two_path_counts[(r, rel)] -= self.in_degs[r][0,src_idx] self.two_path_counts[(rel, r)] -= self.out_degs[r][0,trg_idx] # triplet cache members for r2, g2 in list(self.graphs.items()): if rel == 'hypernym' and r2 == 'co_hypernym': continue # decrement transitive closures from two-paths src-x-trg if self.out_degs[r][0,src_idx] > 0 and self.in_degs[r2][0,trg_idx] > 0: r_r2_betweens = (g[src_idx] * g2[:,trg_idx]).sum() self.transitive_closure_counts[(r, r2, rel)] -= r_r2_betweens # decrement directed triangle count if self.out_degs[r2][0,trg_idx] > 0 and self.in_degs[r][0,src_idx] > 0: r_r2_cycles = (g2[trg_idx] * g[:,src_idx]).sum() rs_key = find_cyclic_key(self.directed_triangle_counts, (r, rel, r2)) self.directed_triangle_counts[rs_key] -= r_r2_cycles # decrement src's out_degree and trg's in_degree in rel and update all related caches self.out_degs[rel][0,src_idx] -= 1 self.in_degs[rel][0,trg_idx] -= 1 if update_feats: # recompute heavy cache updates from raw counts self.update_stars_cache_from_edge(rel, src_idx, trg_idx, added=False) # update features from caches self.update_features() if not permanent and report_feat_diff: timeprint('changed feature values:') diff_keys = [k for k in self.feature_vals if self.feature_vals[k] != cached_feats[k]] if len(diff_keys) > 0: print('\n'.join(['{}: from {} to {}'\ .format(k, cached_feats[k], self.feature_vals[k]) for k in diff_keys])) if permanent: # remove actual edge self.graphs[rel][src_idx,trg_idx] = 0 else: if rel == 'hypernym': self.zero_all_feats('co_hypernym') # prepare return value ret = self.ergm_score() # revert everything self.reread_cache(cached_cache) self.feature_vals = cached_feats # return prepared score return ret
def add_edge(self, src_idx, trg_idx, rel, permanent=False, caches=None, report_feat_diff=False):
    """
    Uses cache to update feature values and produce score
    :param src_idx: index of source node from edge to add
    :param trg_idx: index of target node from edge to add
    :param rel: relation type
    :param permanent: True if node assignment to remain as is (inference mode, or restitution)
    :param caches: optional - precomputed backup members (cache, features)
    :return: new ergm score
    """
    # back cache up
    if caches is not None:
        backup_cache = copy.deepcopy(caches[0])
        backup_feats = caches[1]
    elif not permanent:
        backup_cache = copy.deepcopy(self.cache)
        backup_feats = copy.deepcopy(self.feature_vals)
    else:
        backup_cache = None
        backup_feats = None

    # update cache members
    # increment edge count for rel
    self.edge_counts[rel] += 1

    # pair cache members
    for r, g in list(self.graphs.items()):
        if rel == 'hypernym' and r == 'co_hypernym':
            continue
        # increment mutual edge count for pairs with trg-src edges
        if g[trg_idx, src_idx] == 1:
            self.mutual_edge_counts[find_key(self.mutual_edge_counts, (rel, r))] += 1
        # increment two-paths for x-src-trg and src-trg-y
        self.two_path_counts[(r, rel)] += self.in_degs[r][0, src_idx]
        self.two_path_counts[(rel, r)] += self.out_degs[r][0, trg_idx]

        # triplet cache members
        for r2, g2 in list(self.graphs.items()):
            if rel == 'hypernym' and r2 == 'co_hypernym':
                continue
            # increment transitive closures from two-paths src-x-trg
            if self.out_degs[r][0, src_idx] > 0 and self.in_degs[r2][0, trg_idx] > 0:
                r_r2_betweens = (g[src_idx] * g2[:, trg_idx]).sum()
                self.transitive_closure_counts[(r, r2, rel)] += r_r2_betweens
            # increment directed triangle count
            if self.out_degs[r2][0, trg_idx] > 0 and self.in_degs[r][0, src_idx] > 0:
                r_r2_cycles = (g2[trg_idx] * g[:, src_idx]).sum()
                rs_key = find_cyclic_key(self.directed_triangle_counts, (r, rel, r2))
                self.directed_triangle_counts[rs_key] += r_r2_cycles

    # increment src's out_degree and trg's in_degree in rel and update all related caches
    self.out_degs[rel][0, src_idx] += 1
    self.in_degs[rel][0, trg_idx] += 1
    self.update_stars_cache_from_edge(rel, src_idx, trg_idx)

    # update features from caches
    self.update_features()
    if rel == 'hypernym':
        self.zero_all_feats('co_hypernym')

    if report_feat_diff:
        timeprint('changed feature values:')
        diff_keys = [k for k in self.feature_vals if self.feature_vals[k] != backup_feats[k]]
        print('\n'.join(['{}: from {} to {}'
                         .format(k, backup_feats[k], self.feature_vals[k]) for k in diff_keys]))

    # compute score for loss
    ret = self.ergm_score()

    if permanent:
        # add actual edge
        self.graphs[rel][src_idx, trg_idx] = 1
        # update score
        self.dy_score = ret
        self.score = ret.scalar_value()
        self.score_is_stale = False
    else:
        self.reread_cache(backup_cache)
        self.feature_vals = backup_feats

    return ret
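# Usage sketch (illustrative), mirroring the remove_edge example above: with the default
# permanent=False, add_edge backs up and restores its caches, so it can serve as a pure
# query for the ERGM score the graph would have if (u, v) were added under relation rel.
def candidate_edge_score(ergm, u, v, rel):
    return ergm.add_edge(u, v, rel).scalar_value()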
parser.add_argument("--regularize", type=float, default=0.1) parser.add_argument("--neg-samp", type=int, default=M3GM_DEFAULT_NEGS, help="number of negative samples") parser.add_argument("--no-assoc-bp", action='store_true', help="do not backprop into association model") parser.add_argument("--dropout", type=float, default=0.0, help="dropout for association model only, set to 0.0 in no-assoc-bp mode") # testing parser.add_argument("--rule-override", type=bool, default=True, help="rule-based override for symmetric relations") parser.add_argument("--rerank", type=int, default=100, help="number of top results to rerank") opts = parser.parse_args() # init start_time = datetime.now() # reporting timeprint('graphs file = {}'.format(opts.input)) timeprint('embeddings file = {}'.format(opts.embeddings if opts.embeddings is not None \ else 'of size {}'.format(opts.emb_size))) timeprint('association mode = {}'.format(opts.assoc_mode)) timeprint('reranker output file = {}'.format(opts.rerank_out)) if opts.model is None: timeprint('model output file = {}, only init = {}'.format(opts.model_out, opts.model_only_init)) timeprint('epochs = {}'.format(opts.epochs)) timeprint('Adagrad learning rate = {}'.format(opts.learning_rate)) timeprint('neg-samp = {}'.format(opts.neg_samp)) timeprint('rand-nodes = {}'.format(opts.rand_nodes)) timeprint('dropout = {}'.format(opts.dropout)) timeprint('regularizer labmda = {}'.format(opts.regularize)) else: timeprint('model file = {}, ergm model file = {}'.format(opts.model, opts.ergm_model)) if opts.ergm_model is not None:
def eval(prev_graphs, graphs, ergm, opts, N, log_file, rerank_file):
    writing = log_file is not None
    caches = (copy.deepcopy(ergm.cache), copy.deepcopy(ergm.feature_vals))
    rel_all_ranks = {}  # for final results
    rel_pre_ranks = {}  # for improvement analysis
    rel_erg_ranks = {}  # for ergm-alone analysis
    all_pre_ranks = []
    all_all_ranks = []
    all_erg_ranks = []
    insts = Counter()
    total_misses = Counter()
    overrides = Counter()
    rerank_ups = Counter()
    rerank_downs = Counter()
    erg_ups = Counter()
    erg_downs = Counter()
    rerank_diff = Counter()
    erg_diff = Counter()
    change_idx = 1
    rels_order = list(graphs.items())
    for rel, te_gr in rels_order:
        if rel == 'co_hypernym':
            continue
        # set up
        if writing:
            timeprint('testing relation {}'.format(rel))
            log_file.write('relation: {}\n'.format(rel))
        # add incrementally, eval each edge, revert
        tr_gr = prev_graphs[rel]  # to filter known connections
        s_assoc_cache = ergm.source_ranker_cache(rel)
        t_assoc_cache = ergm.target_ranker_cache(rel)
        override_rel = opts.rule_override and rel in SYMMETRIC_RELATIONS
        all_ranks = []
        pre_ranks = []
        erg_ranks = []
        if override_rel and writing:
            log_file.write('RELATION OVERRIDE\n')
        node_order = list(range(N))  # DO NOT RANDOMIZE THIS - NEED TO PREDICT BOTH SIDES
        for node in tqdm(node_order):
            s_trues, s_unch_loc_ranks, s_loc_gold_ranks, s_gold_reranked, s_gold_ergs, s_pls, change_idx = \
                node_loop(change_idx, ergm, rel, node, s_assoc_cache, caches, tr_gr, te_gr,
                          override_rel, opts.rerank, True, log_file, rerank_file)
            t_trues, t_unch_loc_ranks, t_loc_gold_ranks, t_gold_reranked, t_gold_ergs, t_pls, change_idx = \
                node_loop(change_idx, ergm, rel, node, t_assoc_cache, caches, tr_gr, te_gr,
                          override_rel, opts.rerank, False, log_file, rerank_file)
            total_trues = s_trues + t_trues
            insts[rel] += (total_trues)
            if override_rel:
                overrides[rel] += total_trues
            ulr = s_unch_loc_ranks + t_unch_loc_ranks
            lgr = s_loc_gold_ranks + t_loc_gold_ranks
            grr = s_gold_reranked + t_gold_reranked
            ger = s_gold_ergs + t_gold_ergs
            total_misses[rel] += (len(ulr))
            pre_ranks.extend(lgr)
            if override_rel:
                erg_ranks.extend(lgr)
                all_ranks.extend(lgr)
            else:
                all_ranks.extend(ulr + grr)
                erg_ranks.extend(ulr + ger)
            for pl in s_pls + t_pls:
                if pl[3] < pl[2]:
                    rerank_ups[rel] += 1
                if pl[3] > pl[2]:
                    rerank_downs[rel] += 1
                if pl[4] < pl[2]:
                    erg_ups[rel] += 1
                if pl[4] > pl[2]:
                    erg_downs[rel] += 1
                rerank_diff[rel] += (pl[2] - pl[3])
                erg_diff[rel] += (pl[2] - pl[4])
        rel_all_ranks[rel] = all_ranks
        rel_pre_ranks[rel] = pre_ranks
        rel_erg_ranks[rel] = erg_ranks

        all_all_ranks.extend(all_ranks)
        all_pre_ranks.extend(pre_ranks)
        all_erg_ranks.extend(erg_ranks)

    if writing:
        log_file.write('\nper relation:\n')
        for rel in list(graphs.keys()):
            if insts[rel] > 0 and insts[rel] - total_misses[rel] > 0:
                log_file.write('\n{}:\n'.format(rel))
                log_file.write('{} instances, {} misses\n'.format(insts[rel], total_misses[rel]))
                log_file.write('reranks: {} up, {} down\n'.format(rerank_ups[rel], rerank_downs[rel]))
                log_file.write('ERGM only: {} up, {} down\n'.format(erg_ups[rel], erg_downs[rel]))
                log_file.write('rank diff: {}, ERGM only: {}\n'.format(rerank_diff[rel], erg_diff[rel]))
                log_file.write('metrics: pre-rank\trerank\tERGM only\n')
                log_file.write('average rank: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
                    np.average(rel_pre_ranks[rel]), np.average(rel_all_ranks[rel]), np.average(rel_erg_ranks[rel])))
                log_file.write('mrr: {:.4f}\t{:.4f}\t{:.4f}\n'.format(
                    mrr(rel_pre_ranks[rel]), mrr(rel_all_ranks[rel]), mrr(rel_erg_ranks[rel])))
                log_file.write('mq: {:.4f}\t{:.4f}\t{:.4f}\n'.format(
                    mq(rel_pre_ranks[rel], N), mq(rel_all_ranks[rel], N), mq(rel_erg_ranks[rel], N)))
                log_file.write('h@100: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
                    h_at_n(rel_pre_ranks[rel], n=100), h_at_n(rel_all_ranks[rel], n=100),
                    h_at_n(rel_erg_ranks[rel], n=100)))
                log_file.write('h@10: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
                    h_at_n(rel_pre_ranks[rel]), h_at_n(rel_all_ranks[rel]), h_at_n(rel_erg_ranks[rel])))
                log_file.write('h@1: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
                    h_at_n(rel_pre_ranks[rel], n=1), h_at_n(rel_all_ranks[rel], n=1),
                    h_at_n(rel_erg_ranks[rel], n=1)))
        log_file.write('\ntotals:\n')
        log_file.write('total number of instances: {}\n'.format(sum(insts.values())))
        log_file.write('total misses: {}\n'.format(sum(total_misses.values())))
        log_file.write('overrides: {}\n'.format(sum(overrides.values())))
        log_file.write('rerank improvements: {}; regressions: {}\n'.format(
            sum(rerank_ups.values()), sum(rerank_downs.values())))
        log_file.write('only ERGM improvements: {}; regressions: {}\n'.format(
            sum(erg_ups.values()), sum(erg_downs.values())))
        log_file.write('total rank diffs: rerank {}, only ERGM {}\n'.format(
            sum(rerank_diff.values()), sum(erg_diff.values())))
        log_file.write('metrics: pre-rank\trerank\tERGM only\n')
        log_file.write('average rank: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
            np.average(all_pre_ranks), np.average(all_all_ranks), np.average(all_erg_ranks)))
        log_file.write('mrr: {:.4f}\t{:.4f}\t{:.4f}\n'.format(
            mrr(all_pre_ranks), mrr(all_all_ranks), mrr(all_erg_ranks)))
        log_file.write('mq: {:.4f}\t{:.4f}\t{:.4f}\n'.format(
            mq(all_pre_ranks, N), mq(all_all_ranks, N), mq(all_erg_ranks, N)))
        log_file.write('h@100: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
            h_at_n(all_pre_ranks, n=100), h_at_n(all_all_ranks, n=100), h_at_n(all_erg_ranks, n=100)))
        log_file.write('h@10: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
            h_at_n(all_pre_ranks), h_at_n(all_all_ranks), h_at_n(all_erg_ranks)))
        log_file.write('h@1: {:.5f}\t{:.5f}\t{:.5f}\n'.format(
            h_at_n(all_pre_ranks, n=1), h_at_n(all_all_ranks, n=1), h_at_n(all_erg_ranks, n=1)))

    print('number of instances:', sum(insts.values()))
    print('total misses:', sum(total_misses.values()))
    print('overrides:', sum(overrides.values()))
    print('average rank:', np.average(all_all_ranks))
    print('mrr: {:.4f}'.format(mrr(all_all_ranks)))
    print('mq:', mq(all_all_ranks, N))
    print('h@100: {:.5f}'.format(h_at_n(all_all_ranks, n=100)))
    print('h@10: {:.5f}'.format(h_at_n(all_all_ranks)))
    print('h@1: {:.5f}'.format(h_at_n(all_all_ranks, n=1)))

    return mrr(all_all_ranks), h_at_n(all_all_ranks, n=10), h_at_n(all_all_ranks, n=3), h_at_n(all_all_ranks, n=1)
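# Reference sketch (standalone, not the project's own helpers): the two standard ranking
# metrics reported above, assuming `ranks` holds 1-based ranks of the gold answers. The
# project's mq() also takes N and may normalize differently; it is not reproduced here.
import numpy as np

def sketch_mrr(ranks):
    return float(np.mean([1.0 / r for r in ranks]))

def sketch_h_at_n(ranks, n=10):
    return float(np.mean([1.0 if r <= n else 0.0 for r in ranks]))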
def macro_node_iteration(opts, multi_graph, assoc_cache, trainer, log_file, synsets, rel, src_i, use_assoc):
    """
    One node-relation iteration in a macro-level pass over the multigraph
    :param opts: parameter dictionary from calling model
    :param multi_graph: trained data structure
    :param assoc_cache: cache for association model
    :param trainer: dynet training module
    :param log_file: log file location
    :param synsets: synset name dictionary for reporting
    :param rel: relation type for iteration
    :param src_i: source node ID for iteration
    :param use_assoc: use association score model
    :return: state of cache after iteration
    """
    g = multi_graph.graphs[rel]
    N = multi_graph.vocab_size

    # set up iteration
    if opts.debug:
        dy.renew_cg(immediate_compute=True, check_validity=True)
    else:
        dy.renew_cg()

    # keep existing score for all deltas
    multi_graph.rescore()
    score_with_all = multi_graph.dy_score

    # report progress
    perform_verbosity_steps = opts.v > 1 or (opts.v > 0 and src_i > 0 and src_i % 10 == 0)
    if perform_verbosity_steps:
        timeprint('iterating on node {}, {}, current score = {:.6f}'
                  .format(src_i, synsets[src_i], score_with_all.scalar_value()))

    # true targets scoring
    true_targets = targets(g, src_i)
    if len(true_targets) == 0:
        # don't perform negative sampling without true targets
        return assoc_cache

    # compute log likelihood on targets
    # each used to be multiplied by multi_graph.a_scale
    target_assoc_scores = {t: multi_graph.word_assoc_score(src_i, t, rel) for t in true_targets}
    if opts.no_assoc_bp:
        # turn into values to detach from computation graph
        target_assoc_scores = {t: t_as.value() for t, t_as in list(target_assoc_scores.items())}
    target_scores = {t: score_with_all + t_as for t, t_as in list(target_assoc_scores.items())}

    # false targets scoring - importance sampling
    # compute softmax over all false targets based on bilinear scores
    if use_assoc:
        assoc_sc = multi_graph.score_from_source_cache(assoc_cache, src_i)
        neg_assocs = {j: s for j, s in enumerate(assoc_sc) if j not in true_targets and j != src_i}
    else:
        neg_assocs = {j: 1.0 for j in range(N) if j not in true_targets and j != src_i}
    neg_probs = softmaxify(neg_assocs)

    # collect negative samples
    # TODO see if searchsorted can work here too (issue in dynet repo)
    neg_samples = {t: [dy.np.random.choice(range(len(neg_assocs)), p=neg_probs)
                       for _ in range(opts.neg_samp)]
                   for t in true_targets}  # sample without return?

    # for reporting
    if perform_verbosity_steps:
        neg_sample_idcs = []
        for negs in list(neg_samples.values()):
            neg_sample_idcs.extend([list(neg_assocs.keys())[j] for j in negs])

    # compute neg log likelihood on negative samples
    margins = []
    for t in true_targets:
        t_score = target_scores[t]
        negs = [list(neg_assocs.keys())[j] for j in neg_samples[t]]
        # each used to be multiplied by multi_graph.a_scale
        neg_assoc_scores = [multi_graph.word_assoc_score(src_i, j, rel) for j in negs]
        if opts.no_assoc_bp:
            # turn into values to detach from computation graph
            neg_assoc_scores = [s.value() for s in neg_assoc_scores]
        # prepare graph for pass
        multi_graph.remove_edge(src_i, t, rel, permanent=True)
        t_cache = (copy.deepcopy(multi_graph.cache), copy.deepcopy(multi_graph.feature_vals))
        for jas, j, origj in zip(neg_assoc_scores, negs, neg_samples[t]):
            q_norm = 1.0 / neg_probs[origj]
            g_score = multi_graph.add_edge(src_i, j, rel, caches=t_cache,
                                           report_feat_diff=opts.v > 1)
            margins.append(dy.rectify(g_score + jas + MARGIN - t_score) * q_norm)
            log_file.write('{}\t{}\t{}\t{}\t{:.2e}\t{:.2e}\t{:.2e}\n'
                           .format(rel, src_i, t, j, t_score.scalar_value(), g_score.scalar_value(),
                                   jas if type(jas) == float else jas.value()))
        # revert graph for next margin iteration
        multi_graph.add_edge(src_i, t, rel, permanent=True)
    node_loss = dy.esum(margins)

    # backprop and recompute score
    if perform_verbosity_steps:
        timeprint('selected nodes {} with probabilities {}'
                  .format(neg_sample_idcs, ['{:.2e}'.format(neg_probs[n]) for n in neg_samples]))
        timeprint('overall {} loss = {:.6f}'
                  .format('margin' if opts.margin_loss else 'neg log', node_loss.scalar_value()))
        # record state for later reporting
        pre_weights = multi_graph.ergm_weights.as_array()
        pre_assoc = multi_graph.word_assoc_weights[rel].as_array()

    # add regularization
    if multi_graph.regularize > 0.0:
        node_loss += multi_graph.regularize * dy.l2_norm(dy.parameter(multi_graph.ergm_weights))

    # perform actual learning
    node_loss.backward()
    trainer.update()

    if perform_verbosity_steps:
        post_weights = multi_graph.ergm_weights.as_array()
        post_assoc = multi_graph.word_assoc_weights[rel].as_array()
        w_diff = post_weights - pre_weights
        a_diff = post_assoc - pre_assoc
        timeprint('changed weights = {}'.format(len(w_diff.nonzero()[0])))
        timeprint('changed pre_assoc = {}, norm {}'
                  .format(len(a_diff.nonzero()[0]), np.linalg.norm(a_diff)))

    # recompute assoc_cache columns for src_i and participating targets
    if use_assoc and not opts.no_assoc_bp:
        # TODO normalize embeddings?
        return multi_graph.source_ranker_cache(rel)

    return assoc_cache
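# Numeric illustration (standalone) of the importance-sampling weights used above:
# negatives j are drawn from a proposal q = softmax(association scores), and each sampled
# margin is scaled by 1/q(j), so that in expectation frequently and rarely proposed
# negatives contribute on an even footing. The association scores below are hypothetical.
import numpy as np

_assoc_scores = np.array([0.1, 2.0, -1.0, 0.5])
_q = np.exp(_assoc_scores) / np.exp(_assoc_scores).sum()
_j = np.random.choice(len(_q), p=_q)   # one sampled negative
_q_norm = 1.0 / _q[_j]                 # the weight applied to its margin term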