def get_words(expn, parent, lmk=None, rel=None): words = [] probs = [] entropy = [] for n in expn.split(): if n in NONTERMINALS: if n == parent == 'LANDMARK-PHRASE': # we need to move to the parent landmark lmk = parent_landmark(lmk) # we need to keep expanding expansion, exp_prob, exp_ent = get_expansion(n, parent, lmk, rel) w, w_prob, w_ent = get_words(expansion, n, lmk, rel) words.append(w) probs.append(exp_prob * w_prob) entropy.append(exp_ent + w_ent) else: # get word for POS w_db = Word.get_words(pos=n, lmk=lmk_id(lmk), rel=rel_type(rel)) counter = collections.Counter(w_db) keys, counts = zip(*counter.items()) counts = np.array(counts) counts /= counts.sum() w, w_prob, w_entropy = categorical_sample(keys, counts) words.append(w.word) probs.append(w.prob) entropy.append(w_entropy) p, H = np.prod(probs), np.sum(entropy) print 'expanding %s to %s (p: %f, H: %f)' % (expn, words, p, H) return words, p, H
def remove_expansion(limit, lhs, rhs, parent=None, lmk=None, rel=None):
    """Delete stored productions for the rule `lhs -> rhs` in a given context.

    The landmark and relation are reduced with `lmk_id` and `rel_type` before
    being handed to `Production.delete_productions` together with `limit`,
    `lhs`, `rhs` and `parent`.  The result of that call is returned unchanged.
    """
    landmark_key = lmk_id(lmk)
    relation_key = rel_type(rel)
    return Production.delete_productions(
        limit,
        lhs=lhs,
        rhs=rhs,
        parent=parent,
        lmk=landmark_key,
        rel=relation_key,
    )
def get_tree_prob(tree, lmk=None, rel=None): prob = 1.0 if len(tree.productions()) == 1: # if this tree only has one production # it means that its child is a terminal (word) word = tree[0] pos = tree.node p = WordCPT.probability(word=word, pos=pos, lmk=lmk_id(lmk), rel=rel_type(rel)) print p, pos, '->', word, m2s(lmk,rel) prob *= p else: lhs = tree.node rhs = ' '.join(n.node for n in tree) parent = tree.parent().node if tree.parent() else None if lhs == 'RELATION': # everything under a RELATION node should ignore the landmark lmk = None elif lhs == 'LANDMARK-PHRASE': # everything under a LANDMARK-PHRASE node should ignore the relation rel = None if parent == 'LANDMARK-PHRASE': # if the current node is a LANDMARK-PHRASE and the parent node # is also a LANDMARK-PHRASE then we should move to the parent # of the current landmark lmk = parent_landmark(lmk) if not parent: # LOCATION-PHRASE has no parent and is not related to lmk and rel p = ExpansionCPT.probability(rhs=rhs, lhs=lhs) print p, repr(lhs), '->', repr(rhs) else: p = ExpansionCPT.probability(rhs=rhs, lhs=lhs, parent=parent, lmk=lmk_id(lmk), rel=rel_type(rel)) print p, repr(lhs), '->', repr(rhs), 'parent=%r'%parent, m2s(lmk,rel) prob *= p # call get_tree_prob recursively for each subtree for subtree in tree: prob *= get_tree_prob(subtree, lmk, rel) return prob
def get_words(terminals, landmarks, rel=None):
    """Sample one word for each (terminal, landmark) pair.

    `terminals` and `landmarks` are walked in lockstep.  A symbol that is
    still in NONTERMINALS (an expansion that could not be made) is passed
    through unchanged with probability 1 and entropy 0.  For any other symbol
    the matching `CWord` count rows are summed per word, normalised, and a
    word is drawn with `categorical_sample`.

    Returns `(words, p, H)`: the list of emitted words, the product of their
    probabilities and the sum of their entropies.
    """
    words = []
    probs = []
    entropy = []

    for n, lmk in zip(terminals, landmarks):
        # if we could not get an expansion for the LHS, we just pass down the
        # unexpanded nonterminal symbol; it gets probability 1 and entropy 0
        if n in NONTERMINALS:
            words.append(n)
            probs.append(1.0)
            entropy.append(0.0)
            continue

        lmk_class = (lmk.object_class if lmk else None)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(rel, 'measurement') else None)

        cp_db = CWord.get_word_counts(pos=n,
                                      lmk_class=lmk_class,
                                      lmk_ori_rels=get_lmk_ori_rels_str(lmk),
                                      lmk_color=lmk_color,
                                      rel=rel_class,
                                      rel_dist_class=dist_class,
                                      rel_deg_class=deg_class)

        if cp_db.count() <= 0:
            logger( 'Could not expand %s (lmk_class: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)' % (n, lmk_class, lmk_color, rel_class, dist_class, deg_class) )
            # NOTE(review): this appends to the caller's `terminals` list and
            # emits no word for this position, so `words` ends up shorter
            # than the input -- confirm this is intended.
            terminals.append( n )
            continue

        logger( 'Expanded %s (lmk_class: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)' % (n, lmk_class, lmk_color, rel_class, dist_class, deg_class) )

        # Sum the counts of rows that share a word.  (A previous
        # `zip(*[...cp_db.all()])` here was overwritten immediately and only
        # cost an extra query, so it has been dropped.)
        ccounter = {}
        for cword in cp_db.all():
            ccounter[cword.word] = ccounter.get(cword.word, 0) + cword.count

        ckeys, ccounts = zip(*ccounter.items())

        ccounts = np.array(ccounts, dtype=float)
        ccounts /= ccounts.sum()

        w, w_prob, w_entropy = categorical_sample(ckeys, ccounts)
        words.append(w)
        probs.append(w_prob)
        entropy.append(w_entropy)

    p, H = np.prod(probs), np.sum(entropy)
    return words, p, H
def update_word_counts(update, pos, word, lmk_class=None, lmk_ori_rels=None, lmk_color=None, rel=None):
    """Forward a word-count update to `CWord.update_word_counts`.

    The relation is reduced to `rel_type(rel)`; its distance and degree
    classes are taken from `rel.measurement` when the relation has such an
    attribute and are None otherwise.
    """
    relation = rel_type(rel)

    distance = degree = None
    if hasattr(rel, 'measurement'):
        distance = rel.measurement.best_distance_class
        degree = rel.measurement.best_degree_class

    CWord.update_word_counts(
        update=update,
        pos=pos,
        word=word,
        lmk_class=lmk_class,
        lmk_ori_rels=lmk_ori_rels,
        lmk_color=lmk_color,
        rel=relation,
        rel_dist_class=distance,
        rel_deg_class=degree,
    )
def update_expansion_counts(update, lhs, rhs, parent=None, lmk_class=None, lmk_ori_rels=None, lmk_color=None, rel=None):
    """Forward a production-count update to `CProduction.update_production_counts`.

    The relation is reduced to `rel_type(rel)`; its distance and degree
    classes are taken from `rel.measurement` when the relation has such an
    attribute and are None otherwise.
    """
    relation = rel_type(rel)

    distance = degree = None
    if hasattr(rel, 'measurement'):
        distance = rel.measurement.best_distance_class
        degree = rel.measurement.best_degree_class

    CProduction.update_production_counts(
        update=update,
        lhs=lhs,
        rhs=rhs,
        parent=parent,
        lmk_class=lmk_class,
        lmk_ori_rels=lmk_ori_rels,
        lmk_color=lmk_color,
        rel=relation,
        dist_class=distance,
        deg_class=degree,
    )
def get_expansion(lhs, parent=None, lmk=None, rel=None): p_db = Production.get_productions(lhs=lhs, parent=parent, lmk=lmk_id(lmk), rel=rel_type(rel)) counter = collections.Counter(p_db) keys, counts = zip(*counter.items()) counts = np.array(counts) counts /= counts.sum() prod, prod_prob, prod_entropy = categorical_sample(keys, counts) print 'expanding:', prod, prod_prob, prod_entropy return prod.rhs, prod_prob, prod_entropy
def delete_word(limit, terminals, words, lmk=None, rel=None):
    """Delete stored words for each (part of speech, word) pair.

    `terminals` and `words` are walked in lockstep and `Word.delete_words` is
    called once per pair with the landmark id and relation type.  Returns the
    list of values returned by those calls, in order.
    """
    return [
        Word.delete_words(limit, pos=pos, word=token,
                          lmk=lmk_id(lmk), rel=rel_type(rel))
        for pos, token in zip(terminals, words)
    ]
def save_tree(tree, loc, rel, lmk, parent=None):
    """Store a parse tree as `Word` and `Production` records.

    A tree with a single production becomes a `Word` (its child is the
    terminal).  Any other tree becomes a `Production`, annotated with
    relation or landmark attributes depending on its left-hand side, and each
    of its subtrees is saved recursively with that production as parent.
    """
    if len(tree.productions()) == 1:
        # only one production: the single child is the terminal word
        leaf = Word()
        leaf.word = tree[0]
        leaf.pos = tree.node
        leaf.parent = parent
        leaf.location = loc
        return

    prod = Production()
    prod.lhs = tree.node
    prod.rhs = ' '.join(child.node for child in tree)
    prod.parent = parent
    prod.location = loc

    # some productions carry part of the semantic representation
    lhs = prod.lhs
    if lhs == 'RELATION':
        prod.relation = rel_type(rel)
        if hasattr(rel, 'measurement'):
            measurement = rel.measurement
            prod.relation_distance_class = measurement.best_distance_class
            prod.relation_degree_class = measurement.best_degree_class
    elif lhs == 'LANDMARK-PHRASE':
        prod.landmark = lmk_id(lmk)
        prod.landmark_class = lmk.object_class
        prod.landmark_orientation_relations = get_lmk_ori_rels_str(lmk)
        prod.landmark_color = lmk.color
        # a nested landmark phrase refers to the parent landmark
        lmk = parent_landmark(lmk)
    elif lhs == 'LANDMARK':
        # LANDMARK shares the landmark of its parent LANDMARK-PHRASE
        prod.landmark = parent.landmark
        prod.landmark_class = parent.landmark_class
        prod.landmark_orientation_relations = parent.landmark_orientation_relations
        prod.landmark_color = parent.landmark_color

    # save the subtrees with this production as their parent
    for child in tree:
        save_tree(child, loc, rel, lmk, prod)
def update_word_counts(update, pos, word, prev_word='<no prev word>', lmk_class=None, lmk_ori_rels=None, lmk_color=None, rel=None):
    """Forward a word-count update, including the previous word, to `CWord`.

    The relation is reduced to `rel_type(rel)`; its distance and degree
    classes are taken from `rel.measurement` when the relation has such an
    attribute and are None otherwise.
    """
    relation = rel_type(rel)

    distance = degree = None
    if hasattr(rel, 'measurement'):
        distance = rel.measurement.best_distance_class
        degree = rel.measurement.best_degree_class

    CWord.update_word_counts(
        update=update,
        pos=pos,
        word=word,
        prev_word=prev_word,
        lmk_class=lmk_class,
        lmk_ori_rels=lmk_ori_rels,
        lmk_color=lmk_color,
        rel=relation,
        rel_dist_class=distance,
        rel_deg_class=degree,
    )
def update_expansion_counts(update, lhs, rhs, parent=None, lmk_class=None, lmk_ori_rels=None, lmk_color=None, rel=None):
    """Forward a production-count update to `CProduction.update_production_counts`.

    The relation is reduced to `rel_type(rel)`; its distance and degree
    classes are taken from `rel.measurement` when the relation has such an
    attribute and are None otherwise.
    """
    relation = rel_type(rel)

    distance = degree = None
    if hasattr(rel, 'measurement'):
        distance = rel.measurement.best_distance_class
        degree = rel.measurement.best_degree_class

    CProduction.update_production_counts(
        update=update,
        lhs=lhs,
        rhs=rhs,
        parent=parent,
        lmk_class=lmk_class,
        lmk_ori_rels=lmk_ori_rels,
        lmk_color=lmk_color,
        rel=relation,
        dist_class=distance,
        deg_class=degree,
    )
def train(model, triples, ent_num):
    """Train `model`, validate periodically, then evaluate the best checkpoint.

    `triples` is unpacked into seven datasets: train, valid, test and four
    relation-pattern test sets (symmetry, inversion, composition, others).
    The model is saved whenever the overall validation MRR is at least the
    best seen so far.  After training, the saved checkpoint is reloaded and
    evaluated on the non-empty relation-pattern sets and on the test set.
    """
    logging.info("Start Training...")
    logging.info("batch_size = %d" % config.batch_size)
    logging.info("dim = %d" % config.ent_dim)
    logging.info("gamma = %f" % config.gamma)

    current_lr = config.learning_rate
    (train_triples, valid_triples, test_triples,
     symmetry_test, inversion_test, composition_test, others_test) = triples
    all_true_triples = train_triples + valid_triples + test_triples
    r_tp = rel_type(train_triples)

    optimizer = get_optim("Adam", model, current_lr)

    if not config.init_checkpoint:
        init_step = 1
    else:
        logging.info("Loading checkpoint...")
        checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"),
                                map_location=torch.device("cuda:0"))
        init_step = checkpoint["step"] + 1
        model.load_state_dict(checkpoint["model_state_dict"])
        if config.use_old_optimizer:
            current_lr = checkpoint["current_lr"]
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    true_all_heads, true_all_tails = get_true_ents(all_true_triples)
    train_iterator = train_data_iterator(train_triples, ent_num)
    test_data_list = test_data_sets(valid_triples, true_all_heads,
                                    true_all_tails, ent_num, r_tp)

    max_mrr = 0.0
    training_logs = []
    modes = ["Prediction Head", "Prediction Tail"]
    rtps = ["1-1", "1-M", "M-1", "M-M"]

    def log_breakdown(tag, at_step, results):
        # results[0] is the overall entry; the following entries are logged
        # in order, one per (mode, relation type) combination
        log_metrics(tag, at_step, results[0])
        position = 1
        for mode in modes:
            for rtp in rtps:
                logging.info("-" * 10 + mode + "..." + rtp + "-" * 10)
                log_metrics(tag, at_step, results[position])
                position += 1

    # Training Loop
    for step in range(init_step, config.max_step + 1):
        training_logs.append(train_step(model, optimizer, next(train_iterator)))

        # log the average of every metric since the last report
        if step % config.log_step == 0:
            averaged = {}
            for name in training_logs[0].keys():
                total = sum(entry[name] for entry in training_logs)
                averaged[name] = total / len(training_logs)
            log_metrics("Training", step, averaged)
            training_logs.clear()

        # valid
        if step % config.valid_step == 0:
            logging.info("-" * 10 + "Evaluating on Valid Dataset" + "-" * 10)
            valid_metrics = test_step(model, test_data_list, True)
            log_breakdown("Valid", step, valid_metrics)
            if valid_metrics[0]["MRR"] >= max_mrr:
                max_mrr = valid_metrics[0]["MRR"]
                save_model(model, optimizer, {
                    "step": step,
                    "current_lr": current_lr,
                })

        # NOTE(review): this is an exact float comparison, so it only fires
        # when step / max_step is exactly one of these fractions
        if step / config.max_step in [0.2, 0.5, 0.8]:
            current_lr *= 0.1
            logging.info("Change learning_rate to %f at step %d" %
                         (current_lr, step))
            optimizer = get_optim("Adam", model, current_lr)

    # load best state
    checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
    model.load_state_dict(checkpoint["model_state_dict"])
    step = checkpoint["step"]

    # relation patterns
    pattern_sets = [
        ("Symmetry", symmetry_test),
        ("Inversion", inversion_test),
        ("Composition", composition_test),
        ("Other", others_test),
    ]
    for dataset_str, dataset in pattern_sets:
        if len(dataset) == 0:
            continue
        test_data_list = test_data_sets(dataset, true_all_heads,
                                        true_all_tails, ent_num, r_tp)
        logging.info("-" * 10 + "Evaluating on " + dataset_str + " Dataset" +
                     "-" * 10)
        log_metrics("Valid", step, test_step(model, test_data_list))

    # finally test
    test_data_list = test_data_sets(test_triples, true_all_heads,
                                    true_all_tails, ent_num, r_tp)
    logging.info("----------Evaluating on Test Dataset----------")
    log_breakdown("Test", step, test_step(model, test_data_list, True))
def get_expansion(lhs, parent=None, lmk=None, rel=None):
    """Recursively expand the symbols of `lhs` down to terminals.

    Every whitespace-separated symbol of `lhs` is handled in turn.  A symbol
    in NONTERMINALS is expanded by sampling a right-hand side from the
    matching `CProduction` counts and recursing into it; any other symbol is
    recorded as a terminal together with the landmark in effect.

    Returns five lists:
      * `lhs_rhs_parent_chain`: `(symbol, sampled rhs, parent, landmark)` for
        every expansion made, in the order they were made
      * `prob_chain` / `entropy_chain`: probability and entropy of each of
        those expansions
      * `terminals` / `landmarks`: parallel lists with one entry per emitted
        symbol
    """
    lhs_rhs_parent_chain = []
    prob_chain = []
    entropy_chain = []
    terminals = []
    landmarks = []

    for n in lhs.split():
        if n not in NONTERMINALS:
            terminals.append( n )
            landmarks.append( lmk )
            continue

        if n == parent == 'LANDMARK-PHRASE':
            # a nested LANDMARK-PHRASE talks about the parent landmark; the
            # change sticks for the remaining symbols
            lmk = parent_landmark(lmk)

        lmk_class = (lmk.object_class if lmk else None)
        lmk_ori_rels = get_lmk_ori_rels_str(lmk)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(rel, 'measurement') else None)

        cp_db = CProduction.get_production_counts(lhs=n,
                                                  parent=parent,
                                                  lmk_class=lmk_class,
                                                  lmk_ori_rels=lmk_ori_rels,
                                                  lmk_color=lmk_color,
                                                  rel=rel_class,
                                                  dist_class=dist_class,
                                                  deg_class=deg_class)

        if cp_db.count() <= 0:
            logger('Could not expand %s (parent: %s, lmk_class: %s, lmk_ori_rels: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)' % (n, parent, lmk_class, lmk_ori_rels, lmk_color, rel_class, dist_class, deg_class))
            # Pass the unexpanded symbol down as if it were a terminal.  Its
            # landmark has to be recorded too, otherwise `terminals` and
            # `landmarks` stop lining up for every later symbol.
            terminals.append( n )
            landmarks.append( lmk )
            continue

        # Sum the counts of rows that share a right-hand side.  (A previous
        # `zip(*[...cp_db.all()])` here was overwritten immediately and only
        # cost an extra query, so it has been dropped.)
        ccounter = {}
        for cprod in cp_db.all():
            ccounter[cprod.rhs] = ccounter.get(cprod.rhs, 0) + cprod.count

        ckeys, ccounts = zip(*ccounter.items())

        ccounts = np.array(ccounts, dtype=float)
        ccounts /= ccounts.sum()

        cprod, cprod_prob, cprod_entropy = categorical_sample(ckeys, ccounts)

        lhs_rhs_parent_chain.append( ( n,cprod,parent,lmk ) )
        prob_chain.append( cprod_prob )
        entropy_chain.append( cprod_entropy )

        # expand the sampled right-hand side with `n` as its parent
        lrpc, pc, ec, t, ls = get_expansion( lhs=cprod, parent=n, lmk=lmk, rel=rel )
        lhs_rhs_parent_chain.extend( lrpc )
        prob_chain.extend( pc )
        entropy_chain.extend( ec )
        terminals.extend( t )
        landmarks.extend( ls )

    return lhs_rhs_parent_chain, prob_chain, entropy_chain, terminals, landmarks
def delete_word(limit, terminals, words, lmk=None, rel=None):
    """Delete stored words for each (part of speech, word) pair.

    Calls `Word.delete_words` once per pair taken in lockstep from
    `terminals` and `words`, passing the landmark id and relation type, and
    returns the list of values returned by those calls.
    """
    def delete_one(pos, token):
        # one deletion request for one part-of-speech / word pair
        return Word.delete_words(limit, pos=pos, word=token,
                                 lmk=lmk_id(lmk), rel=rel_type(rel))

    return [delete_one(pos, token) for pos, token in zip(terminals, words)]
def get_words(terminals, landmarks, rel=None, prevword=None):
    """Sample one word per terminal, mixing unigram and bigram counts.

    `terminals` and `landmarks` are walked in lockstep.  A symbol still in
    NONTERMINALS is passed through unchanged with probability 1 and entropy
    0.  For any other symbol the word distribution is

        alpha * bigram + (1 - alpha) * unigram

    where `unigram` comes from the `CWord` counts for the context, `bigram`
    from the counts additionally restricted to the previous word, and `alpha`
    is the ratio `C(prev_word, context) / C(context)` of `CWord.get_count`
    results.  The previous word is the last word emitted so far, or
    `prevword` for the first one.

    Returns `(words, p, H, alphas)`: the emitted words, the product of their
    probabilities, the sum of their entropies and the alpha used for every
    sampled word.
    """
    words = []
    probs = []
    alphas = []
    entropy = []
    C = CWord.get_count

    for n,lmk in zip(terminals, landmarks):
        # if we could not get an expansion for the LHS, we just pass down the
        # unexpanded nonterminal symbol; it gets probability 1 and entropy 0
        if n in NONTERMINALS:
            words.append(n)
            probs.append(1.0)
            entropy.append(0.0)
            continue

        lmk_class = (lmk.object_class if lmk else None)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(rel, 'measurement') else None)

        meaning = dict(pos=n,
                       lmk_class=lmk_class,
                       lmk_ori_rels=get_lmk_ori_rels_str(lmk),
                       lmk_color=lmk_color,
                       rel=rel_class,
                       rel_dist_class=dist_class,
                       rel_deg_class=deg_class)

        # unigram distribution over the words seen in this context
        cp_db_uni = CWord.get_word_counts(**meaning)
        ccounter = {}
        for c in cp_db_uni:
            ccounter[c.word] = ccounter.get(c.word, 0) + c.count
        ckeys, ccounts_uni = zip(*ccounter.items())
        ccounts_uni = np.array(ccounts_uni, dtype=float)
        ccounts_uni /= ccounts_uni.sum()

        prev_word = words[-1] if words else prevword
        # float() forces true division: with integer counts the ratio would
        # otherwise floor to 0 or 1 and the mix below would never be partial
        alpha = float(C(prev_word=prev_word, **meaning)) / C(**meaning)
        alphas.append(alpha)

        if alpha:
            # bigram distribution, laid out over the same keys as the unigram
            cp_db_bi = CWord.get_word_counts(prev_word=prev_word, **meaning)
            ccounter = {}
            for c in cp_db_bi:
                ccounter[c.word] = ccounter.get(c.word, 0) + c.count
            ccounts_bi = np.array([ccounter.get(k,0) for k in ckeys], dtype=float)
            ccounts_bi /= ccounts_bi.sum()
            cprob = (alpha * ccounts_bi) + ((1-alpha) * ccounts_uni)
        else:
            cprob = ccounts_uni

        w, w_prob, w_entropy = categorical_sample(ckeys, cprob)
        words.append(w)
        probs.append(w_prob)
        entropy.append(w_entropy)

    p, H = np.prod(probs), np.sum(entropy)
    return words, p, H, alphas
# convert variables to the right types xloc = float(xloc) yloc = float(yloc) loc = (xloc, yloc) parse = ParentedTree.parse(parse) modparse = ParentedTree.parse(modparse) # how many ancestors should the sampled landmark have? num_ancestors = count_lmk_phrases(modparse) - 1 # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) if args.verbose: print "utterance:", repr(sentence) print "location: %s" % repr(loc) print "landmark: %s (%s)" % (lmk, lmk_id(lmk)) print "relation: %s" % rel_type(rel) print "parse:" print parse.pprint() print "modparse:" print modparse.pprint() print "-" * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) session.commit()
continue # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) lmk, _, _ = lmk rel, _, _ = rel assert(not isinstance(lmk, tuple)) assert(not isinstance(rel, tuple)) if args.verbose: print 'utterance:', repr(sentence) print 'location: %s' % repr(loc) print 'landmark: %s (%s)' % (lmk, lmk_id(lmk)) print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() for sentence,(parse,modparse) in unique_sentences.items(): SentenceParse.add_sentence_parse_blind(sentence, parse, modparse)
continue # sample `args.iterations` times for each sentence for _ in xrange(args.iterations): lmk, rel = get_meaning(loc, num_ancestors) lmk, _, _ = lmk rel, _, _ = rel assert(not isinstance(lmk, tuple)) assert(not isinstance(rel, tuple)) if args.verbose: print 'utterance:', repr(sentence) print 'location: %s' % repr(loc) print 'landmark: %s (%s)' % (lmk, lmk_id(lmk)) print 'relation: %s' % rel_type(rel) print 'parse:' print parse.pprint() print 'modparse:' print modparse.pprint() print '-' * 70 location = Location(x=xloc, y=yloc) save_tree(modparse, location, rel, lmk) Bigram.make_bigrams(location.words) Trigram.make_trigrams(location.words) if i % 200 == 0: session.commit() if SentenceParse.query().count() == 0: print 'BLIND ADDING!!!!!!!!!!!'
def _add_one_prob_and_entropy(rows, key_attr, observed):
    """Return (probability of `observed`, entropy) under add-one smoothing.

    `rows` are count records; `key_attr` names the attribute that identifies
    the outcome of a row.  Each distinct outcome gets one extra count and
    `observed` is given a count of 1 if no row mentions it.
    """
    counter = {}
    for row in rows:
        key = getattr(row, key_attr)
        if key in counter:
            counter[key] += row.count
        else:
            # the first row for an outcome carries the +1 smoothing term
            counter[key] = row.count + 1

    # we have never seen this outcome in this context before
    if observed not in counter:
        counter[observed] = 1

    counts = np.array(list(counter.values()), dtype=float)
    total = counts.sum()
    probs = counts / total
    entropy = -np.sum(probs * np.log(probs))
    return counter[observed] / total, entropy


def get_tree_probs(tree, lmk=None, rel=None):
    """Score every node of a parse tree against the stored counts.

    For a node whose label is in NONTERMINALS the observed expansion is
    scored against the `CProduction` counts for its context and the subtrees
    are processed recursively.  Any other node is treated as a part of speech
    over a word and scored against the `CWord` counts.  A context with no
    stored counts is logged and contributes no probability or entropy.

    Returns four lists:
      * `prob_chain` / `entropy_chain`: probability and entropy for every
        node that could be scored
      * `lhs_rhs_parent_chain`: `(lhs, rhs, parent, lmk, rel)` for every
        nonterminal node
      * `term_prods`: `(lhs, rhs, lmk, rel)` for every part-of-speech node
    """
    lhs_rhs_parent_chain = []
    prob_chain = []
    entropy_chain = []
    term_prods = []

    lhs = tree.node

    if isinstance(tree[0], ParentedTree):
        rhs = ' '.join(n.node for n in tree)
    else:
        rhs = ' '.join(n for n in tree)

    parent = tree.parent.node if tree.parent else None

    if lhs == 'RELATION':
        # everything under a RELATION node should ignore the landmark
        lmk = None

    if lhs == 'LANDMARK-PHRASE':
        # everything under a LANDMARK-PHRASE node should ignore the relation
        rel = None

    if lhs == parent == 'LANDMARK-PHRASE':
        # we need to move to the parent landmark
        lmk = parent_landmark(lmk)

    if lhs == 'LOCATION-PHRASE':
        # the root is scored without any landmark or relation context
        lmk_class = lmk_ori_rels = lmk_color = None
        rel_class = dist_class = deg_class = None
    else:
        lmk_class = (lmk.object_class if lmk else None)
        lmk_ori_rels = get_lmk_ori_rels_str(lmk)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(rel, 'measurement') else None)

    if lhs in NONTERMINALS:
        cp_db = CProduction.get_production_counts(lhs=lhs,
                                                  parent=parent,
                                                  lmk_class=lmk_class,
                                                  lmk_ori_rels=lmk_ori_rels,
                                                  lmk_color=lmk_color,
                                                  rel=rel_class,
                                                  dist_class=dist_class,
                                                  deg_class=deg_class)

        if cp_db.count() <= 0:
            logger('Could not expand %s (parent: %s, lmk_class: %s, lmk_ori_rels: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)' % (lhs, parent, lmk_class, lmk_ori_rels, lmk_color, rel_class, dist_class, deg_class))
        else:
            cprod_prob, cprod_entropy = _add_one_prob_and_entropy(
                cp_db.all(), 'rhs', rhs)
            prob_chain.append( cprod_prob )
            entropy_chain.append( cprod_entropy )

        lhs_rhs_parent_chain.append( ( lhs, rhs, parent, lmk, rel ) )

        for subtree in tree:
            pc, ec, lrpc, tps = get_tree_probs(subtree, lmk, rel)
            prob_chain.extend( pc )
            entropy_chain.extend( ec )
            lhs_rhs_parent_chain.extend( lrpc )
            term_prods.extend( tps )

    else:
        cw_db = CWord.get_word_counts(pos=lhs,
                                      lmk_class=lmk_class,
                                      lmk_ori_rels=lmk_ori_rels,
                                      lmk_color=lmk_color,
                                      rel=rel_class,
                                      rel_dist_class=dist_class,
                                      rel_deg_class=deg_class)

        if cw_db.count() <= 0:
            # we don't know the probability or entropy values for a context
            # we have never seen before; we just update the term_prods list
            logger('Could not expand %s (lmk_class: %s, lmk_ori_rels: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)' % (lhs, lmk_class, lmk_ori_rels, lmk_color, rel_class, dist_class, deg_class))
        else:
            w_prob, w_entropy = _add_one_prob_and_entropy(
                cw_db.all(), 'word', rhs)
            prob_chain.append(w_prob)
            entropy_chain.append(w_entropy)

        term_prods.append( (lhs, rhs, lmk, rel) )

    return prob_chain, entropy_chain, lhs_rhs_parent_chain, term_prods
def train(model, triples, ent_num):
    """Train `model` with validation-driven checkpointing and LR decay.

    `triples` is unpacked into train, valid and test sets.  At every
    validation step the model is saved if overall HITS@1 or MRR is at least
    the best seen so far; otherwise the learning rate is multiplied by 0.1
    and the optimizer rebuilt, and once the rate is no longer above
    0.0000011 training stops.  Finally the saved checkpoint is reloaded and
    evaluated on the test set.
    """
    logging.info("Start Training...")
    logging.info("batch_size = %d" % config.batch_size)
    logging.info("dim = %d" % config.ent_dim)
    logging.info("gamma = %f" % config.gamma)

    current_lr = config.learning_rate
    train_triples, valid_triples, test_triples = triples
    all_true_triples = train_triples + valid_triples + test_triples
    rtp = rel_type(train_triples)

    optimizer = get_optim("Adam", model, current_lr)
    train_iterator = train_data_iterator(train_triples, ent_num)

    if not config.init_checkpoint:
        init_step = 1
    else:
        logging.info("Loading checkpoint...")
        checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
        init_step = checkpoint["step"] + 1
        model.load_state_dict(checkpoint["model_state_dict"])
        if config.use_old_optimizer:
            current_lr = checkpoint["current_lr"]
            optimizer.load_state_dict(checkpoint["optimizer_state_dict"])

    breakdown_headers = (
        "-----------Prediction Head... 1-1 -------------",
        "-----------Prediction Head... 1-M -------------",
        "-----------Prediction Head... M-1 -------------",
        "-----------Prediction Head... M-M -------------",
        "-----------Prediction Tail... 1-1 -------------",
        "-----------Prediction Tail... 1-M -------------",
        "-----------Prediction Tail... M-1 -------------",
        "-----------Prediction Tail... M-M -------------",
    )

    def report(tag, at_step, results):
        # `results` must hold exactly nine entries: the overall metrics
        # followed by one entry per header above
        overall, h11, h1m, hm1, hmm, t11, t1m, tm1, tmm = results
        logging.info("----------------Overall Results----------------")
        log_metrics(tag, at_step, overall)
        parts = (h11, h1m, hm1, hmm, t11, t1m, tm1, tmm)
        for header, part in zip(breakdown_headers, parts):
            logging.info(header)
            log_metrics(tag, at_step, part)
        return overall

    max_hit1 = 0.0
    max_mrr = 0.0
    training_logs = []

    # Training Loop
    for step in range(init_step, config.max_step):
        training_logs.append(train_step(model, optimizer, next(train_iterator)))

        # log the average of every metric since the last report
        if step % config.log_step == 0:
            averaged = {}
            for name in training_logs[0].keys():
                total = sum(entry[name] for entry in training_logs)
                averaged[name] = total / len(training_logs)
            log_metrics("Training average", step, averaged)
            training_logs = []

        # valid
        if step % config.valid_step != 0:
            continue

        logging.info(
            "---------------Evaluating on Valid Dataset---------------")
        overall = report("Valid", step,
                         test_step(model, valid_triples, all_true_triples,
                                   ent_num, rtp))

        if overall["HITS@1"] >= max_hit1 or overall["MRR"] >= max_mrr:
            # at least as good as the best so far: record it and checkpoint
            max_hit1 = max(max_hit1, overall["HITS@1"])
            max_mrr = max(max_mrr, overall["MRR"])
            save_model(model, optimizer, {
                "step": step,
                "current_lr": current_lr,
            })
        elif current_lr > 0.0000011:
            # no improvement: decay the rate and rebuild the optimizer
            current_lr *= 0.1
            logging.info("Change learning_rate to %f at step %d" %
                         (current_lr, step))
            optimizer = get_optim("Adam", model, current_lr)
        else:
            logging.info(
                "-------------------Training End-------------------")
            break

    # best state
    checkpoint = torch.load(os.path.join(config.save_path, "checkpoint"))
    model.load_state_dict(checkpoint["model_state_dict"])
    step = checkpoint["step"]

    logging.info(
        "-----------------Evaluating on Test Dataset-------------------")
    report("Test", step,
           test_step(model, test_triples, all_true_triples, ent_num, rtp))
def get_expansion(lhs, parent=None, lmk=None, rel=None):
    """Recursively expand the symbols of `lhs` down to terminals.

    Every whitespace-separated symbol of `lhs` is handled in turn.  A symbol
    in NONTERMINALS is expanded by sampling a right-hand side from the
    matching `CProduction` counts and recursing into it; any other symbol is
    recorded as a terminal together with the landmark in effect.

    Returns five lists:
      * `lhs_rhs_parent_chain`: `(symbol, sampled rhs, parent, landmark)` for
        every expansion made, in the order they were made
      * `prob_chain` / `entropy_chain`: probability and entropy of each of
        those expansions
      * `terminals` / `landmarks`: parallel lists with one entry per emitted
        symbol
    """
    lhs_rhs_parent_chain = []
    prob_chain = []
    entropy_chain = []
    terminals = []
    landmarks = []

    for n in lhs.split():
        if n not in NONTERMINALS:
            terminals.append(n)
            landmarks.append(lmk)
            continue

        if n == parent == 'LANDMARK-PHRASE':
            # a nested LANDMARK-PHRASE talks about the parent landmark; the
            # change sticks for the remaining symbols
            lmk = parent_landmark(lmk)

        lmk_class = (lmk.object_class if lmk else None)
        lmk_ori_rels = get_lmk_ori_rels_str(lmk)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(
            rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(
            rel, 'measurement') else None)

        cp_db = CProduction.get_production_counts(
            lhs=n,
            parent=parent,
            lmk_class=lmk_class,
            lmk_ori_rels=lmk_ori_rels,
            lmk_color=lmk_color,
            rel=rel_class,
            dist_class=dist_class,
            deg_class=deg_class)

        if cp_db.count() <= 0:
            logger(
                'Could not expand %s (parent: %s, lmk_class: %s, lmk_ori_rels: %s, lmk_color: %s, rel: %s, dist_class: %s, deg_class: %s)'
                % (n, parent, lmk_class, lmk_ori_rels, lmk_color, rel_class,
                   dist_class, deg_class))
            # Pass the unexpanded symbol down as if it were a terminal.  Its
            # landmark has to be recorded too, otherwise `terminals` and
            # `landmarks` stop lining up for every later symbol.
            terminals.append(n)
            landmarks.append(lmk)
            continue

        # Sum the counts of rows that share a right-hand side.  (A previous
        # `zip(*[...cp_db.all()])` here was overwritten immediately and only
        # cost an extra query, so it has been dropped.)
        ccounter = {}
        for cprod in cp_db.all():
            ccounter[cprod.rhs] = ccounter.get(cprod.rhs, 0) + cprod.count

        ckeys, ccounts = zip(*ccounter.items())

        ccounts = np.array(ccounts, dtype=float)
        ccounts /= ccounts.sum()

        cprod, cprod_prob, cprod_entropy = categorical_sample(
            ckeys, ccounts)

        lhs_rhs_parent_chain.append((n, cprod, parent, lmk))
        prob_chain.append(cprod_prob)
        entropy_chain.append(cprod_entropy)

        # expand the sampled right-hand side with `n` as its parent
        lrpc, pc, ec, t, ls = get_expansion(lhs=cprod,
                                            parent=n,
                                            lmk=lmk,
                                            rel=rel)
        lhs_rhs_parent_chain.extend(lrpc)
        prob_chain.extend(pc)
        entropy_chain.extend(ec)
        terminals.extend(t)
        landmarks.extend(ls)

    return lhs_rhs_parent_chain, prob_chain, entropy_chain, terminals, landmarks
def get_words(terminals, landmarks, rel=None, prevword=None):
    """Sample one word per terminal, mixing unigram and bigram counts.

    `terminals` and `landmarks` are walked in lockstep.  A symbol still in
    NONTERMINALS is passed through unchanged with probability 1 and entropy
    0.  For any other symbol the word distribution is

        alpha * bigram + (1 - alpha) * unigram

    where `unigram` comes from the `CWord` counts for the context, `bigram`
    from the counts additionally restricted to the previous word, and `alpha`
    is the ratio `C(prev_word, context) / C(context)` of `CWord.get_count`
    results.  The previous word is the last word emitted so far, or
    `prevword` for the first one.

    Returns `(words, p, H, alphas)`: the emitted words, the product of their
    probabilities, the sum of their entropies and the alpha used for every
    sampled word.
    """
    words = []
    probs = []
    alphas = []
    entropy = []
    C = CWord.get_count

    for n, lmk in zip(terminals, landmarks):
        # if we could not get an expansion for the LHS, we just pass down the
        # unexpanded nonterminal symbol; it gets probability 1 and entropy 0
        if n in NONTERMINALS:
            words.append(n)
            probs.append(1.0)
            entropy.append(0.0)
            continue

        lmk_class = (lmk.object_class if lmk else None)
        lmk_color = (lmk.color if lmk else None)
        rel_class = rel_type(rel)
        dist_class = (rel.measurement.best_distance_class if hasattr(
            rel, 'measurement') else None)
        deg_class = (rel.measurement.best_degree_class if hasattr(
            rel, 'measurement') else None)

        meaning = dict(pos=n,
                       lmk_class=lmk_class,
                       lmk_ori_rels=get_lmk_ori_rels_str(lmk),
                       lmk_color=lmk_color,
                       rel=rel_class,
                       rel_dist_class=dist_class,
                       rel_deg_class=deg_class)

        # unigram distribution over the words seen in this context
        cp_db_uni = CWord.get_word_counts(**meaning)
        ccounter = {}
        for c in cp_db_uni:
            ccounter[c.word] = ccounter.get(c.word, 0) + c.count
        ckeys, ccounts_uni = zip(*ccounter.items())
        ccounts_uni = np.array(ccounts_uni, dtype=float)
        ccounts_uni /= ccounts_uni.sum()

        prev_word = words[-1] if words else prevword
        # float() forces true division: with integer counts the ratio would
        # otherwise floor to 0 or 1 and the mix below would never be partial
        alpha = float(C(prev_word=prev_word, **meaning)) / C(**meaning)
        alphas.append(alpha)

        if alpha:
            # bigram distribution, laid out over the same keys as the unigram
            cp_db_bi = CWord.get_word_counts(prev_word=prev_word, **meaning)
            ccounter = {}
            for c in cp_db_bi:
                ccounter[c.word] = ccounter.get(c.word, 0) + c.count
            ccounts_bi = np.array([ccounter.get(k, 0) for k in ckeys],
                                  dtype=float)
            ccounts_bi /= ccounts_bi.sum()
            cprob = (alpha * ccounts_bi) + ((1 - alpha) * ccounts_uni)
        else:
            cprob = ccounts_uni

        w, w_prob, w_entropy = categorical_sample(ckeys, cprob)
        words.append(w)
        probs.append(w_prob)
        entropy.append(w_entropy)

    p, H = np.prod(probs), np.sum(entropy)
    return words, p, H, alphas