def get_softmax_margin_partition(factorexprs, goldfactors, valid_fes, sentlen):
    """Compute the softmax-margin (cost-augmented) log-partition function.

    Runs a semi-Markov dynamic program over segmentations of the sentence:
    logalpha[end] is the logsumexp over all segmentations of tokens 0..end,
    where each factor score is augmented by its cost against the gold factors.

    Args:
        factorexprs: dict mapping Factor(i, j, y) -> score expression.
        goldfactors: gold factors used by cost() for the margin term.
        valid_fes: frame-element ids considered for every span.
        sentlen: number of tokens in the sentence.

    Returns:
        The log-partition expression for the full sentence (logalpha[sentlen-1]).
    """
    logalpha = [None] * sentlen
    for end in range(sentlen):
        candidates = []
        # Segmentations consisting of the single span 0..end (skipped when
        # span clipping disallows this width).
        if not USE_SPAN_CLIP or end <= ALLOWED_SPANLEN:
            for label in valid_fes:
                full = Factor(0, end, label)
                candidates.append(factorexprs[full] + cost(full, goldfactors))
        # Segmentations ending with span (split+1 .. end) stacked on the
        # partition of the prefix 0..split.
        first_split = 0
        if USE_SPAN_CLIP and end > ALLOWED_SPANLEN:
            first_split = max(0, end - ALLOWED_SPANLEN - 1)
        for split in range(first_split, end):
            for label in valid_fes:
                fac = Factor(split + 1, end, label)
                candidates.append(
                    logalpha[split] + factorexprs[fac] + cost(fac, goldfactors))
        # Without clipping, exactly one candidate exists per (start, label).
        if not USE_SPAN_CLIP and len(candidates) != len(valid_fes) * (end + 1):
            raise Exception("counting errors")
        logalpha[end] = dy.logsumexp(candidates)
    return logalpha[sentlen - 1]
def get_hinge_partition(factorexprs, goldfacs, valid_fes, sentlen):
    """Cost-augmented Viterbi over segmentations, for structured hinge loss.

    alpha[j] holds the score expression of the best segmentation of tokens
    0..j (each factor's score augmented by cost() against goldfacs);
    backpointers[j] records (start, fe) of the last span in that best
    segmentation. After the forward pass, the backpointer chain is walked
    to recover the predicted factors.

    Args:
        factorexprs: dict mapping Factor(i, j, y) -> score expression.
        goldfacs: gold factors used by cost() for the margin term.
        valid_fes: frame-element ids considered for every span.
        sentlen: number of tokens in the sentence.

    Returns:
        (best-segmentation score expression, list of predicted Factor objects).
    """
    alpha = [None for _ in range(sentlen)]
    backpointers = [None for _ in range(sentlen)]
    for j in range(sentlen):
        # full length spans
        bestscore = float("-inf")
        if not USE_SPAN_CLIP or j <= ALLOWED_SPANLEN:
            for y in valid_fes:
                factor = Factor(0, j, y)
                facscore = factorexprs[factor] + cost(factor, goldfacs)
                # scalar_value() forces evaluation so we can compare scores;
                # the expression itself is kept for the loss gradient.
                if facscore.scalar_value() > bestscore:
                    bestscore = facscore.scalar_value()
                    alpha[j] = facscore
                    backpointers[j] = (0, y)
        # recursive case
        istart = 0
        if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
            istart = max(0, j - ALLOWED_SPANLEN - 1)
        for i in range(istart, j):
            for y in valid_fes:
                factor = Factor(i + 1, j, y)
                facscore = alpha[i] + factorexprs[factor] + cost(
                    factor, goldfacs)
                if facscore.scalar_value() > bestscore:
                    bestscore = facscore.scalar_value()
                    alpha[j] = facscore
                    backpointers[j] = (i + 1, y)
    # Backtrace: follow backpointers from the last token to token 0.
    predfactors = []
    j = sentlen - 1
    i = backpointers[j][0]
    while i >= 0:
        fe = backpointers[j][1]
        predfactors.append(Factor(i, j, fe))
        if i == 0:
            break
        j = i - 1
        i = backpointers[j][0]
    return alpha[sentlen - 1], predfactors
def get_factor_expressions(fws, bws, tfemb, tfdict, valid_fes, sentence,
                           spaths_x=None, cpaths_x=None):
    """Score every candidate (span, frame-element) factor for one sentence.

    For each span (start, end) permitted by the span-clipping settings and
    each frame element in valid_fes, assembles a feature vector from the
    forward/backward span encodings, the target-frame embedding, span length
    and position features, plus optional dependency (USE_DEPS) or
    constituency (USE_CONSTITS) features, and scores it with one hidden
    layer (w_z, b_z) followed by an output layer (w_f, b_f).

    Args:
        fws, bws: per-span forward/backward representations, indexed [i][j].
        tfemb: target-frame embedding expression.
        tfdict: dict keyed by target token positions; min/max keys give the
            target span.
        valid_fes: frame-element ids to score for every span.
        sentence: sentence object providing outheads / constituency fields.
        spaths_x: dependency shortest-path lookup (USE_DEPS only).
        cpaths_x: constituency path lookup (USE_CONSTITS only).

    Returns:
        dict mapping Factor(start, end, fe) -> score expression.
    """
    factexprs = {}
    sentlen = len(fws)
    target_positions = sorted(tfdict)
    targetspan = (target_positions[0], target_positions[-1])

    for end in range(sentlen):
        first_start = 0
        if USE_SPAN_CLIP and end > ALLOWED_SPANLEN:
            first_start = max(0, end - ALLOWED_SPANLEN)
        for start in range(first_start, end + 1):
            width = end - start + 1
            base_feats = [
                fws[start][end],
                bws[start][end],
                tfemb,
                dy.scalarInput(width),
                dy.scalarInput(math.log(width)),
                sp_x[SpanWidth.howlongisspan(start, end)],
                ap_x[ArgPosition.whereisarg((start, end), targetspan)],
            ]
            fbemb_ij_basic = dy.concatenate(base_feats)
            if USE_DEPS:
                # Dependency features: number of outgoing heads in the span
                # and shortest path from the span to the target start.
                outs = oh_s[OutHeads.getnumouts(start, end, sentence.outheads)]
                shp = spaths_x[sentence.shortest_paths[(start, end,
                                                        targetspan[0])]]
                fbemb_ij = dy.concatenate([fbemb_ij_basic, outs, shp])
            elif USE_CONSTITS:
                # Constituency features: is-a-constituent flag, LCA label and
                # phrase path to the target start.
                isconstit = dy.scalarInput((start, end) in sentence.constitspans)
                lca = ct_x[sentence.lca[(start, end)][1]]
                phrp = cpaths_x[sentence.cpaths[(start, end, targetspan[0])]]
                fbemb_ij = dy.concatenate([fbemb_ij_basic, isconstit, lca, phrp])
            else:
                fbemb_ij = fbemb_ij_basic

            for fe in valid_fes:
                # Under USE_HIER a frame element's embedding also sums in its
                # parents' embeddings.
                if USE_HIER and fe in feparents:
                    fe_emb = dy.esum([fe_x[fe]] +
                                     [fe_x[par] for par in feparents[fe]])
                else:
                    fe_emb = fe_x[fe]
                fbemb_ijy = dy.concatenate([fe_emb, fbemb_ij])
                factexprs[Factor(start, end, fe)] = (
                    w_f * dy.rectify(w_z * fbemb_ijy + b_z) + b_f)
    return factexprs
def get_all_span_reduced_bert_embeddings(examples):
    """Precompute dimensionality-reduced BERT span embeddings and factor scores.

    Two passes over examples:
      1. Collect one BERT embedding per candidate span (i, j) of every
         example into pca_list.
      2. Reduce them all at once (PCA or auto-encoder), then rebuild the
         per-example factor expressions, consuming one reduced row per span
         via index_keeper.

    Returns:
        dict mapping each example -> {Factor(i, j, y): score expression}.
    """
    my_dict = {}
    pca_list = []
    # Pass 1: one BERT embedding per candidate span, in deterministic
    # (example, j, i) order so pass 2 can index rows by the same order.
    for trex in examples:
        reg_sentence = get_sentence_from_conll_example(trex)
        senlen = len(reg_sentence.split(" "))
        for j in range(senlen):
            istart = 0
            if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
                istart = max(0, j - ALLOWED_SPANLEN)
            for i in range(istart, j + 1):
                # Get span expression through BERT
                fbemb_ij = get_span_bert_expression(reg_sentence, i, j)
                pca_list.append(fbemb_ij)
    # Reduce all span embeddings jointly.
    if USE_PCA:
        # NOTE(review): 435 components hard-coded — confirm this matches the
        # downstream factor-scorer input dimensionality.
        pca = PCA(435)
        new_matrix = pca.fit_transform(pca_list)
    else:
        new_matrix = transform_auto_encoder(pca_list)
    index_keeper = 0
    # Pass 2: rebuild factor expressions from the reduced embeddings.
    for trex in examples:
        # get valid fes per example
        tfdict = trex.targetframedict
        tfkeys = sorted(tfdict)
        tg_start = tfkeys[0]
        lu, frame = tfdict[tg_start]
        valid_fes = frmfemap[frame.id] + [NOTANFEID]
        factexprs = {}
        reg_sentence = get_sentence_from_conll_example(trex)
        senlen = len(reg_sentence.split(" "))
        for j in range(senlen):
            istart = 0
            if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
                istart = max(0, j - ALLOWED_SPANLEN)
            for i in range(istart, j + 1):
                for y in valid_fes:
                    fctr = Factor(i, j, y)
                    if USE_HIER and y in feparents:
                        fefixed = dy.esum([fe_x[y]] +
                                          [fe_x[par] for par in feparents[y]])
                    else:
                        fefixed = fe_x[y]
                    fbemb_ij = dy.inputTensor(new_matrix[index_keeper])
                    fbemb_ijy = dy.concatenate([fefixed, fbemb_ij])
                    factexprs[fctr] = w_f * dy.rectify(w_z * fbemb_ijy +
                                                       b_z) + b_f
                # One reduced row per (i, j) span: advance AFTER the FE loop,
                # mirroring pass 1 which appended one vector per span.
                # NOTE(review): original formatting was ambiguous here —
                # confirm the increment belongs at span (not FE) level.
                index_keeper = index_keeper + 1
        my_dict[trex] = factexprs
    return my_dict
def get_loss(factorexprs, gold_fes, valid_fes, sentlen):
    """Return the training loss expression for one example, or None.

    Dispatches to the structured hinge loss when options.loss == "hinge".
    Otherwise the loss is partition - gold_score, with the partition computed
    either as the plain log loss ("log") or the softmax-margin loss
    ("softmaxm"). Returns None (skip example) when the partition falls below
    the numerator; raises after dumping diagnostics to stderr if the loss is
    negative, which would indicate over-counted spans.
    """
    if options.loss == "hinge":
        return get_hinge_loss(factorexprs, gold_fes, valid_fes, sentlen)

    goldfactors = []
    for feid in gold_fes:
        for span in gold_fes[feid]:
            goldfactors.append(Factor(span[0], span[1], feid))
    numerator = dy.esum([factorexprs[gf] for gf in goldfactors])

    if options.loss == "log":
        partition = get_logloss_partition(factorexprs, valid_fes, sentlen)
    elif options.loss == "softmaxm":
        partition = get_softmax_margin_partition(factorexprs, goldfactors,
                                                 valid_fes, sentlen)
    else:
        raise Exception("undefined loss function", options.loss)

    lossexp = partition - numerator
    if partition.scalar_value() < numerator.scalar_value():
        sys.stderr.write(
            "WARNING: partition ~~ numerator! possibly overfitting difference = %f\n"
            % lossexp.scalar_value())
        return None

    if lossexp.scalar_value() < 0.0:
        # Diagnostic dump before failing: gold factors with their scores,
        # then the span layout the partition sums over.
        sys.stderr.write(str(gold_fes) + "\ngolds\n")
        gsum = 0
        for fac in goldfactors:
            facval = factorexprs[fac].scalar_value()
            gsum += facval
            sys.stderr.write(fac.to_str(FEDICT) + " " + str(facval) + "\n")
        sys.stderr.write("my calculation = " + str(gsum) + " vs " +
                         str(numerator.scalar_value()) + "\n")
        for j in range(sentlen):
            sys.stderr.write(":" + str(j) + "\t")
            if not USE_SPAN_CLIP or j <= ALLOWED_SPANLEN:
                sys.stderr.write("0 ")
            istart = 0
            if USE_SPAN_CLIP and j > ALLOWED_SPANLEN:
                istart = max(0, j - ALLOWED_SPANLEN - 1)
            for i in range(istart, j):
                sys.stderr.write(str(i + 1) + " ")
            sys.stderr.write("\n")
        raise Exception("negative probability! probably overcounting spans?",
                        numerator.scalar_value(), partition.scalar_value(),
                        lossexp.scalar_value())
    return lossexp
def get_hinge_loss(factorexprs, gold_fes, valid_fes, sentlen):
    """Structured hinge loss: best cost-augmented prediction minus gold score.

    Returns None when the cost-augmented argmax exactly matches the gold
    factor set (zero loss); raises if the predicted score is somehow below
    the gold score, which should be impossible for a correct max.
    """
    goldfactors = []
    for feid in gold_fes:
        for span in gold_fes[feid]:
            goldfactors.append(Factor(span[0], span[1], feid))
    numerator = dy.esum([factorexprs[gold] for gold in goldfactors])

    denominator, predfactors = get_hinge_partition(
        factorexprs, goldfactors, valid_fes, sentlen)
    if set(predfactors) == set(goldfactors):
        return None

    hingeloss = denominator - numerator
    if denominator.scalar_value() < numerator.scalar_value():
        raise Exception("ERROR: predicted cost less than gold!",
                        denominator.scalar_value(),
                        numerator.scalar_value(),
                        hingeloss.scalar_value())
    return hingeloss
def decode(factexprscalars, sentlen, valid_fes):
    """Viterbi-decode the best segmentation from precomputed factor scalars.

    Works in exponentiated-score space: alpha[end] is the best product of
    exp(score) over segmentations of tokens 0..end. Backpointers record the
    (start, fe) of the last span; the backtrace is then merged so that
    adjacent spans with the same (non-null) frame element collapse into one,
    combating spurious ambiguity.

    Args:
        factexprscalars: dict mapping Factor(i, j, y) -> float score.
        sentlen: number of tokens.
        valid_fes: frame-element ids considered for every span.

    Returns:
        dict mapping frame-element id -> list of merged (start, end) spans.
    """
    alpha = [None] * sentlen
    backpointers = [None] * sentlen
    if USE_DROPOUT:
        raise Exception("incorrect usage of dropout, turn off!\n")

    # Initialization: single span covering tokens 0..end (skipped when
    # clipping forbids the width, leaving alpha[end] as None).
    for end in range(sentlen):
        if USE_SPAN_CLIP and end > ALLOWED_SPANLEN:
            continue
        best, bestfe = float("-inf"), None
        for fe in valid_fes:
            score = math.exp(factexprscalars[Factor(0, end, fe)])
            if score > best:
                best, bestfe = score, fe
        alpha[end] = best
        backpointers[end] = (0, bestfe)

    # Recursion: extend the best prefix segmentation with span (split+1..end).
    for end in range(sentlen):
        best = float("-inf")
        bestbeg = bestfe = None
        if alpha[end] is not None:
            best = alpha[end]
            bestbeg, bestfe = backpointers[end]
        first_split = 0
        if USE_SPAN_CLIP and end > ALLOWED_SPANLEN:
            first_split = max(0, end - ALLOWED_SPANLEN - 1)
        for split in range(first_split, end):
            for fe in valid_fes:
                candidate = (math.exp(factexprscalars[Factor(split + 1, end, fe)])
                             * alpha[split])
                if candidate > best:
                    best = candidate
                    bestfe = fe
                    bestbeg = split + 1
        alpha[end] = best
        backpointers[end] = (bestbeg, bestfe)

    # Backtrace: walk backpointers from the last token, grouping spans by FE.
    argmax = {}
    end = sentlen - 1
    beg = backpointers[end][0]
    while beg >= 0:
        fe = backpointers[end][1]
        argmax.setdefault(fe, []).append((beg, end))
        if beg == 0:
            break
        end = beg - 1
        beg = backpointers[end][0]

    # merging neighboring spans in prediction (to combat spurious ambiguity)
    mergedargmax = {}
    for fe, spans in argmax.items():
        if fe == NOTANFEID:
            # Null-label spans are kept as-is, never merged.
            mergedargmax[fe] = []
            mergedargmax[fe].extend(spans)
            continue
        spans.sort()
        merged = [spans[0]]
        for span in spans[1:]:
            last = merged[-1]
            if span[0] == last[1] + 1:
                # Adjacent to the previous span: fuse them.
                merged[-1] = (last[0], span[1])
            else:
                merged.append(span)
        mergedargmax[fe] = merged
    return mergedargmax
sys.stderr.write("Ran out of patience, ending training.\n") sys.stderr.write("Best model evaluation:\n{}\n".format(best_dev_eval_str)) sys.stderr.write("Best model saved to {}\n".format(model_file_name)) sys.stderr.write(" [took %.3fs]\n" % (time.time() - starttime)) break loss = 0.0 elif options.mode == "ensemble": exfs = {x: {} for x in range(len(devexamples))} USE_DROPOUT = False sys.stderr.write("reading ensemble factors...") enf = open(in_ens_file, "rb") for l in enf: fields = l.split("\t") fac = Factor(int(fields[1]), int(fields[2]), FEDICT.getid(fields[3])) exfs[int(fields[0])][fac] = float(fields[4]) enf.close() sys.stderr.write("done!\n") teststarttime = time.time() sys.stderr.write("testing " + str(len(devexamples)) + " examples ...\n") testpredictions = [] for tidx, testex in enumerate(devexamples, 1): if tidx % 100 == 0: sys.stderr.write(str(tidx) + "...") valid_fes_for_frame = frmfemap[testex.frame.id] + [NOTANFEID] testargmax = decode(exfs[tidx - 1], len(testex.tokens), valid_fes_for_frame) testpredictions.append(testargmax)