def main(): sys.stdout = open(os.path.join(project_base, data_pre_1 + "_" + data_pre_2 + "_between_log"), "w") data_pre = data_pre_1 try: with open(os.path.join(project_base, data_pre + "_dmp"), "r") as fp: print "un-pickling..." ds = cPickle.load(fp) except IOError: print "something wrong with the pickle file path" between_tls = [] for doc in ds.docs.values(): between_tls.extend(doc.tlinks_between_gold) cfg = Config(mallet_bin, project_temp, data_pre + "_between") train_cfg = train_on_data(between_tls, lambda x: x.ds_id, lambda x: x.type, between_tlink_feats, "MaxEnt", cfg) data_pre = data_pre_2 try: with open(os.path.join(project_base, data_pre + "_dmp"), "r") as fp: print "un-pickling..." ds = cPickle.load(fp) except IOError: print "something wrong with the pickle file path" pair = [0, 0, 0, 0] label = [0, 0, 0, 0] for doc in ds.docs.values(): gold = doc.tlinks_between_gold candid = create_candid_event2sectime(doc) coref = create_candid_coref(doc) for c in coref: tl = search_tlink_between_enty(c.from_enty, c.to_enty, candid) if tl: tl.pred = "OVERLAP" else: candid.append(c) update_candid_id(candid, doc.ds_id) cfg = Config(mallet_bin, project_temp, data_pre + "_between_" + doc.ds_id) ti = apply_to_data(candid, lambda x: x.ds_id, between_tlink_feats, train_cfg.model_path, cfg) for c in candid: c.pred = ti.ID2pred[c.ds_id] print "document:", doc.ds_id rslt = verify(candid, gold) print rslt pair = [x + y for (x, y) in zip(pair, rslt[0])] label = [x + y for (x, y) in zip(label, rslt[1])] print "pair", pair print "label", label
def main():
    # Within- and between-sentence TLINK prediction that writes one predicted
    # XML file per document.
    #
    # NOTE(review): this definition is followed by a closing triple-quote
    # delimiter whose opener is not visible here, so it looks like disabled
    # code kept inside a string literal -- confirm before relying on it.
    # Only '#' comments are used in this block for that reason.

    # ---- load the training dataset ----
    data_pre = data_pre_1
    try:
        with open(os.path.join(project_base, data_pre + '_dmp'), 'r') as fp:
            print 'un-pickling...'
            ds = cPickle.load(fp)
    except IOError:
        # NOTE(review): the error is only printed; execution continues and
        # `ds` is unbound below if the open/load failed.
        print 'something wrong with the pickle file path'

    # Training instances: gold and closure within-sentence tlinks of every doc.
    within_tls = []
    for doc in ds.docs.values():
        within_tls.extend(doc.tlinks_within_gold)
        within_tls.extend(doc.tlinks_within_closure)

    # NOTE(review): the return value of train_on_data is discarded here and
    # `train_cfg` stays the Config object, whereas the other main() variants in
    # this file read model_path from train_on_data's return value -- confirm
    # that Config exposes model_path.
    train_cfg = Config(mallet_bin, project_temp, data_pre + '_within')
    train_on_data(within_tls, lambda x: x.ds_id, lambda x: x.type, within_tlink_feats, 'MaxEnt', train_cfg)

    # ---- load the test dataset ----
    data_pre = data_pre_2
    try:
        with open(os.path.join(project_base, data_pre + '_dmp'), 'r') as fp:
            print 'un-pickling...'
            ds = cPickle.load(fp)
    except IOError:
        # NOTE(review): on failure `ds` still refers to the training dataset
        # loaded above, so the loop below would run on the training data.
        print 'something wrong with the pickle file path'

    # Candidates accumulated over all documents (not read again in this block).
    within_candids = []
    between_candids = []
    for doc in ds.docs.values():
        # within: build candidates, classify them, keep label and probabilities
        w_candid = create_candid_within3(doc)
        update_candid_id(w_candid, doc.ds_id)
        within_candids.extend(w_candid)
        test_cfg = Config(mallet_bin, project_temp, data_pre + '_within_' + doc.ds_id)
        ti = apply_to_data(w_candid, lambda x: x.ds_id, within_tlink_feats, train_cfg.model_path, test_cfg)
        for c in w_candid:
            c.pred = ti.ID2pred[c.ds_id]
            c.probs = ti.ID2probs[c.ds_id]

        # between: event-to-section-time candidates merged with coreference
        # candidates; a coreference pair that is already a candidate is
        # labelled OVERLAP, any other pair is appended.
        # NOTE(review): no classifier is applied to b_candid here, so `pred`
        # is only assigned for coreference matches in this function -- the
        # remaining candidates presumably carry `pred` already; verify.
        b_candid = create_candid_event2sectime(doc)
        coref = create_candid_coref(doc)
        for c in coref:
            tl = search_tlink_between_enty(c.from_enty, c.to_enty, b_candid)
            if tl:
                tl.pred = 'OVERLAP'
            else:
                b_candid.append(c)
        between_candids.extend(b_candid)

        # Documents without a gold XML file get no predicted XML.
        gold_xml = project_base + '/' + data_pre_2 + '_i_sub' + '/' + doc.ds_id + '.xml'
        if not os.path.exists(gold_xml):
            print 'skip xml', doc.ds_id
            continue
        pred_xml = project_base + '/' + data_pre_2 + '_pred_all' + '/' + doc.ds_id + '.xml'

        # recover=True asks the parser to keep going on malformed XML.
        parser = etree.XMLParser(recover=True)
        try:
            tree = etree.parse(gold_xml, parser)
        except:
            # Report, then re-raise the original exception unchanged.
            print 'Something wrong with opening/parsing specified input xml file'
            raise
        root = tree.getroot()
        tags = root.find('TAGS')

        # for evaluation purposes, remove the original TLINK elements
        tlinks = tags.findall('TLINK')
        for tl in tlinks:
            tags.remove(tl)

        # Emit one TLINK element per predicted link, numbered TL0, TL1, ...
        counter = 0
        for tl in w_candid + b_candid:
            tl_elmt = etree.Element('TLINK')
            tl_elmt.set('id', 'TL' + str(counter))
            tl_elmt.set('fromID', tl.from_enty.origin_id)
            tl_elmt.set('fromText', str(tl.from_enty))
            tl_elmt.set('toID', tl.to_enty.origin_id)
            tl_elmt.set('toText', str(tl.to_enty))
            tl_elmt.set('type', tl.pred)
            # the 'tail' is set so that each TLINK will start from a new line
            tl_elmt.tail = '\n'
            tags.append(tl_elmt)
            counter += 1

        # Serialise the modified tree; the replace() puts a space before the
        # closing '/>' of every self-closing element.
        with open(pred_xml, 'w') as fp:
            fp.write(etree.tostring(root, xml_declaration=True).replace('/>', ' />'))
"""
def main(): data_pre = data_pre_1 try: with open(os.path.join(project_base, data_pre + "_dmp"), "r") as fp: print "un-pickling..." ds = cPickle.load(fp) except IOError: print "something wrong with the pickle file path" within_tls = [] for doc in ds.docs.values(): within_tls.extend(doc.tlinks_within_gold) within_tls.extend(doc.tlinks_within_closure) cfg = Config(mallet_bin, project_temp, data_pre + "_within") train_cfg = train_on_data(within_tls, lambda x: x.ds_id, lambda x: x.type, within_tlink_feats, "MaxEnt", cfg) data_pre = data_pre_2 try: with open(os.path.join(project_base, data_pre + "_dmp"), "r") as fp: print "un-pickling..." ds = cPickle.load(fp) except IOError: print "something wrong with the pickle file path" all_a = [] all_c = [] all_u = [] all_i = [] all_m = [] all_um = [] all_uw = [] all_mi = [] all_within = [] all_candid = [] for doc in ds.docs.values(): within = doc.tlinks_within_gold + doc.tlinks_within_closure for tl in within: tl.sent.tlinks_within.append(tl) all_within.extend(within) candid = create_candid_within3(doc) update_candid_id(candid, doc.ds_id) all_candid.extend(candid) cfg = Config(mallet_bin, project_temp, data_pre + "_within_" + doc.ds_id) ti = apply_to_data(candid, lambda x: x.ds_id, within_tlink_feats, train_cfg.model_path, cfg) for c in candid: c.pred = ti.ID2pred[c.ds_id] c.probs = ti.ID2probs[c.ds_id] # for s in doc.sents: # expand(s) for s in doc.sents: # print s.ds_id """ if s.freq_tx != []: print 'freq_tx:', s.freq_tx for tl in s.freq_tl: tl.pred = 'OVERLAP' tx3s_copy = s.timex3s[:] for tx in s.freq_tx: tx3s_copy.remove(tx) a, c, u, i = get_conflict_info2(s.events + tx3s_copy, s.candids_within, lambda x: x.span[0].begin, lambda x: x.pred) """ a, c, u, i = get_conflict_info2( s.events + s.timex3s, s.candids_within, lambda x: x.span[0].begin, lambda x: x.pred ) all_a.extend(a) all_c.extend(c) all_u.extend(u) all_i.extend(i) print "document:", doc.ds_id, "\n" for s in doc.sents: # print s.ds_id # m, um, uw, mi = 
verify(s.candids_within + s.freq_tl, s.tlinks_within) m, um, uw, mi = verify(s.candids_within, s.tlinks_within) all_m.extend(m) all_um.extend(um) all_uw.extend(uw) all_mi.extend(mi) """ resolve_conflict_within2(s) print 'after resolution:', doc.ds_id, '\n' rslt = verify(s.candids_within, s.tlinks_within) print s.ds_id, rslt, '\n' pair1 = [x + y for (x, y) in zip(pair1, rslt[0])] label1 = [x + y for (x, y) in zip(label1, rslt[1])] """ all_rslt = [all_a, all_c, all_u, all_i, all_m, all_um, all_uw, all_mi, all_within, all_candid] try: with open(os.path.join(project_base, data_pre_2 + "_within_result_dmp"), "w") as fp: cPickle.dump(all_rslt, fp) except: print "something wrong when dumping"