Esempio n. 1
0
def create_candid_duration2date(doc, max_sent_dist=7):
    """
    Link DURATION TimeX3s to the DATE TimeX3 of a nearby sentence.

    A sentence is only processed when it contains at least one TimeX3 of
    type 'DURATION' and no TimeX3 of type 'DATE'; a sentence that already
    has its own date is skipped.  Neighbouring sentences are then probed at
    growing distance (the previous sentence first, then the following one)
    and the search stops at the first neighbour that contains a DATE.

    Args:
        doc: document exposing `sents`; every sentence exposes `timex3s`
            and `num`, where `num` is used as the sentence's index into
            `doc.sents`.
        max_sent_dist: how many sentences away (in each direction) to look
            for a DATE.  Defaults to 7, the window the function always used.

    Returns:
        List of candidate tlinks built by `create_tlink_between_enty`.  For
        a preceding sentence the link goes date -> duration, for a following
        sentence it goes duration -> date.
    """
    candid = []
    for s in doc.sents:
        has_date = any(tx.type == 'DATE' for tx in s.timex3s)
        durations = [tx for tx in s.timex3s if tx.type == 'DURATION']
        # The previous guard (`not len(date) == 0`) selected exactly the
        # sentences that DO contain a date, the opposite of the intent.
        if has_date or not durations:
            continue
        # Single parenthesised argument: same output on Python 2 and 3.
        print('%d %s' % (len(durations), 'duraion in sentence'))
        for dist in range(1, max_sent_dist + 1):
            prev_idx = s.num - dist
            if prev_idx >= 0:
                dates = [tx for tx in doc.sents[prev_idx].timex3s if tx.type == 'DATE']
                if dates:
                    # Only the first DATE of the neighbour is used.
                    for tx in durations:
                        candid.append(
                            create_tlink_between_enty('dummy_id', dates[0], tx, src='candid'))
                    break
            next_idx = s.num + dist
            if next_idx < len(doc.sents):
                dates = [tx for tx in doc.sents[next_idx].timex3s if tx.type == 'DATE']
                if dates:
                    for tx in durations:
                        candid.append(
                            create_tlink_between_enty('dummy_id', tx, dates[0], src='candid'))
                    break
    return candid
Esempio n. 2
0
def expand(s):
    """
    Add voted tlinks between non-adjacent entities of sentence `s`.

    Events and TimeX3s of the sentence are ordered by `span[0].begin`.  For
    every pair of entities that are two or more positions apart (closest
    pairs first), each entity lying between them acts as a bridge: the
    tlinks from->bridge and bridge->to are looked up in `s.candids_within`
    and their `(pred, pred)` combination casts a vote.  Combinations found
    in the first three entries of `accepted_state` vote BEFORE, entries
    three to five vote AFTER, and those in `unknown_state` vote OVERLAP.

    A pair that received at least one vote gets its `pred` from
    `get_kth_large_key(votes)` (presumably the winning label; confirm
    against that helper) and is appended to `s.candids_within`, so it can
    serve as a bridge for wider pairs later in the same call.

    Nothing is returned; the effect is the mutation of `s.candids_within`.
    """
    accepted_pairs = [state[0:2] for state in accepted_state]
    before_pairs = accepted_pairs[0:3]
    after_pairs = accepted_pairs[3:6]
    overlap_pairs = [state[0:2] for state in unknown_state]

    # Collected for parity with the original; not returned by this function.
    inferred = []

    ordered = sorted(s.events + s.timex3s, key=lambda enty: enty.span[0].begin)
    total = len(ordered)
    for gap in range(2, total):
        for start in range(total - gap):
            stop = start + gap
            src_enty = ordered[start]
            dst_enty = ordered[stop]
            link = create_tlink_between_enty('dummy_id', src_enty, dst_enty, src='expand')
            link.votes = {'BEFORE': 0, 'OVERLAP': 0, 'AFTER': 0}

            for bridge in ordered[start + 1:stop]:
                first_leg = search_tlink_between_enty(src_enty, bridge, s.candids_within)
                second_leg = search_tlink_between_enty(bridge, dst_enty, s.candids_within)
                if not (first_leg and second_leg):
                    print('some bridge tlink is not ready')
                    continue
                # The three checks are independent: one combination may
                # appear in more than one table and then votes in each.
                if (first_leg.pred, second_leg.pred) in before_pairs:
                    link.votes['BEFORE'] += 1
                if (first_leg.pred, second_leg.pred) in after_pairs:
                    link.votes['AFTER'] += 1
                if (first_leg.pred, second_leg.pred) in overlap_pairs:
                    link.votes['OVERLAP'] += 1

            if sum(link.votes.values()) > 0:
                link.pred = get_kth_large_key(link.votes)
                inferred.append(link)
                s.candids_within.append(link)
Esempio n. 3
0
def create_candid_within3(doc):
    """
    Build a candidate tlink for every pair of entities inside each sentence.

    For each sentence in `doc.sents` the events and TimeX3s are ordered by
    `span[0].begin` and every 2-combination (earlier entity first) becomes a
    tlink created with id 'dummy_id' and src='candid'.

    Side effect: each sentence's `candids_within` is reset to the list of
    links created for that sentence.

    Returns:
        All created links, across sentences, in creation order.
    """
    all_links = []
    for sent in doc.sents:
        within = []
        sent.candids_within = within
        ordered = sorted(sent.events + sent.timex3s,
                         key=lambda enty: enty.span[0].begin)
        # Earlier experimental handling of FREQUENCY TimeX3s was disabled
        # here and has been dropped; all entities are treated alike.
        for earlier, later in combinations(ordered, 2):
            link = create_tlink_between_enty('dummy_id', earlier, later, src='candid')
            within.append(link)
            all_links.append(link)
    return all_links
Esempio n. 4
0
def create_candid_within2(doc):
    """
    Build candidate tlinks between neighbouring entities of each sentence.

    For each sentence in `doc.sents` the events and TimeX3s are ordered by
    `span[0].begin`; every entity is linked to the one immediately after it
    (id 'dummy_id', src='candid').

    Side effect: each sentence's `candids_within` is reset to the list of
    links created for that sentence.

    Returns:
        All created links, across sentences, in creation order.
    """
    all_links = []
    for sent in doc.sents:
        within = []
        sent.candids_within = within
        ordered = sorted(sent.events + sent.timex3s,
                         key=lambda enty: enty.span[0].begin)
        # Pair each entity with its right-hand neighbour.
        for left, right in zip(ordered, ordered[1:]):
            link = create_tlink_between_enty('dummy_id', left, right, src='candid')
            within.append(link)
            all_links.append(link)
    return all_links
Esempio n. 5
0
def create_candid_coref(doc):
    """
    Build OVERLAP tlinks between events in different sentences that share a
    head lemma.

    Every event of a sentence is compared with every event of each later
    sentence; when `get_phrase_head(...)` of both events has the same
    `lemma`, a tlink (id 'dummy_id') from the earlier to the later event is
    created and its `pred` set to 'OVERLAP'.

    Returns:
        The list of created links.
    """
    sentences = doc.sents[:]
    links = []
    for idx, earlier_sent in enumerate(sentences[:-1]):
        earlier_events = earlier_sent.events
        for later_sent in sentences[idx + 1:]:
            later_events = later_sent.events
            for first, second in product(earlier_events, later_events):
                if get_phrase_head(first).lemma == get_phrase_head(second).lemma:
                    link = create_tlink_between_enty('dummy_id', first, second)
                    link.pred = 'OVERLAP'
                    links.append(link)
    return links
Esempio n. 6
0
def enumerate_possible_between(doc):
    """
    Enumerate every possible cross-sentence tlink of a document.

    The sentences holding the admission/discharge event or TimeX3 are left
    out.  For each remaining sentence, every entity (event or TimeX3) is
    paired with every entity of each later sentence, and a tlink with id
    `<doc.ds_id>_ptl_<n>` (n counting from 0) is created for the pair.

    `doc.sents` itself is not modified; the function works on a copy.

    Returns:
        The list of created links, in creation order.
    """
    sent_src = doc.sents[:]
    anchors = (doc.admission_event, doc.admission_timex3,
               doc.discharge_event, doc.discharge_timex3)
    for anchor in anchors:
        # Two anchors may live in the same sentence; an unguarded
        # list.remove() would raise ValueError on the second one.
        if anchor and anchor.sent in sent_src:
            sent_src.remove(anchor.sent)

    ptls = []
    for idx, fs in enumerate(sent_src[:-1]):
        fs_entits = fs.events + fs.timex3s
        for ts in sent_src[idx + 1:]:
            ts_entits = ts.events + ts.timex3s
            for from_enty, to_enty in product(fs_entits, ts_entits):
                # len(ptls) is the running counter of links created so far.
                tl_id = doc.ds_id + '_ptl_' + str(len(ptls))
                ptls.append(create_tlink_between_enty(tl_id, from_enty, to_enty))
    return ptls
Esempio n. 7
0
def create_candid_event2sectime(doc):
    """
    Link each event to the time anchor of the section it belongs to.

    The admission and discharge events themselves are excluded from the
    per-event pass.  For every other event:

    * `sent.sect == 0`: link from the admission TimeX3 (or, if that is
      missing, the admission event) to the event; pred is 'OVERLAP' when
      the lower-cased event text equals 'admission', otherwise 'AFTER'.
    * `sent.sect == 1`: link from the discharge TimeX3 (or the discharge
      event) to the event; pred is 'OVERLAP' when the lower-cased event
      text contains 'discharge', otherwise 'AFTER'.

    Finally, admission event -> admission TimeX3 and discharge event ->
    discharge TimeX3 are linked with 'OVERLAP' whenever both ends exist.

    `doc.events` is not modified; the function works on a copy.

    Returns:
        The list of created links (id 'dummy_id', src='candid').
    """
    remaining = doc.events[:]
    for section_event in (doc.admission_event, doc.discharge_event):
        if section_event:
            remaining.remove(section_event)

    links = []
    for event in remaining:
        if event.sent.sect == 0:
            anchor = doc.admission_timex3 or doc.admission_event
            link = create_tlink_between_enty('dummy_id', anchor, event, src='candid')
            link.pred = 'OVERLAP' if event.text.lower() == 'admission' else 'AFTER'
            links.append(link)
        if event.sent.sect == 1:
            anchor = doc.discharge_timex3 or doc.discharge_event
            link = create_tlink_between_enty('dummy_id', anchor, event, src='candid')
            link.pred = 'OVERLAP' if 'discharge' in event.text.lower() else 'AFTER'
            links.append(link)

    # Tie each section event to its own timestamp when both are known.
    section_pairs = (
        (doc.admission_event, doc.admission_timex3),
        (doc.discharge_event, doc.discharge_timex3),
    )
    for section_event, section_time in section_pairs:
        if section_event and section_time:
            link = create_tlink_between_enty('dummy_id', section_event, section_time, src='candid')
            link.pred = 'OVERLAP'
            links.append(link)
    return links