コード例 #1
0
ファイル: loader.py プロジェクト: zwkatgithub/segment
    def preprocess(self, data, vocab, opt):
        """Preprocess raw examples and convert them to id-based features.

        Args:
            data: iterable of example dicts with 'token', 'stanford_*',
                subject/object span fields and 'relation'.
            vocab: vocabulary object exposing ``word2id``.
            opt: option dict; ``opt['lower']`` lowercases tokens first.

        Returns:
            List of feature tuples (full sequence, plus two pruned-tree
            sequences), one tuple per example.
        """
        processed = []
        for d in data:
            # Copy the token list so the anonymizing slice-assignments below
            # never mutate the caller's d['token'] (the original aliased it
            # whenever opt['lower'] was off).
            tokens = list(d['token'])
            if opt['lower']:
                tokens = [t.lower() for t in tokens]
            # anonymize tokens: replace entity spans with typed placeholders
            ss, se = d['subj_start'], d['subj_end']
            obj_s, obj_e = d['obj_start'], d['obj_end']
            tokens[ss:se + 1] = ['SUBJ-' + d['subj_type']] * (se - ss + 1)
            tokens[obj_s:obj_e + 1] = ['OBJ-' + d['obj_type']] * (obj_e - obj_s + 1)
            tokens_idxs = map_to_ids(tokens, vocab.word2id)
            head = [int(x) for x in d['stanford_head']]
            # at least one token must be the dependency root (head == 0)
            assert any(x == 0 for x in head)
            pos = map_to_ids(d['stanford_pos'], constant.POS_TO_ID)
            ner = map_to_ids(d['stanford_ner'], constant.NER_TO_ID)
            deprel = map_to_ids(d['stanford_deprel'], constant.DEPREL_TO_ID)
            seq_len = len(tokens)
            subj_positions = get_positions(d['subj_start'], d['subj_end'], seq_len)
            obj_positions = get_positions(d['obj_start'], d['obj_end'], seq_len)

            relation = constant.LABEL_TO_ID[d['relation']]
            # Build pruned-tree token sequences at prune levels 0 and 1
            # (presumably shortest dependency path and 1-hop expansion —
            # confirm against head_to_tree's prune semantics).
            stp_tokens_idxs = tree_to_seq(
                head_to_tree(np.array(head), np.array(tokens), seq_len, 0,
                             np.array(subj_positions),
                             np.array(obj_positions)), tokens)
            hop1_tokens_idxs = tree_to_seq(
                head_to_tree(np.array(head), np.array(tokens), seq_len, 1,
                             np.array(subj_positions),
                             np.array(obj_positions)), tokens)

            stp_tokens_idxs, stp_pos, stp_ner, stp_deprel, stp_subj_positions, stp_obj_positions = get_path_input(
                tokens, pos, ner, deprel, stp_tokens_idxs,
                'SUBJ-' + d['subj_type'], 'OBJ-' + d['obj_type'], vocab)
            hop1_tokens_idxs, hop1_pos, hop1_ner, hop1_deprel, hop1_subj_positions, hop1_obj_positions = get_path_input(
                tokens, pos, ner, deprel, hop1_tokens_idxs,
                'SUBJ-' + d['subj_type'], 'OBJ-' + d['obj_type'], vocab)

            # relation is repeated once per view (full / stp / hop1) so each
            # view carries its own label downstream.
            processed.append(
                (tokens_idxs, pos, ner, deprel, subj_positions, obj_positions,
                 relation, stp_tokens_idxs, stp_pos, stp_ner, stp_deprel,
                 stp_subj_positions, stp_obj_positions, relation,
                 hop1_tokens_idxs, hop1_pos, hop1_ner, hop1_deprel,
                 hop1_subj_positions, hop1_obj_positions, relation))

        return processed
コード例 #2
0
    def inputs_to_tree_reps(self, dep_head, seq_len, subj_pos, obj_pos,
                            dep_rel, device):
        """Build batched adjacency and dep-relation matrices from head lists.

        Returns a pair of Variables on `device`, each of shape
        (batch, maxlen, maxlen).
        """
        max_len = max(seq_len)
        prune_k = self.config.syntax['prune_k']
        trees = []
        for i in range(len(seq_len)):
            trees.append(
                head_to_tree(dep_head[i], seq_len[i], prune_k,
                             subj_pos[i], obj_pos[i], dep_rel[i]))

        # self_loop is configurable because adj doubles as a masking matrix
        # during graph attention.
        adj_rows = []
        rel_rows = []
        for t in trees:
            adj, rel = tree_to_adj(
                max_len,
                t,
                directed=False,
                self_loop=self.config.syntax['adj_self_loop'])
            adj_rows.append(adj.reshape(1, max_len, max_len))
            rel_rows.append(rel.reshape(1, max_len, max_len))

        adj_batch = torch.from_numpy(np.concatenate(adj_rows, axis=0))
        rel_batch = torch.from_numpy(np.concatenate(rel_rows, axis=0))
        return (Variable(adj_batch.to(device)),
                Variable(rel_batch.to(device)))
コード例 #3
0
ファイル: gcn.py プロジェクト: Saintfe/RECENT
 def inputs_to_tree_reps(head, words, l, prune, subj_pos, obj_pos):
     # Convert a batch of head-index arrays into one stacked adjacency
     # tensor of shape (batch, maxlen, maxlen).
     # NOTE(review): `maxlen` and `self` are not defined in this snippet —
     # presumably captured from an enclosing method's scope (this looks like
     # a nested helper torn out of its method); confirm before reuse.
     head, words, subj_pos, obj_pos = head.cpu().numpy(), words.cpu().numpy(), subj_pos.cpu().numpy(), obj_pos.cpu().numpy()
     trees = [head_to_tree(head[i], words[i], l[i], prune, subj_pos[i], obj_pos[i]) for i in range(len(l))]
     adj = [tree_to_adj(maxlen, tree, directed=False, self_loop=False).reshape(1, maxlen, maxlen) for tree in trees]
     adj = np.concatenate(adj, axis=0)
     adj = torch.from_numpy(adj)
     # Moves to GPU only when the enclosing model was configured for CUDA.
     return Variable(adj.cuda()) if self.opt['cuda'] else Variable(adj)
コード例 #4
0
def extract_trees(data, prune_k):
    """Group pruned dependency trees by their gold relation label.

    Args:
        data: iterable of TACRED-style example dicts.
        prune_k: pruning distance forwarded to ``head_to_tree``.

    Returns:
        Dict mapping relation label -> list of trees.
    """
    relation2trees = {}
    for d in data:
        tokens = list(d['token'])
        # anonymize tokens: replace entity spans with typed placeholders
        ss, se = d['subj_start'], d['subj_end']
        obj_s, obj_e = d['obj_start'], d['obj_end']
        tokens[ss:se + 1] = ['SUBJ-' + d['subj_type']] * (se - ss + 1)
        tokens[obj_s:obj_e + 1] = ['OBJ-' + d['obj_type']] * (obj_e - obj_s + 1)
        head = [int(x) for x in d['stanford_head']]
        deprel = d['stanford_deprel']
        seq_len = len(tokens)
        subj_positions = get_positions(d['subj_start'], d['subj_end'], seq_len)
        obj_positions = get_positions(d['obj_start'], d['obj_end'], seq_len)
        relation = d['relation']
        # This head_to_tree variant returns a pair; only the tree is kept.
        _, tree = head_to_tree(head=np.array(head),
                               tokens=np.array(tokens),
                               len_=seq_len,
                               prune=prune_k,
                               subj_pos=np.array(subj_positions),
                               obj_pos=np.array(obj_positions),
                               deprel=np.array(deprel))
        relation2trees.setdefault(relation, []).append(tree)
    return relation2trees
コード例 #5
0
 def inputs_to_tree_reps(head, l):
     # Build a batched (batch, maxlen, maxlen) adjacency tensor from
     # per-example head indices, without any pruning arguments.
     # NOTE(review): `maxlen` and `self` are not defined in this snippet —
     # presumably closed over from an enclosing method; confirm before reuse.
     trees = [head_to_tree(head[i], l[i]) for i in range(len(l))]
     adj = [
         tree_to_adj(maxlen, tree,
                     directed=False).reshape(1, maxlen, maxlen)
         for tree in trees
     ]
     adj = np.concatenate(adj, axis=0)
     adj = torch.from_numpy(adj)
     # Moves to GPU only when the enclosing model was configured for CUDA.
     return Variable(adj.cuda()) if self.opt['cuda'] else Variable(adj)
コード例 #6
0
def inputs_to_tree_reps(head,
                        words,
                        l,
                        prune,
                        subj_pos,
                        obj_pos,
                        deprel=None,
                        maxlen=100):
    """Build forward and reverse typed-adjacency tensors for a batch.

    Edges carry dependency-relation type ids; reverse edges are shifted by
    ``constant.DEPREL_COUNT`` so the two directions occupy disjoint id
    ranges. Returns (adjs, adjs_r), each of shape (batch, maxlen, maxlen)
    on the GPU (CUDA is assumed available).
    """
    head = head.cpu().numpy()
    words = words.cpu().numpy()
    subj_pos = subj_pos.cpu().numpy()
    obj_pos = obj_pos.cpu().numpy()
    if deprel is not None:
        deprel = deprel.cpu().numpy()

    trees = []
    for i in range(len(l)):
        if deprel is None:
            trees.append(
                head_to_tree(head[i], words[i], l[i], prune,
                             subj_pos[i], obj_pos[i]))
        else:
            trees.append(
                head_to_tree(head[i], words[i], l[i], prune,
                             subj_pos[i], obj_pos[i], deprel[i]))

    # Directed adjacency with edge_info=True stores edge-type ids; the
    # transpose gives the reverse direction, whose typed entries (> 1) are
    # offset into their own id range.
    fwd_mats = []
    rev_mats = []
    for t in trees:
        mat = tree_to_adj(maxlen, t, directed=True, edge_info=True)
        rev = mat.T.copy()
        rev[rev > 1] += constant.DEPREL_COUNT
        fwd_mats.append(mat.reshape(1, maxlen, maxlen))
        rev_mats.append(rev.reshape(1, maxlen, maxlen))

    adjs = Variable(torch.from_numpy(np.concatenate(fwd_mats, axis=0)).cuda())
    adjs_r = Variable(torch.from_numpy(np.concatenate(rev_mats, axis=0)).cuda())

    return adjs, adjs_r
コード例 #7
0
ファイル: gcn_lstm_elmo_cdr.py プロジェクト: sunyi123/cdr
        def inputs_to_tree_reps(head, token_id, l, prune, subj_positions, obj_positions):
            # Build a batched adjacency tensor; sentences whose pruned tree
            # produced an all-zero adjacency fall back to a simple chain
            # (super- and sub-diagonal) so the graph layer never sees an
            # isolated-node matrix.
            # NOTE(review): `maxlen` and `self` come from the enclosing
            # method's scope (not visible here) — confirm before reuse.
            trees = [head_to_tree(head[i], token_id[i], l[i], prune, subj_positions[i], obj_positions[i]) for i in range(len(l))]
            adj = [tree_to_adj(maxlen, tree, directed=False, self_loop=False).reshape(1, maxlen, maxlen) for tree in
                   trees]
            new_adj=[]
            for instance in adj:

                # instance is (1, maxlen, maxlen); the comparison against a
                # one-element list of a zero matrix broadcasts to an ndarray,
                # so .all() tests "entire adjacency is zero".
                target_zero_array = [np.zeros((len(instance[0][0]), len(instance[0][0])), dtype=np.float32)]
                if (instance==target_zero_array).all():
                    # chain fallback: connect each token to its neighbors
                    new_adj.append([np.eye(len(instance[0][0]), k=1, dtype=np.float32)+np.eye(len(instance[0][0]), k=-1, dtype=np.float32)])
                else:
                    new_adj.append(instance)
            adj = np.concatenate(new_adj, axis=0)
            adj = torch.from_numpy(adj)
            return adj.cuda() if self.opt['cuda'] else adj
コード例 #8
0
    def simulate_data(self, id, aug_id, subj_start, subj_end, obj_start, obj_end):
        """Build a single-example batch for a simulated (augmented) entity pair.

        Looks up the source example by `id` in ``self.data_index``, re-runs
        the tree / adjacency pipeline with the caller-supplied subject and
        object spans, and returns a one-element batch list. The large
        commented block at the end is the (disabled) padding/collation step.
        """
        d = self.data_index[id]

        sid = id
        imp = 0
        relation = self.label2id[d['relation']]
        # if sid in imps.keys():
        #     imp = imps[sid]
        tokens = list(d['token'])

        raw_tokens = copy.deepcopy(tokens)
        pos = map_to_ids(d['stanford_pos'], constant.POS_TO_ID)
        ner = map_to_ids(d['stanford_ner'], constant.NER_TO_ID)
        deprel = map_to_ids(d['stanford_deprel'], constant.DEPREL_TO_ID)
        head = [int(x) for x in d['stanford_head']]
        # subj_id = list(range(ss, se + 1))
        # obj_id = list(range(os, oe + 1))
        l = len(tokens)
        subj_idx = list(range(subj_start, subj_end + 1))
        obj_idx = list(range(obj_start, obj_end + 1))
        # Project-specific head_to_tree variant: returns the tree plus its
        # domain partition and the subject-object tree distance.
        tree, domains, distance = head_to_tree(head, deprel, subj_idx, obj_idx)
        # if d['subj_start'] == subj_span[0] and d['subj_end'] == subj_span[-1] and d['obj_start'] == obj_span[0] and \
        #         d['obj_end'] == obj_span[-1]:
        #     continue
        # NOTE(review): appends the scalar distance onto the raw token list —
        # downstream code presumably strips/reads the last element; confirm.
        raw_tokens.append(distance)
        subj_type = d['stanford_ner'][subj_start]
        obj_type = d['stanford_ner'][obj_start]
        # if distance <= 6:
        #     continue
        depmap, ret, rel, resrel, domain, domain_id, redomian_id = tree_to_adj(l, domains, tree)

        # Anonymize the entity spans with typed placeholders before mapping
        # tokens to vocabulary ids.
        tokens[subj_start:subj_end + 1] = ['SUBJ-' + subj_type] * (subj_end - subj_start + 1)
        tokens[obj_start:obj_end + 1] = ['OBJ-' + obj_type] * (obj_end - obj_start + 1)
        tokens = map_to_ids(tokens, self.vocab.word2id)
        subj_positions = get_positions(subj_start, subj_end, l)
        obj_positions = get_positions(obj_start, obj_end, l)
        # Slice-assigning a zip pairs each raw token with its placeholder
        # (the zip is materialized by the slice assignment).
        raw_tokens[subj_start:subj_end + 1] = zip(raw_tokens[subj_start:subj_end + 1], (
                ['SUBJ-' + subj_type] * (subj_end - subj_start + 1)))
        raw_tokens[obj_start:obj_end + 1] = zip(raw_tokens[obj_start:obj_end + 1], (
                ['OBJ-' + obj_type] * (obj_end - obj_start + 1)))

        batch = [(tokens, pos, subj_positions, obj_positions, ner, depmap, ret, rel, resrel, deprel, domain, domain_id,
                  redomian_id,aug_id,distance,relation)]
        # batch_size = len(batch)
        # batch = list(zip(*batch))
        # # assert len(batch) == 10
        #
        # # sort all fields by lens for easy RNN operations
        # lens = [len(x) for x in batch[0]]
        # batch, orig_idx = sort_all(batch, lens)
        # lens = sorted(lens, reverse=True)
        # maxlen = lens[0]
        # domains = [b.shape[1] for b in batch[8]]
        # max_domain = max(domains)
        # words=batch[0]
        # words = get_long_tensor(words, batch_size)
        # masks = torch.eq(words, 0)
        # pos = get_long_tensor(batch[1], batch_size)
        # ner = get_long_tensor(batch[4], batch_size)
        #
        # depmap = padmat(batch[5], maxlen, maxlen)
        # for i in range(batch_size):
        #     depmap[i][len(tokens):, 0] = 1
        # ret = padmat(batch[6], maxlen, maxlen)
        # rel = padmat(batch[7], maxlen, maxlen)
        # resrel = padmat(batch[8], maxlen, maxlen)
        # deprel = get_long_tensor(batch[9], batch_size)
        # domain = padmat(batch[10], maxlen, max_domain)
        # domain_id = padmat(batch[11], maxlen, max_domain)
        # redomain_id = padmat(batch[12], maxlen, max_domain)
        # # head = get_long_tensor(batch[4], batch_size)
        # subj_positions = get_long_tensor(batch[2], batch_size)
        # obj_positions = get_long_tensor(batch[3], batch_size)
        # # subj_type = get_long_tensor(batch[7], batch_size)
        # # obj_type = get_long_tensor(batch[8], batch_size)
        # length = torch.LongTensor(batch[13])
        # raw_tokens = batch[14]
        # rels = torch.LongTensor(batch[15])
        # ids = batch[16]
        # imp = batch[17]
        # return (
        # words, masks, pos, subj_positions, obj_positions, ner, depmap, ret, rel, resrel, deprel, domain, domain_id,
        # redomain_id, rels,orig_idx, length, raw_tokens, ids, imp, batch[16])
        return batch
コード例 #9
0
    def packdata(self,d):
        """Pack one raw example into model-ready feature tuples.

        Two modes:
          * ``self.corefresolve`` False: a single subject/object pair taken
            straight from the example's annotated spans.
          * ``self.corefresolve`` True: enumerate candidate subject/object
            span pairs (coreference lists when present), build tree features
            per pair, and collect them into ``aspect``.

        Returns a one-element list wrapping the ``aspect`` list.
        NOTE(review): in the non-coref branch ``aspect`` is never assigned,
        so the final ``batch = [aspect]`` would raise NameError — either
        that branch is dead in current callers or packaging code is missing;
        confirm before relying on it.
        """
        if not self.is_soft:
            relation = d['relation']
            # A list-valued relation is a distribution; take its argmax.
            if isinstance(relation, list):
                relation = relation.index(max(relation))
            else:
                relation = self.label2id[relation]
        else:
            # Soft-label training keeps the full distribution.
            relation = d['soft_label']
        # if self.subj is not None:
        #     if d['subj_type']!=self.subj or d['obj_type']!=self.obj:
        #         return []
        #rev_relation = 0
        # # if relation!=40:
        #     continue
        # rev_relation=self.findRevLabel(d['id'],"test")
        # label_count[relation]+=1
        rd = copy.deepcopy(d)
        # if 'conj' in d['stanford_deprel']:
        #     rd=self.removeconj(rd)
        head = [int(x) for x in d['stanford_head']]
        ners2id=constant.NER_TO_ID
        id2ners=dict([(v, k) for k, v in ners2id.items()])
        # subj_id = list(range(ss,se+1))
        # obj_id=list(range(os,oe+1))
        # Exactly one dependency root (head == 0) is expected.
        assert any([x == 0 for x in head])
        tokens = list(rd['token'])
        containDot=True
        if self.opt['lower']:
            tokens = [t.lower() for t in tokens]
        # Remember whether the sentence ends in a period; used later to zero
        # the final mask position.
        if tokens[-1]!='.':
            containDot=False
        raw_tokens = copy.deepcopy(tokens)
        # tokens = map_to_ids(tokens, vocab.word2id)
        pos = map_to_ids(rd['stanford_pos'], constant.POS_TO_ID)
        ner = map_to_ids(rd['stanford_ner'], constant.NER_TO_ID)
        deprel = map_to_ids(rd['stanford_deprel'], constant.DEPREL_TO_ID)
        head = [int(x) for x in rd['stanford_head']]
        sid = rd['id']
        assert any([x == 0 for x in head])
        l = len(tokens)

        if not self.corefresolve:
            # --- single annotated pair ---
            ss, se = rd['subj_start'], rd['subj_end']
            os, oe = rd['obj_start'], rd['obj_end']
            subj_id = list(range(ss, se + 1))
            obj_id = list(range(os, oe + 1))
            tree, domains, distance= head_to_tree(head, deprel, subj_id, obj_id)

            depmap, ret, rel, resrel, domain, domain_subj, domain_obj = tree_to_adj(l, domains, tree)
            # subj_entities, obj_entities = self.getEntitySpan(rd)
            # entities_span = list(set(subj_entities + obj_entities))
            # entity_gragh=self.getEntityGragh(depmap, entities_span)
            # anonymize tokens

            tokens[ss:se + 1] = ['SUBJ-' + rd['subj_type']] * (se - ss + 1)
            tokens[os:oe + 1] = ['OBJ-' + rd['obj_type']] * (oe - os + 1)
            raw_tokens[ss:se + 1] = zip(raw_tokens[ss:se + 1], (['RAWSUBJ-' + d['subj_type']] * (se - ss + 1)))
            raw_tokens[os:oe + 1] = zip(raw_tokens[os:oe + 1], (['RAWOBJ-' + d['obj_type']] * (oe - os + 1)))

            # NOTE(review): the tree distance is appended to the raw token
            # list; downstream code presumably reads the last element.
            raw_tokens.append(distance)
            subj_positions = get_positions(ss, se, l)
            obj_positions = get_positions(os, oe, l)
            tokens = map_to_ids(tokens, self.vocab.word2id)
            sdp_mask=1*(domain.T[1]==0)
        else:
            # --- enumerate coreference-expanded pairs ---
            #entityspans=self.getEntitySpan(d)
            src_subj=list(range(d['subj_start'],d['subj_end'] + 1))
            src_obj=list(range(d['obj_start'], d['obj_end'] + 1))

            if 'subj_list' in rd.keys():
                subj_list=rd['subj_list']
                obj_list=rd['obj_list']
            else:
                subj_list=[src_subj]
                obj_list=[src_obj]
            # for span in subj_list:
            #     entityspans.remove(subj_list)
            # relpairs=[]
            # # interpairs=[]
            # # corefpairs=[]
            # for subj in subj_list:
            #     for obj in obj_list:
            #         if subj!=obj:
            #             relpairs.append([subj,obj])
            # corefpairs.append()
            # for s in entityspans:
            #     for o in entityspans:
            #         if s!=o:
            #             if [s,o] not in relpairs:
            #                 interpairs.append([s,o])
            # entity_mask=[]
            # coref_mask=[]
            # for pair in interpairs:
            #     mask=[0] * l
            #     mask[pair[0]]=1
            #     mask[pair[1]]=1
            # subj_mask=[]

            def notinter(a,b):
                # True when the two index spans do not overlap.
                return len(set(a)&set(b))==0

            relpairs=[]
            for subj in subj_list:
                for obj in obj_list:
                    if notinter(subj,obj):
                        relpairs.append([subj,obj])

            entity_dep=constant.no_pass
            entity_ids=[]
            for i in range(len(tokens)):
                if deprel[i] in entity_dep:
                    entity_ids.append([i])


            # NOTE(review): the literals 2/3 and [15, 20] below are magic NER
            # and POS ids specific to this project's constant tables; their
            # meaning cannot be confirmed from this file.
            entity_ner = [ners2id[d['subj_type']]]
            if d['obj_type'] in ners2id.keys():
                entity_ner.append(ners2id[d['obj_type']])
            if 3 in entity_ner:
                entity_pos = [15, 20]
            else:
                entity_pos = []
            tree, domains, distance,relpair,midhead,entity_chains,sdp_domain = head_to_treeEval(head, deprel, ner,pos,entity_ner,entity_pos,relpairs,build_mid=True)
            # filterrelpair=[]
            # for pair in relpairs:
            #     subj=pair[0]
            #     obj=pair[1]
            #     if isinstance(subj,list):
            #         subj_end=subj[-1]
            #         subj_start=subj[0]
            #         cur=subj_end
            #         h=head[cur]-1
            #         while (h<=subj_end and h>=subj_start):
            #             cur=h
            #             h = head[cur] - 1
            #     layers=midhead[cur]
            #     if isinstance(obj,list):
            #         obj_end=obj[-1]
            #         obj_start=obj[0]
            #         cur=obj_end
            #         h=head[cur]-1
            #         while (h<=obj_end and h>=obj_start):
            #             cur=h
            #             h = head[cur] - 1
            #     layero=midhead[cur]
            #     if not(layero!=layers and (layers>obj_end or layers<obj_start) and (layero>subj_end or layero<subj_start)):
            #         filterrelpair.append([subj,obj])
            iscross=0
            # if len(filterrelpair)==0:
            #     iscross=1

                # if relation!=0:
                #     iscross=True
                #     print("miss lit. su")
                # return []
            # else:
            #     tree, domains, distance, relpair, midhead = head_to_treeEval(head, deprel, relpairs, build_mid=False)
            depmap, ret, rel, resrel, domain,sdp_domain, domain_subj, domain_obj = tree_to_adj(l, domains, tree,entity_chains,sdp_domain)
            # relpairs = rawrelpair
            obj_mask=[-1]*l
            subj_mask=[-1]*l
            aspect=[]
            #relpairs=[rawrelpair]
            # Only the pair selected by head_to_treeEval is materialized.
            relpairs=[relpair]
            for pair in relpairs:
                subj_span=pair[0]
                obj_span=pair[1]
                rtokens=copy.deepcopy(tokens)
                rrawtokens=copy.deepcopy(tokens)
                rsubjmask=copy.deepcopy(subj_mask)
                robjmask=copy.deepcopy(obj_mask)

                # Replace each non-primary coref entity span with a typed
                # ENTITY_* placeholder (ner id 2 is folded into 3).
                for entity_pair in entity_chains[1:]:
                    entity_span=entity_pair[0]
                    entityner=ner[entity_span[0]]
                    if entityner==2:
                        entityner=3
                    rtokens[entity_span[0]:entity_span[-1] + 1] = ['ENTITY_' +id2ners[entityner]] * (entity_span[-1] - entity_span[0] + 1)
                    rrawtokens[entity_span[0]:entity_span[-1] + 1] = zip(rrawtokens[entity_span[0]:entity_span[-1] + 1], (
                            ['ENTITY_' + id2ners[entityner]] * (entity_span[-1] - entity_span[0] + 1)))

                rtokens[subj_span[0]:subj_span[-1] + 1] = ['SUBJ-' + rd['subj_type']] * (subj_span[-1] - subj_span[0] + 1)
                #rtokens[subj_span[0]:subj_span[-1] + 1] = ['ENTITY_' + rd['subj_type']] * (
                #            subj_span[-1] - subj_span[0] + 1)

                rsubjmask[subj_span[0]:subj_span[-1] + 1] = [0] * (subj_span[-1] - subj_span[0] + 1)
                rrawtokens[subj_span[0]:subj_span[-1] + 1] = zip(rrawtokens[subj_span[0]:subj_span[-1] + 1], (
                            ['RAWSUBJ-' + d['subj_type']] * (subj_span[-1] - subj_span[0] + 1)))
                rtokens[obj_span[0]:obj_span[-1] + 1] = ['OBJ-' + rd['obj_type']] * (obj_span[-1] - obj_span[0] + 1)
                robjmask[obj_span[0]:obj_span[-1]+1]=[0]*(obj_span[-1]-obj_span[0]+1)
                rrawtokens[obj_span[0]:obj_span[-1] + 1] = zip(rrawtokens[obj_span[0]:obj_span[-1] + 1], (['RAWOBJ-' + d['obj_type']] * (obj_span[-1] - obj_span[0] + 1)))
                rrawtokens.append(distance)
                rtokens = map_to_ids(rtokens, self.vocab.word2id)
                mask = [1] * len(rtokens)
                # Drop the trailing period from the attention mask.
                if containDot:
                    mask[-1] = 0
                aspect.append((rtokens, pos, rsubjmask, robjmask, ner, depmap, ret, rel, resrel, deprel, domain,sdp_domain,domain_subj,
                domain_obj, mask,sid,iscross, distance, relation,rrawtokens))

            # Fall back to the originally annotated pair when it was not
            # among the selected pairs, re-running the tree pipeline for it.
            if [src_subj,src_obj] not in relpairs:
                tree, domains, distanceraw,relpair,midhead,entity_chains,sdp_domain = head_to_treeEval(head, deprel,ner, pos,entity_ner,entity_pos,[[src_subj,src_obj]],build_mid=True)
                distance=distanceraw
                depmap, ret, rel, resrel, domain, sdp_domain,domain_subj, domain_obj = tree_to_adj(l, domains, tree,entity_chains,sdp_domain)
                obj_span=src_obj
                subj_span=src_subj
                rtokens = copy.deepcopy(tokens)
                rrawtokens = copy.deepcopy(tokens)
                rsubjmask = copy.deepcopy(subj_mask)
                robjmask = copy.deepcopy(obj_mask)

                for entity_pair in entity_chains[1:]:
                    entity_span=entity_pair[-1]
                    entityner=ner[entity_span[0]]
                    if entityner==2:
                        entityner=3
                    rtokens[entity_span[0]:entity_span[-1] + 1] = ['ENTITY_' +id2ners[entityner]] * (entity_span[-1] - entity_span[0] + 1)
                    rrawtokens[entity_span[0]:entity_span[-1] + 1] = zip(rrawtokens[entity_span[0]:entity_span[-1] + 1],(['ENTITY_' + id2ners[entityner]] * (entity_span[-1] - entity_span[0] + 1)))
                rtokens[subj_span[0]:subj_span[-1] + 1] = ['SUBJ-' + rd['subj_type']] * (subj_span[-1] - subj_span[0] + 1)

                # rtokens[subj_span[0]:subj_span[-1] + 1] = ['ENTITY_' + rd['subj_type']] * (
                #         subj_span[-1] - subj_span[0] + 1)
                rsubjmask[subj_span[0]:subj_span[-1] + 1] = [0] * (subj_span[-1] - subj_span[0] + 1)
                rrawtokens[subj_span[0]:subj_span[-1] + 1] = zip(rrawtokens[subj_span[0]:subj_span[-1] + 1], (
                        ['RAWSUBJ-' + d['subj_type']] * (subj_span[-1] - subj_span[0] + 1)))
                rtokens[obj_span[0]:obj_span[-1] + 1] = ['OBJ-' + rd['obj_type']] * (obj_span[-1] - obj_span[0] + 1)
                robjmask[obj_span[0]:obj_span[-1] + 1] = [0] * (obj_span[-1] - obj_span[0] + 1)
                rrawtokens[obj_span[0]:obj_span[-1] + 1] = zip(rrawtokens[obj_span[0]:obj_span[-1] + 1], (
                            ['RAWOBJ-' + d['obj_type']] * (obj_span[-1] - obj_span[0] + 1)))
                rrawtokens.append(distance)
                rtokens = map_to_ids(rtokens, self.vocab.word2id)
                mask = [1] * len(rtokens)
                if containDot:
                    mask[-1] = 0
                aspect.append(
                    (rtokens, pos, rsubjmask, robjmask, ner, depmap, ret, rel, resrel, deprel, domain, sdp_domain,domain_subj,
                     domain_obj, mask, sid, iscross,distance, relation, rrawtokens))
        # if len(aspect)<2 or relation!=0:
        #     return []
        # subj_type = [constant.SUBJ_NER_TO_ID[d['subj_type']]]
        # obj_type = [constant.OBJ_NER_TO_ID[d['obj_type']]]
        # processed += [(tokens, pos, ner, deprel, head, subj_positions, obj_positions, subj_type, obj_type, length,relation)]
        batch= [aspect]
        return batch
コード例 #10
0
 def input_to_adj(self, head, words, prune_k, subj_pos, obj_pos):
     """Return the undirected, self-loop-free adjacency tensor for one
     sentence, built from its dependency heads after pruning."""
     n = len(words)
     tree = head_to_tree(head, words, n, prune_k, subj_pos, obj_pos)
     dense = tree_to_adj(n, tree, directed=False, self_loop=False)
     return torch.from_numpy(dense)