def __init__(self, data, word2idx, tolower=True):
    """Build the graph tensors and one-hot label for a single example.

    Args:
        data: Record indexed with ``config.pf``, ``config.hf``,
            ``config.lf`` and ``config.idf``. The ``pf`` and ``hf`` entries
            are each wrapped in ``Document`` and converted to a graph
            (presumably premise / hypothesis -- confirm with the caller).
            ``data[config.lf]`` is used as a column index into the one-hot
            label, so it must be a valid index below ``config.NUM_CLASSES``.
        word2idx: Mapping from token string to integer vocabulary index.
            A token missing from it raises ``KeyError``.
        tolower: When truthy, node tokens are lower-cased before lookup,
            except bracketed special tokens such as ``[ROOT]`` / ``[UNK]``.
    """
    super(GraphData, self).__init__()

    def _normalize(word):
        # Bracketed special tokens ([ROOT], [UNK], ...) must keep their
        # original casing. startswith/endswith is used instead of
        # word[0] / word[-1] so an empty token cannot raise IndexError.
        if word.startswith("[") and word.endswith("]"):
            return word
        return word.lower()

    def _encode(words):
        # Map every node token to its vocabulary index as a long tensor.
        if tolower:
            return torch.tensor(
                [word2idx[_normalize(w)] for w in words], dtype=torch.long
            )
        return torch.tensor([word2idx[w] for w in words], dtype=torch.long)

    g_p = utils.doc2graph(Document(data[config.pf]))
    g_h = utils.doc2graph(Document(data[config.hf]))
    self.edge_index_p = g_p.edge_index
    self.edge_index_h = g_h.edge_index

    if not tolower:
        print("not to lower")
    self.x_p = _encode(g_p.node_attr)
    self.x_h = _encode(g_h.node_attr)

    # One-hot label of shape [1, NUM_CLASSES], stored as float.
    label_onehot = torch.zeros([1, config.NUM_CLASSES], dtype=torch.float)
    label_onehot[0][data[config.lf]] = 1
    self.label = label_onehot
    self.pid = data[config.idf]
def __init__(self, data, word2idx, tolower=True):
    """Build the graph tensors for a single example, keeping the raw label.

    Args:
        data: Record indexed with ``config.pf``, ``config.hf``,
            ``config.lf`` and ``config.idf``. The ``pf`` and ``hf`` entries
            are each wrapped in ``Document`` and converted to a graph
            (presumably premise / hypothesis -- confirm with the caller).
            ``data[config.lf]`` is stored unchanged as ``self.label``.
        word2idx: Mapping from token string to integer vocabulary index.
            A token missing from it raises ``KeyError``.
        tolower: When truthy, node tokens are lower-cased before lookup,
            except bracketed special tokens such as ``[ROOT]`` / ``[UNK]``.
    """
    super(GraphData, self).__init__()

    def _normalize(word):
        # Bracketed special tokens ([ROOT], [UNK], ...) must keep their
        # original casing. startswith/endswith is used instead of
        # word[0] / word[-1] so an empty token cannot raise IndexError.
        if word.startswith("[") and word.endswith("]"):
            return word
        return word.lower()

    def _encode(words):
        # Map every node token to its vocabulary index as a long tensor.
        if tolower:
            return torch.tensor(
                [word2idx[_normalize(w)] for w in words], dtype=torch.long
            )
        return torch.tensor([word2idx[w] for w in words], dtype=torch.long)

    g_p = utils.doc2graph(Document(data[config.pf]))
    g_h = utils.doc2graph(Document(data[config.hf]))
    self.edge_index_p = g_p.edge_index
    self.edge_index_h = g_h.edge_index

    if not tolower:
        print("not to lower")
    self.x_p = _encode(g_p.node_attr)
    self.x_h = _encode(g_h.node_attr)

    self.label = data[config.lf]
    self.pid = data[config.idf]
def check_mwt(filename):
    """Report whether the CoNLL file at ``filename`` contains any MWTs.

    The file is loaded through ``CoNLL.conll2dict``, wrapped in a
    ``Document``, and queried with ``get_mwt_expansions(False)``.

    Returns:
        ``True`` if at least one MWT expansion is returned, else ``False``.
    """
    parsed = CoNLL.conll2dict(filename)
    expansions = Document(parsed).get_mwt_expansions(False)
    return len(expansions) != 0