Exemple #1
0
 def __init__(self, data, word2idx, tolower=True):
     """Build the two graphs, token-index tensors and one-hot label of one example.

     The entries ``data[config.pf]`` and ``data[config.hf]`` (presumably the
     premise and hypothesis documents -- confirm against ``config``) are each
     wrapped in ``Document`` and converted with ``utils.doc2graph``.

     Args:
         data: Record indexed by ``config.pf``, ``config.hf``, ``config.lf``
             and ``config.idf``.
         word2idx: Mapping from token string to integer index. Tokens are
             looked up with ``word2idx[token]``, so a missing token raises
             whatever the mapping raises (``KeyError`` for a plain dict).
         tolower: When truthy, tokens are lower-cased before lookup, except
             tokens wrapped in square brackets (e.g. ``[ROOT]``, ``[UNK]``),
             which are looked up verbatim.

     Attributes set:
         edge_index_p, edge_index_h: ``edge_index`` of each graph.
         x_p, x_h: ``torch.long`` tensors of token indices, one per entry
             of the graph's ``node_attr``.
         label: float tensor of shape ``(1, config.NUM_CLASSES)`` with a 1
             at position ``data[config.lf]``.
         pid: ``data[config.idf]``.
     """
     super(GraphData, self).__init__()
     g_p = utils.doc2graph(Document(data[config.pf]))
     g_h = utils.doc2graph(Document(data[config.hf]))
     self.edge_index_p = g_p.edge_index
     self.edge_index_h = g_h.edge_index

     def token_index(token):
         # Bracketed markers such as [ROOT] / [UNK] must keep their case.
         # startswith/endswith (rather than token[0] / token[-1]) also
         # cope with an empty token instead of raising IndexError.
         if tolower and not (token.startswith("[") and token.endswith("]")):
             token = token.lower()
         return word2idx[token]

     if not tolower:
         print("not to lower")

     self.x_p = torch.tensor([token_index(w) for w in g_p.node_attr],
                             dtype=torch.long)
     self.x_h = torch.tensor([token_index(w) for w in g_h.node_attr],
                             dtype=torch.long)

     # One-hot row vector; dtype is fixed at creation so the label is
     # float32 regardless of the process-wide default dtype.
     label_onehot = torch.zeros([1, config.NUM_CLASSES], dtype=torch.float)
     label_onehot[0][data[config.lf]] = 1
     self.label = label_onehot
     self.pid = data[config.idf]
Exemple #2
0
 def __init__(self, data, word2idx, tolower=True):
     """Build the two graphs and token-index tensors of one example.

     The entries ``data[config.pf]`` and ``data[config.hf]`` (presumably the
     premise and hypothesis documents -- confirm against ``config``) are each
     wrapped in ``Document`` and converted with ``utils.doc2graph``.

     Args:
         data: Record indexed by ``config.pf``, ``config.hf``, ``config.lf``
             and ``config.idf``.
         word2idx: Mapping from token string to integer index. Tokens are
             looked up with ``word2idx[token]``, so a missing token raises
             whatever the mapping raises (``KeyError`` for a plain dict).
         tolower: When truthy, tokens are lower-cased before lookup, except
             tokens wrapped in square brackets (e.g. ``[ROOT]``, ``[UNK]``),
             which are looked up verbatim.

     Attributes set:
         edge_index_p, edge_index_h: ``edge_index`` of each graph.
         x_p, x_h: ``torch.long`` tensors of token indices, one per entry
             of the graph's ``node_attr``.
         label: ``data[config.lf]``, stored unchanged.
         pid: ``data[config.idf]``.
     """
     super(GraphData, self).__init__()
     g_p = utils.doc2graph(Document(data[config.pf]))
     g_h = utils.doc2graph(Document(data[config.hf]))
     self.edge_index_p = g_p.edge_index
     self.edge_index_h = g_h.edge_index

     def token_index(token):
         # Bracketed markers such as [ROOT] / [UNK] must keep their case.
         # startswith/endswith (rather than token[0] / token[-1]) also
         # cope with an empty token instead of raising IndexError.
         if tolower and not (token.startswith("[") and token.endswith("]")):
             token = token.lower()
         return word2idx[token]

     if not tolower:
         print("not to lower")

     self.x_p = torch.tensor([token_index(w) for w in g_p.node_attr],
                             dtype=torch.long)
     self.x_h = torch.tensor([token_index(w) for w in g_h.node_attr],
                             dtype=torch.long)

     self.label = data[config.lf]
     self.pid = data[config.idf]
Exemple #3
0
def check_mwt(filename):
    """Report whether the CoNLL file at ``filename`` yields any MWT expansions.

    The file is loaded with ``CoNLL.conll2dict``, wrapped in a ``Document``
    and queried through ``get_mwt_expansions(False)``.

    Returns:
        True when that call returns a non-empty collection, False otherwise.
    """
    parsed = CoNLL.conll2dict(filename)
    expansions = Document(parsed).get_mwt_expansions(False)
    return len(expansions) > 0