def make_data(self, tucker):
     """Build a ``Data`` object holding one float32 tensor per relation.

     For every relation in ``self.schema.relations`` the stored embeddings of
     its entities are passed, together with ``tucker``, to
     ``self.calculate_relation``. The result is converted to a
     ``torch.float32`` tensor and given a new leading dimension; a second
     leading dimension is added when ``self.batch_dim`` is truthy.
     """
     result = Data(self.schema)
     for relation in self.schema.relations:
         entity_embeddings = [
             self.embeddings[entity.id] for entity in relation.entities
         ]
         values = self.calculate_relation(tucker, *entity_embeddings)
         tensor = torch.tensor(values, dtype=torch.float32).unsqueeze(0)
         if self.batch_dim:
             # Extra leading axis requested by the instance configuration.
             tensor = tensor.unsqueeze(0)
         result[relation.id] = tensor
     return result
 def forward(self, data):
     """Apply every relation-pair block and sum the results per output relation.

     Each ``(relation_i, relation_j)`` pair in ``self.relation_pairs`` selects
     a module from ``self.block_modules`` (keyed by the string form of the id
     tuple). The module is run on the input tensor of ``relation_i`` and its
     output is added to whatever has already been produced for
     ``relation_j``.
     """
     result = Data(self.schema)
     for rel_in, rel_out in self.relation_pairs:
         source = data[rel_in.id]
         block = self.block_modules[str((rel_in.id, rel_out.id))]
         contribution = block.forward(source)
         if rel_out.id in result:
             # A previous pair already wrote to this relation: accumulate.
             result[rel_out.id] = result[rel_out.id] + contribution
         else:
             result[rel_out.id] = contribution
     return result
예제 #3
0
 def forward(self, data):
     """Pool relation tensors down to one encoding per entity.

     For each entity in ``self.schema.entities``, every relation containing
     that entity is pooled with ``self.pool_tensor`` over the dimensions
     returned by ``self.get_pooling_dims`` and then passed through
     ``self.pool_tensor_diag``. The result is stored under the entity id in a
     ``Data`` object built on ``self.enc_schema``.

     Raises:
         ValueError: If an entity does not appear in any relation, because
             there is then nothing to pool for it.
     """
     out = Data(self.enc_schema, batch_size=data.batch_size)
     for entity in self.schema.entities:
         # Track per entity whether anything was pooled, so that a tensor
         # computed for a previous entity can never be stored under this
         # entity's id.
         has_encoding = False
         entity_out = None
         for relation in self.schema.relations.values():
             if entity not in relation.entities:
                 continue
             pooling_dims = self.get_pooling_dims(entity, relation)
             data_rel = data[relation.id]
             entity_out = self.pool_tensor(data_rel, pooling_dims)
             entity_out = self.pool_tensor_diag(entity_out)
             has_encoding = True
             # NOTE(review): each matching relation overwrites the previous
             # result, so only the last one is kept -- confirm whether the
             # contributions were meant to be combined instead.
         if not has_encoding:
             raise ValueError(
                 "Entity {} does not appear in any relation".format(entity.id))
         out[entity.id] = entity_out
     return out
예제 #4
0
 def forward(self, data):
     """Run every relation-pair block on both relations and sum per target.

     For each ``(relation_i, relation_j)`` pair the pair is logged, the block
     registered under the string form of the id tuple is called with the
     tensors of both relations, and its output is accumulated into the entry
     of ``relation_j`` in the returned ``Data`` object.
     """
     result = Data(self.schema)
     for rel_in, rel_out in self.relation_pairs:
         pair_ids = (rel_in.id, rel_out.id)
         self.logger.info("Relation: ({}, {})".format(*pair_ids))
         source = data[rel_in.id]
         target = data[rel_out.id]
         block = self.block_modules[str(pair_ids)]
         update = block.forward(source, target)
         if rel_out.id in result:
             # Several pairs may share the same target relation: accumulate.
             result[rel_out.id] = result[rel_out.id] + update
         else:
             result[rel_out.id] = update
     return result
    def generate_data(self, n_dim_ent=5, batch_size=1):
        """Generate synthetic relation tensors from the entity embeddings.

        If no embeddings exist yet they are created first with
        ``self.generate_embeddings(n_dim_ent, batch_size)``. Each relation id
        (0-6) is mapped to a fixed function of the embeddings of the
        relation's entities, and that function is evaluated once per batch
        element.

        Args:
            n_dim_ent: Embedding dimension; passed to
                ``generate_embeddings`` and used as an offset by the
                professor relation function.
            batch_size: Number of batch elements to generate.

        Returns:
            A ``Data`` object holding, for every relation in the schema, a
            tensor of shape ``(batch_size, 1, *relation.get_shape())``.
        """
        # Identity check: ``== None`` would dispatch to the container's
        # ``__eq__`` instead of testing for a missing value.
        if self.embeddings is None:
            self.generate_embeddings(n_dim_ent, batch_size)

        # TODO: make two-channeled
        def rel_student_fn(embedding):
            return 100 * np.mean(np.abs(np.sin(embedding)), 1)

        def rel_courses_fn(embedding):
            return 100 * np.round(np.sum(np.arctan(np.exp(embedding)), 1))

        def rel_professor_fn(embedding):
            return np.sum(np.sign(embedding), 1) + n_dim_ent

        def rel_takes_fn(embedding_student, embedding_course):
            return 100 / (1 + np.exp(embedding_student @ embedding_course.T))

        def rel_ref_fn(embedding_student, embedding_professor):
            return np.sign(embedding_student @ embedding_professor.T)

        def rel_teaches_fn(embed_professor, embed_course):
            return 50 + 50 * (np.sin(embed_professor) @ np.cos(embed_course).T)

        def rel_prereq_fn(embed_course1, embed_course2):
            # NOTE(review): arctan is bounded by +-pi/2, so multiplying by
            # pi gives a wider range than the other relations; possibly
            # ``/ np.pi`` was intended -- confirm before changing.
            return 50 * np.pi * np.arctan(embed_course1 @ embed_course2.T)

        # Relation id -> generating function.
        rel_fns = {
            0: rel_takes_fn,
            1: rel_ref_fn,
            2: rel_teaches_fn,
            3: rel_prereq_fn,
            4: rel_student_fn,
            5: rel_courses_fn,
            6: rel_professor_fn,
        }

        # TODO: change sparsity
        data = Data(self.schema)
        for relation in self.schema.relations:
            relation_fn = rel_fns[relation.id]
            relation_data = torch.zeros(batch_size, 1, *relation.get_shape())
            for batch in range(batch_size):
                ent_embeddings = [
                    self.embeddings[ent.id][batch] for ent in relation.entities
                ]
                # The assigned tensor fills the single channel of this batch
                # element.
                relation_data[batch] = torch.tensor(
                    relation_fn(*ent_embeddings))
            data[relation.id] = relation_data
        return data
    def make_observed(self, sparsity, n_channels=1, min_val=-2, max_val=2):
        """Create random sparse observations for every relation in the schema.

        For each relation ``int(sparsity * relation.get_n_entries())`` index
        tuples are drawn uniformly at random (one random instance index per
        entity of the relation) together with ``n_channels`` values per index
        drawn uniformly from ``[min_val, max_val)``. The result is wrapped in
        a coalesced ``SparseTensor`` and stored under the relation id.
        """
        observed = Data(self.schema)
        for relation in self.schema.relations:
            n_entries = int(sparsity * relation.get_n_entries())
            entities = relation.entities

            # One row of random instance indices per entity; kept as float64
            # with an explicit (n_entities, n_entries) shape.
            index_rows = [
                np.random.randint(0, entity.n_instances, n_entries)
                for entity in entities
            ]
            indices = np.array(index_rows, dtype=np.float64).reshape(
                len(entities), n_entries)

            samples = np.random.uniform(min_val, max_val,
                                        (n_channels, n_entries))
            values = samples.astype(np.single)
            shape = np.array(relation.get_shape())
            sparse = SparseTensor(indices, values, shape)
            observed[relation.id] = sparse.coalesce()

        return observed
예제 #7
0
    def __init__(self):
        """Load the relation CSV files and build schema, data and targets.

        Reads one CSV file per name in ``relation_names``, builds a schema
        with person and course entities and four relations, converts each
        relation to a sparse matrix stored in ``self.data``, and prepares
        ``self.target`` plus an all-zero ``self.data_target`` placeholder
        for the output relation.
        """
        # Column-oriented raw storage: relation name -> column key -> values.
        data_raw = {
            rel_name: {key: list()
                       for key in schema_dict[rel_name].keys()}
            for rel_name in schema_dict.keys()
        }

        # Fill the columns row by row; each CSV field is paired positionally
        # with the keys of the relation's entry in ``schema_dict``.
        for relation_name in relation_names:
            with open(csv_file_str.format(relation_name)) as file:
                reader = csv.reader(file)
                keys = schema_dict[relation_name].keys()
                for cols in reader:
                    for key, col in zip(keys, cols):
                        data_raw[relation_name][key].append(col)

        # Entity sizes are the number of rows read for each id column.
        ent_person = Entity(0, len(data_raw['person']['p_id']))
        ent_course = Entity(1, len(data_raw['course']['course_id']))
        entities = [ent_person, ent_course]

        # Relations 0 and 1 are flagged ``is_set`` and relate an entity to
        # itself; relations 2 and 3 relate two entities.
        rel_person = Relation(0, [ent_person, ent_person], is_set=True)
        rel_course = Relation(1, [ent_course, ent_course], is_set=True)
        rel_advisedBy = Relation(2, [ent_person, ent_person])
        rel_taughtBy = Relation(3, [ent_course, ent_person])
        relations = [rel_person, rel_course, rel_advisedBy, rel_taughtBy]

        self.schema = DataSchema(entities, relations)
        self.data = SparseMatrixData(self.schema)

        # Lookup from raw ids to indices, built by ``self.id_to_idx``.
        ent_id_to_idx_dict = {
            'person': self.id_to_idx(data_raw['person']['p_id']),
            'course': self.id_to_idx(data_raw['course']['course_id'])
        }

        for relation in relations:
            # The relation id doubles as the index into ``relation_names``.
            relation_name = relation_names[relation.id]
            print(relation_name)
            if relation.is_set:
                data_matrix = self.set_relation_to_matrix(
                    relation, schema_dict[relation_name],
                    data_raw[relation_name])
            else:
                # Choose which raw columns identify the two entities.
                # NOTE(review): for any other relation name the two column
                # variables would be unset (or stale from an earlier
                # iteration) -- confirm only these two names can occur.
                if relation_name == 'advisedBy':
                    ent_n_id_str = 'p_id'
                    ent_m_id_str = 'p_id_dummy'
                elif relation_name == 'taughtBy':
                    ent_n_id_str = 'course_id'
                    ent_m_id_str = 'p_id'
                data_matrix = self.binary_relation_to_matrix(
                    relation, schema_dict[relation_name],
                    data_raw[relation_name], ent_id_to_idx_dict, ent_n_id_str,
                    ent_m_id_str)
            self.data[relation.id] = data_matrix

        # Targets come from the raw column named by the class constants
        # TARGET_RELATION / TARGET_KEY.
        self.target = self.get_targets(
            data_raw[self.TARGET_RELATION][self.TARGET_KEY],
            schema_dict[self.TARGET_RELATION][self.TARGET_KEY])
        self.target_rel_id = 0
        rel_out = Relation(self.target_rel_id, [ent_person, ent_person],
                           is_set=True)
        self.schema_out = DataSchema([ent_person], [rel_out])
        self.data_target = Data(self.schema_out)
        # One output class per distinct raw target value.
        n_output_classes = len(
            np.unique(data_raw[self.TARGET_RELATION][self.TARGET_KEY]))
        self.output_dim = n_output_classes
        n_person = ent_person.n_instances
        # Placeholder output: both index rows are 0..n_person-1, i.e. the
        # positions (i, i), with all-zero values per output class.
        self.data_target[self.target_rel_id] = SparseMatrix(
            indices=torch.arange(n_person, dtype=torch.int64).repeat(2, 1),
            values=torch.zeros([n_person, n_output_classes]),
            shape=(n_person, n_person, n_output_classes))
예제 #8
0
    def __init__(self):
        """Load the relation CSV files and build dense data with a target.

        Reads one CSV file per name in ``relation_names``, converts all four
        relations to sparse matrices, and then stores dense tensors for the
        relations of ``self.schema`` (person, course, taughtBy) in
        ``self.data``. The dense ``advisedBy`` matrix (relation id 2) becomes
        ``self.target``.
        """
        self.target_relation = 'advisedBy'

        # Column-oriented raw storage: relation name -> column key -> values.
        data_raw = {
            rel_name: {key: list()
                       for key in schema_dict[rel_name].keys()}
            for rel_name in schema_dict.keys()
        }

        # Fill the columns row by row; each CSV field is paired positionally
        # with the keys of the relation's entry in ``schema_dict``.
        for relation_name in relation_names:
            with open(csv_file_str.format(relation_name)) as file:
                reader = csv.reader(file)
                keys = schema_dict[relation_name].keys()
                for cols in reader:
                    for key, col in zip(keys, cols):
                        data_raw[relation_name][key].append(col)

        # Entity sizes are the number of rows read for each id column.
        ent_person = Entity(0, len(data_raw['person']['p_id']))
        ent_course = Entity(1, len(data_raw['course']['course_id']))
        entities = [ent_person, ent_course]

        # Ids 0 and 1 each get two variants: a two-entity ``is_set`` relation
        # used for the sparse matrices and a one-entity relation used in the
        # final schema.
        rel_person_matrix = Relation(0, [ent_person, ent_person], is_set=True)
        rel_person = Relation(0, [ent_person])
        rel_course_matrix = Relation(1, [ent_course, ent_course], is_set=True)
        rel_course = Relation(1, [ent_course])
        rel_advisedBy = Relation(2, [ent_person, ent_person])
        rel_taughtBy = Relation(3, [ent_course, ent_person])
        relations_matrix = [
            rel_person_matrix, rel_course_matrix, rel_advisedBy, rel_taughtBy
        ]
        # advisedBy is left out of the input schema; its id is the target id.
        relations = [rel_person, rel_course, rel_taughtBy]

        self.target_rel_id = 2
        self.schema = DataSchema(entities, relations)
        schema_matrix = DataSchema(entities, relations_matrix)
        matrix_data = SparseMatrixData(schema_matrix)

        # Lookup from raw ids to indices, built by ``self.id_to_idx``.
        ent_id_to_idx_dict = {
            'person': self.id_to_idx(data_raw['person']['p_id']),
            'course': self.id_to_idx(data_raw['course']['course_id'])
        }

        for relation in relations_matrix:
            # The relation id doubles as the index into ``relation_names``.
            relation_name = relation_names[relation.id]
            print(relation_name)
            if relation.is_set:
                data_matrix = self.set_relation_to_matrix(
                    relation, schema_dict[relation_name],
                    data_raw[relation_name])
            else:
                # Choose which raw columns identify the two entities.
                # NOTE(review): for any other relation name the two column
                # variables would be unset (or stale from an earlier
                # iteration) -- confirm only these two names can occur.
                if relation_name == 'advisedBy':
                    ent_n_id_str = 'p_id'
                    ent_m_id_str = 'p_id_dummy'
                elif relation_name == 'taughtBy':
                    ent_n_id_str = 'course_id'
                    ent_m_id_str = 'p_id'
                data_matrix = self.binary_relation_to_matrix(
                    relation, schema_dict[relation_name],
                    data_raw[relation_name], ent_id_to_idx_dict, ent_n_id_str,
                    ent_m_id_str)
            matrix_data[relation.id] = data_matrix

        rel_out = Relation(2, [ent_person, ent_person])
        self.schema_out = DataSchema([ent_person], [rel_out])

        self.output_dim = 1
        data = Data(self.schema)
        # Copy every sparse matrix whose id also occurs in ``self.schema``
        # into ``data`` as a dense tensor with a new leading dimension.
        for rel_matrix in schema_matrix.relations:
            for rel in self.schema.relations:
                if rel_matrix.id == rel.id:
                    data_matrix = matrix_data[rel_matrix.id]
                    if rel_matrix.is_set:
                        # Keep only the diagonal taken over dims 1 and 2 of
                        # the dense tensor.
                        dense_data = torch.diagonal(data_matrix.to_dense(), 0,
                                                    1, 2).unsqueeze(0)
                    else:
                        dense_data = data_matrix.to_dense().unsqueeze(0)
                    data[rel.id] = dense_data
        self.data = data

        # Dense target matrix with size-1 dimensions removed.
        self.target = matrix_data[self.target_rel_id].to_dense().squeeze()
예제 #9
0
 def forward(self, encodings):
     """Rebuild every relation of the schema from the given encodings.

     Calls ``self.make_relation`` once per relation in
     ``self.schema.relations`` and stores each result under the relation id
     in a new ``Data`` object.
     """
     decoded = Data(self.schema)
     for rel in self.schema.relations.values():
         rel_tensor = self.make_relation(encodings, rel)
         decoded[rel.id] = rel_tensor
     return decoded
예제 #10
0
    #ent_classes = Entity(1, n_classes)
    ent_words = Entity(1, n_words)
    rel_paper = Relation(0, [ent_papers, ent_papers], is_set=True)
    rel_cites = Relation(0, [ent_papers, ent_papers])
    rel_content = Relation(1, [ent_papers, ent_words])
    schema = DataSchema([ent_papers, ent_words], [rel_cites, rel_content])
    schema_out = DataSchema([ent_papers], [rel_paper])
    targets = torch.LongTensor(paper[1])

    data = SparseMatrixData(schema)
    data[0] = cites_matrix
    data[1] = content_matrix

    indices_identity, indices_transpose = data.calculate_indices()

    data_target = Data(schema_out)
    data_target[0] = SparseMatrix(indices=torch.arange(
        n_papers, dtype=torch.int64).repeat(2, 1),
                                  values=torch.zeros([n_papers, n_classes]),
                                  shape=(n_papers, n_papers, n_classes))
    data_target = data_target.to(device)

    #%%

    # Loss function:
    def classification_loss(data_pred, data_true):
        """Return the cross-entropy of ``data_pred`` against ``data_true``."""
        loss = F.cross_entropy(data_pred, data_true)
        return loss

    n_channels = 1
    net = SparseMatrixEntityPredictor(schema,
                                      n_channels,
예제 #11
0
# Training loop: one optimiser step per item of `progress`.
for i in progress:
    optimizer.zero_grad()
    data_out = net.forward(data_hidden)
    # Training loss: prediction vs. `data_hidden`, with `observed` passed as
    # the third argument (presumably a mask of observed entries -- confirm).
    train_loss = loss_fcn(data_out, data_hidden, observed)
    train_loss.backward()
    optimizer.step()
    with torch.no_grad():
        # Validation loss reuses `data_out` from the forward pass above (i.e.
        # computed before this step's parameter update) against `data` with
        # `missing`, and drives the scheduler.
        val_loss = loss_fcn(data_out, data, missing)
        sched.step(val_loss)
    progress.set_description("Train: {:.4f}, Val: {:.4f}".format(
        train_loss.item(), val_loss.item()))

#%%
# Predict means (i.e. 0)
# Baseline: an all-zero prediction with the same tensor shapes as `data`.
fake_data_out = Data(
    schema,
    {key: torch.zeros_like(val)
     for key, val in data.rel_tensors.items()})
print(loss_fcn(fake_data_out, data, observed))

#%%
# Size report: encoding size, number of data points and trainable parameters.
encoding_size = net.get_encoding_size()
# Sum over encodings of the product of their first two entries.
total_encoding_size = sum([enc[0] * enc[1] for enc in encoding_size.values()])
# NOTE(review): `total_encoding_size` and `num_els` are not used in the lines
# visible here -- confirm whether later code relies on them.
num_els = {key: val.numel() for key, val in data.items()}
print("Total datapoints: ", sum(d.numel() for d in data.values()))
print("Total params: ",
      sum(p.numel() for p in net.parameters() if p.requires_grad))


def std_mean_per_entity(data_pred, data_true):
    '''For each entity instance in each relation, get the difference between
    the predicted and actual means. Return mean and std of these differences
예제 #12
0
def load_data():
    """Read the paper, cites and content CSV files into dense relation tensors.

    Three files are read through ``csv_file_str``: ``paper`` (paper name,
    class name), ``cites`` (citing paper, cited paper) and ``content`` (paper
    name, word name such as ``"word1328"``). Papers are indexed in file order,
    classes in the sorted order returned by ``np.unique`` and words by the
    number embedded in their name.

    Returns:
        A ``(data, schema, class_targets)`` tuple. ``data`` holds one
        ``(1, 1, rows, cols)`` indicator tensor for each of the relations
        paper-class (id 0), paper-paper citation (id 1) and paper-word
        content (id 2); ``schema`` is the matching ``DataSchema``;
        ``class_targets`` is a long tensor with the class index of every
        paper.
    """
    # Words are named "word1" .. "word1433".
    n_words = 1433

    def read_rows(relation_name):
        # The csv module asks for files to be opened with newline=''.
        with open(csv_file_str.format(relation_name), newline='') as csv_file:
            return list(csv.reader(csv_file))

    def word_to_idx(word):
        '''
        words all formatted like: "word1328"
        '''
        return int(word[4:]) - 1

    def indicator_matrix(index_pairs, n_rows, n_cols):
        # Index rows and columns explicitly instead of passing one (2, N)
        # array, whose interpretation as per-dimension indices is implicit.
        # The reshape keeps an empty pair list valid.
        matrix = torch.zeros(n_rows, n_cols)
        pairs = torch.as_tensor(index_pairs, dtype=torch.long).reshape(-1, 2)
        matrix[pairs[:, 0], pairs[:, 1]] = 1
        return matrix

    paper_names = []
    classes = []
    for paper_name, class_name in read_rows('paper'):
        paper_names.append(paper_name)
        classes.append(class_name)

    class_names = list(np.unique(classes))
    class_name_to_idx = {name: i for i, name in enumerate(class_names)}
    paper_name_to_idx = {name: i for i, name in enumerate(paper_names)}

    class_indices = [class_name_to_idx[name] for name in classes]
    paper_pairs = [
        [paper_name_to_idx[name], class_idx]
        for name, class_idx in zip(paper_names, class_indices)
    ]
    cites_pairs = [
        [paper_name_to_idx[citer], paper_name_to_idx[citee]]
        for citer, citee in read_rows('cites')
    ]
    content_pairs = [
        [paper_name_to_idx[paper_name], word_to_idx(word_name)]
        for paper_name, word_name in read_rows('content')
    ]

    n_papers = len(paper_names)
    n_classes = len(class_names)
    ent_papers = Entity(0, n_papers)
    ent_classes = Entity(1, n_classes)
    ent_words = Entity(2, n_words)
    entities = [ent_papers, ent_classes, ent_words]
    rel_paper = Relation(0, [ent_papers, ent_classes])
    rel_cites = Relation(1, [ent_papers, ent_papers])
    rel_content = Relation(2, [ent_papers, ent_words])
    relations = [rel_paper, rel_cites, rel_content]
    schema = DataSchema(entities, relations)

    class_targets = torch.tensor(class_indices, dtype=torch.long)

    paper_matrix = indicator_matrix(paper_pairs, n_papers, n_classes)
    cites_matrix = indicator_matrix(cites_pairs, n_papers, n_papers)
    content_matrix = indicator_matrix(content_pairs, n_papers, n_words)

    # Two leading singleton dimensions are added in front of every matrix.
    data = Data(schema)
    data[0] = paper_matrix.unsqueeze(0).unsqueeze(0)
    data[1] = cites_matrix.unsqueeze(0).unsqueeze(0)
    data[2] = content_matrix.unsqueeze(0).unsqueeze(0)
    return data, schema, class_targets