Example #1
0
 def generate_target():
     """Build and return a fresh target data container.

     Calls ``generate_target_matrix`` on the ``TARGET_REL_ID`` entry of
     ``data`` with ``args.target_n_samples``, ``args.target_pos_rate`` and
     ``device``, then stores the result under ``TARGET_REL_ID`` in a new
     ``SparseMatrixData`` built on ``target_schema``.

     All inputs (``data``, ``args``, ``device``, ``target_schema``) are free
     variables taken from the enclosing scope; the function has no parameters.

     Returns:
         The newly created ``SparseMatrixData`` holding the target matrix.
     """
     sampled_matrix = generate_target_matrix(
         data[TARGET_REL_ID],
         args.target_n_samples,
         args.target_pos_rate,
         device,
     )
     container = SparseMatrixData(target_schema)
     container[TARGET_REL_ID] = sampled_matrix
     # NOTE(review): the result of .to() is discarded, so this relies on
     # SparseMatrixData.to moving its contents in place -- confirm.
     container.to(device)
     return container
Example #2
0
 # Entity whose instances are embedded by the autoencoder below.
 embedding_entity = schema.entities[TARGET_NODE_TYPE]
 # Number of channels of each input relation, keyed by relation id.
 input_channels = dict(
     (rel.id, data[rel.id].n_channels) for rel in schema.relations)
 # Embedding schema: all entities of the input schema plus one set-valued
 # relation (id 0) relating the embedding entity to itself.
 embedding_schema = DataSchema(
     schema.entities,
     Relation(0, [embedding_entity] * 2, is_set=True),
 )
 n_instances = embedding_entity.n_instances
 data_embedding = SparseMatrixData(embedding_schema)
 # Placeholder embedding matrix: entries only at (i, i) for every instance i
 # (both index rows are 0..n_instances-1), each holding a zero vector of
 # length args.embedding_dim.
 data_embedding[0] = SparseMatrix(
     indices=torch.arange(n_instances, dtype=torch.long).repeat(2, 1),
     values=torch.zeros(n_instances, args.embedding_dim),
     shape=(n_instances, n_instances, args.embedding_dim),
     is_set=True,
 )
 # NOTE(review): the result of .to() is discarded, so this relies on
 # SparseMatrixData.to moving its contents in place -- confirm.
 data_embedding.to(device)
 # Schema restricted to the target relation only.
 target_schema = DataSchema(
     schema.entities,
     schema.relations[TARGET_REL_ID],
 )
 target_node_idx_to_id = dataloader.target_node_idx_to_id
 #%%
 net = SparseMatrixAutoEncoder(schema,
                               input_channels,
                               layers=args.layers,
                               embedding_dim=args.embedding_dim,
                               embedding_entities=[embedding_entity],
                               activation=eval('nn.%s()' % args.act_fn),
                               final_activation=nn.Sigmoid(),
                               dropout=args.dropout_rate,
                               norm=args.norm,
                               pool_op=args.pool_op,
                               norm_affine=args.norm_affine,
    # Relations 1-3 pair the movie entity with directors, keywords and itself;
    # the movie-movie relation is the only one flagged is_set.
    # (rel_movie_actor is defined above this fragment.)
    rel_movie_director = Relation(1, [ent_movie, ent_director])
    rel_movie_keyword = Relation(2, [ent_movie, ent_keyword])
    rel_movie_feature = Relation(3, [ent_movie, ent_movie], is_set=True)
    relations = [rel_movie_actor, rel_movie_director, rel_movie_keyword, rel_movie_feature]

    # Input schema over all entities/relations; the output schema contains only
    # the movie entity with a single set-valued movie-movie relation (id 0).
    schema = DataSchema(entities, relations)
    schema_out = DataSchema([ent_movie], [Relation(0, [ent_movie, ent_movie], is_set=True)])

    # Fill one sparse matrix per relation name. The position in relation_names
    # is used as the key into `data`.
    # NOTE(review): assumes the order of relation_names matches the relation
    # ids declared above -- confirm against where relation_names is defined.
    data = SparseMatrixData(schema)
    for rel_i, rel_name in enumerate(relation_names):
        if rel_name == 'movie_feature':
            # Feature values are preprocessed and loaded via from_embed_diag.
            values = preprocess_features(raw_data[rel_name])
            data[rel_i] = SparseMatrix.from_embed_diag(values)
        else:
            # All other relations are loaded from the raw scipy sparse data.
            data[rel_i] = SparseMatrix.from_scipy_sparse(raw_data[rel_name])
    # Rebind `data` to whatever .to(device) returns.
    data = data.to(device)
    indices_identity, indices_transpose = data.calculate_indices()
    # Number of channels of each relation's matrix, keyed by relation id.
    input_channels = {rel.id: data[rel.id].n_channels for rel in relations}
    data_target = Data(schema_out)
    n_movies = ent_movie.n_instances
    # Read one integer label per line: the second comma-separated field.
    labels = []
    with open(data_file_dir + 'index_label.txt', 'r') as label_file:
        lines = label_file.readlines()
        for line in lines:
            label = line.rstrip().split(',')[1]
            labels.append(int(label))
    # Shift labels so the smallest one becomes 0. min(labels) is evaluated on
    # the Python list, before the name is rebound to the tensor.
    labels = torch.LongTensor(labels).to(device) - min(labels)

    # Random permutation of all movie indices (sample of size n_movies drawn
    # without replacement from range(n_movies)).
    shuffled_indices = random.sample(range(n_movies), n_movies)
    val_start = 0
    # presumably args.val_pct is a percentage (0-100) of n_movies -- verify.
    test_start = int(args.val_pct * (n_movies/100.))