def generate_target():
    """Sample a fresh target matrix and wrap it in a SparseMatrixData.

    Draws ``args.target_n_samples`` entries (at ``args.target_pos_rate``
    positive rate) from the target relation of the module-level ``data``,
    stores the result under ``TARGET_REL_ID`` in a new container built from
    ``target_schema``, moves it to ``device``, and returns it.
    """
    sampled = generate_target_matrix(
        data[TARGET_REL_ID],
        args.target_n_samples,
        args.target_pos_rate,
        device,
    )
    container = SparseMatrixData(target_schema)
    container[TARGET_REL_ID] = sampled
    container.to(device)
    return container
# --- Embedding schema, zero-initialised embedding buffer, and model setup ---

# Entity type whose instances will receive learned embeddings.
embedding_entity = schema.entities[TARGET_NODE_TYPE]

# Channel count of each input relation's sparse matrix, keyed by relation id.
input_channels = {rel.id: data[rel.id].n_channels for rel in schema.relations}

# Schema holding a single diagonal ("set") relation over the embedding entity.
embedding_schema = DataSchema(
    schema.entities,
    Relation(0, [embedding_entity, embedding_entity], is_set=True))
n_instances = embedding_entity.n_instances

# Diagonal sparse matrix of zeros: one args.embedding_dim-channel entry per
# instance (indices are [i, i] pairs built by repeating arange twice).
data_embedding = SparseMatrixData(embedding_schema)
data_embedding[0] = SparseMatrix(
    indices=torch.arange(n_instances, dtype=torch.int64).repeat(2, 1),
    values=torch.zeros([n_instances, args.embedding_dim]),
    shape=(n_instances, n_instances, args.embedding_dim),
    is_set=True)
data_embedding.to(device)

# Schema restricted to the single target relation.
target_schema = DataSchema(schema.entities, schema.relations[TARGET_REL_ID])
target_node_idx_to_id = dataloader.target_node_idx_to_id
#%%
# Autoencoder over the sparse-matrix schema.
# NOTE(review): eval() builds the activation from the CLI string args.act_fn
# (e.g. "ReLU" -> nn.ReLU()); this assumes the argument value is trusted —
# confirm, or map to a whitelist of nn activation classes instead.
net = SparseMatrixAutoEncoder(
    schema, input_channels,
    layers=args.layers,
    embedding_dim=args.embedding_dim,
    embedding_entities=[embedding_entity],
    activation=eval('nn.%s()' % args.act_fn),
    final_activation=nn.Sigmoid(),
    dropout=args.dropout_rate,
    norm=args.norm,
    pool_op=args.pool_op,
    norm_affine=args.norm_affine,
    # (call continues on lines beyond this chunk)
# --- IMDB schema relations, sparse data loading, labels, and split bounds ---
# (rel_movie_actor, ent_movie/ent_director/ent_keyword, entities,
# relation_names, raw_data, and data_file_dir are defined earlier in the
# file, outside this chunk.)
rel_movie_director = Relation(1, [ent_movie, ent_director])
rel_movie_keyword = Relation(2, [ent_movie, ent_keyword])
# Self-relation ("set") carrying per-movie feature vectors on the diagonal.
rel_movie_feature = Relation(3, [ent_movie, ent_movie], is_set=True)
relations = [rel_movie_actor, rel_movie_director,
             rel_movie_keyword, rel_movie_feature]
schema = DataSchema(entities, relations)
# Output schema: one diagonal relation over movies (per-movie predictions).
schema_out = DataSchema([ent_movie],
                        [Relation(0, [ent_movie, ent_movie], is_set=True)])

# Load each named relation into a SparseMatrix; the dense movie-feature
# matrix is preprocessed and embedded on the diagonal, the rest come from
# scipy sparse matrices.
data = SparseMatrixData(schema)
for rel_i, rel_name in enumerate(relation_names):
    if rel_name == 'movie_feature':
        values = preprocess_features(raw_data[rel_name])
        data[rel_i] = SparseMatrix.from_embed_diag(values)
    else:
        data[rel_i] = SparseMatrix.from_scipy_sparse(raw_data[rel_name])
data = data.to(device)
indices_identity, indices_transpose = data.calculate_indices()
# Channel count of each relation's matrix, keyed by relation id.
input_channels = {rel.id: data[rel.id].n_channels for rel in relations}
data_target = Data(schema_out)
n_movies = ent_movie.n_instances

# Read integer class labels, one "<index>,<label>" pair per line, then shift
# so the smallest label becomes 0 (min(labels) is taken from the Python list
# before the name is rebound to the tensor).
labels = []
with open(data_file_dir + 'index_label.txt', 'r') as label_file:
    lines = label_file.readlines()
    for line in lines:
        label = line.rstrip().split(',')[1]
        labels.append(int(label))
labels = torch.LongTensor(labels).to(device) - min(labels)

# Random permutation of movie indices and split boundaries.
# NOTE(review): args.val_pct appears to be a percentage (scaled by
# n_movies/100) — confirm against the argument parser.
shuffled_indices = random.sample(range(n_movies), n_movies)
val_start = 0
test_start = int(args.val_pct * (n_movies/100.))