Example #1
def create_model(dataset: str, model_config: dict, adaptor_config: dict,
                 device):
    supports = load_graph_data(dataset, 'doubletransition')
    # densify the sparse supports and stack them into one float tensor
    supports = torch.tensor(list(map(sp.coo_matrix.toarray, supports)),
                            dtype=torch.float32,
                            device=device)

    edge_dim = supports.size(0)  # number of stacked support (transition) matrices

    adaptor = STAdaptor(supports, **adaptor_config)
    predictor = Ours(edge_dim=edge_dim, **model_config)

    return Model(predictor, adaptor)
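
A minimal usage sketch (not from the original source): the dataset identifier
and every config key below are illustrative assumptions.

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = create_model(dataset='metr-la',                  # assumed dataset id
                     model_config={'hidden_dim': 64},    # assumed predictor kwargs
                     adaptor_config={'learnable': True}, # assumed adaptor kwargs
                     device=device)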
Example #2
parser.add_argument('--sub_dataname', type=str,
                    help='sub-dataset name.', default='DE')

args = parser.parse_args()
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)
    

# Load data
g, n_classes = load_graph_data(args.dataset)
features = g.ndata.pop('features')
labels = g.ndata.pop('labels')
    
num_class = labels.max().item() + 1  # labels are 0-indexed

if args.cuda:
    features = features.cuda()
    #adj = adj.cuda()
    labels = labels.cuda()
    #idx_train = idx_train.cuda()
    #idx_val = idx_val.cuda()
    #idx_test = idx_test.cuda()
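# A device-agnostic sketch of the same transfer (not in the original source):
#   device = torch.device('cuda' if args.cuda else 'cpu')
#   features = features.to(device)
#   labels = labels.to(device)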


def test_sage(model, idx_train, idx_val, idx_test):
    ...  # function body truncated in the original snippet
Example #3
# The snippet begins inside a dataset-selection conditional; the opening
# branch is reconstructed below, with the dataset name assumed and its
# edge_types_strings left elided as in the original.
if dataset_str == "amazon":  # assumed branch name
    edge_types_strings = []  # elided in the original snippet
    type_num_dict = {"category": 0, "product": 1, "user": 2}
    num_type_dict = {0: "category", 1: "product", 2: "user"}
elif dataset_str == "yelp":
    edge_types_strings = ["business_category", "business_user", "user_user"]
    type_num_dict = {"business": 0, "category": 1, "user": 2}
    num_type_dict = {0: "business", 1: "category", 2: "user"}

edge_types = []
for et in edge_types_strings:
    (i, j) = et.split("_")
    edge_types.append((type_num_dict[i], type_num_dict[j]))
    if i != j:
        edge_types.append((type_num_dict[j], type_num_dict[i]))

# Load data
G = load_graph_data(graph_path)

adjs_orig = get_edge_adj_matrices(G, {et: None for et in edge_types_strings})

# get adjacency matrices for subgraphs
adj_orig = nx.to_scipy_sparse_matrix(G)
adj_orig = adj_orig - sp.dia_matrix(
    (adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape)
adj_orig.eliminate_zeros()
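# (The dia_matrix subtraction above zeroes the diagonal, i.e. removes
#  self-loops; eliminate_zeros() then drops the explicit zeros from storage.)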

p = dataset_path + f"random_splits/{edge_type}/random{random_seed}/"
G_train, test_positive_e, test_negative_e, val_positive_e, val_negative_e, train_edges = read_split(
    G, edge_type.split("_"), random_seed, p)

t0 = time.time()
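
# Worked example (illustrative, not from the original): for the "yelp"
# branch, the loop above adds both directions for heterogeneous pairs and a
# single entry for the homogeneous "user_user" type:
#   edge_types == [(0, 1), (1, 0), (0, 2), (2, 0), (2, 2)]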
Example #4
def main(config):
    # set logger
    if not os.path.exists(os.path.join(config.base_dir, config.log_dir)):
        os.mkdir(os.path.join(config.base_dir, config.log_dir))
    logger = utils.get_logger(os.path.join(config.base_dir, config.log_dir),
                              "test",
                              log_filename=config.graph_name + ".log")

    # load dataset
    #graph_pkl_filename = '../dat/adj_mx.pkl'
    #graph_pkl_filename = os.path.join(config['base_dir'], config['data']['graph_pkl_filename'])
    graph_pkl_filename = os.path.join(config.base_dir, config.dataset_dir,
                                      config.graph_pkl_filename)
    _, _, adj_mat = utils.load_graph_data(graph_pkl_filename)
    data = utils.load_dataset(dataset_dir=os.path.join(config.base_dir,
                                                       config.dataset_dir),
                              batch_size=config.batch_size,
                              test_batch_size=config.batch_size)

    logger.info("data:")
    logger.info(
        f"x_train: {data['x_train'].shape}, y_train: {data['y_train'].shape}")
    logger.info(f"x_val: {data['x_val'].shape}, y_val: {data['y_val'].shape}")
    logger.info(
        f"x_test: {data['x_test'].shape}, y_test: {data['y_test'].shape}")

    train_data_loader = data['train_loader']
    val_data_loader = data['val_loader']
    test_data_loader = data['test_loader']

    num_train_sample = data['x_train'].shape[0]
    num_val_sample = data['x_val'].shape[0]
    num_test_sample = data['x_test'].shape[0]

    # get number of iterations per epoch for progress bar
    num_train_iteration_per_epoch = math.ceil(num_train_sample /
                                              config.batch_size)
    num_val_iteration_per_epoch = math.ceil(num_val_sample / config.batch_size)
    num_test_iteration_per_epoch = math.ceil(num_test_sample /
                                             config.batch_size)

    # setup data_loader instances
    # data_loader = config.initialize('data_loader', module_data)
    # valid_data_loader = data_loader.split_validation()

    # build model architecture, then print to console
    #adj_arg = {"adj_mat": adj_mat}
    logger.info("model architecture:")
    logger.info(
        f"num_rnn_layers: {config.num_rnn_layers}, rnn_units: {config.rnn_units}, max_diffusion_step: {config.max_diffusion_step}"
    )
    logger.info(
        f"n_in: {config.n_in}, n_out: {config.n_out}, epochs: {config.epochs}")
    logger.info(f"gpu: {config.n_gpu}")
    logger.info(
        f"input_dim: {config.input_dim}, output_dim: {config.output_dim}, num_nodes: {config.num_nodes}, batch_size: {config.batch_size}"
    )
    logger.info(
        f"enc_input_dim: {config.enc_input_dim}, dec_input_dim: {config.dec_input_dim}"
    )

    model = dcrnn_model.DCRNNModel(adj_mat, config.batch_size,
                                   config.enc_input_dim, config.dec_input_dim,
                                   config.max_diffusion_step, config.num_nodes,
                                   config.num_rnn_layers, config.rnn_units,
                                   config.output_dim, config.device)

    # model = getattr(module_arch, config['arch']['type'])(config['arch']['args'], adj_arg)

    # get function handles of loss and metrics
    loss = module_metric.masked_mae_loss(data['scaler'], 0.0)
    #loss = config.initialize('loss', module_metric, **{"scaler": data['scaler']})
    # metrics = [getattr(module_metric, met) for met in config['metrics']]

    # get inverse preds & labels
    inverse = module_metric.inverse_scaler(data['scaler'], 0.0)
    # build optimizer and learning-rate scheduler; delete every line
    # containing lr_scheduler to disable the scheduler
    trainable_params = filter(lambda p: p.requires_grad, model.parameters())
    #optimizer = config.initialize('optimizer', torch.optim, trainable_params)
    optimizer = torch.optim.Adam(params=trainable_params,
                                 lr=config.base_lr,
                                 weight_decay=0.0,
                                 eps=config.epsilon,
                                 amsgrad=True)

    #lr_scheduler = config.initialize('lr_scheduler', torch.optim.lr_scheduler, optimizer)
    #lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones=config['train']['lr_milestones'], gamma=config['train']['lr_decay_ratio'])
    lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=config.lr_milestones,
        gamma=config.lr_decay_ratio)

    trainer = DCRNNTrainer(model,
                           loss,
                           optimizer,
                           config=config,
                           data_loader=train_data_loader,
                           logger=logger,
                           valid_data_loader=val_data_loader,
                           lr_scheduler=lr_scheduler,
                           len_epoch=num_train_iteration_per_epoch,
                           val_len_epoch=num_val_iteration_per_epoch)

    train_logs = trainer.train()
    epoch_loss = [i['loss'] for i in train_logs]
    val_loss = [i['val_loss'] for i in train_logs]

    tester = DCRNNTester(model,
                         loss,
                         inverse,
                         config,
                         data_loader=test_data_loader,
                         logger=logger,
                         test_data_loader=test_data_loader,
                         test_len_epoch=num_test_iteration_per_epoch)

    (test_loss, test_mae, test_rmse, test_mape, test_outputs,
     test_targets) = tester.predict()
    #test_outputs = test_outputs.numpy()
    #test_targets = test_targets.numpy()

    #test_mae = test_metrics[0]
    #test_rmse = test_metrics[1]
    #test_mape = test_metrics[2]

    # TODO: needs fixing
    # result
    #results = {"test": test_targets, "prediction": test_outputs, "true": test_outputs, "train_loss": epoch_loss,
    #           "val_loss": val_loss,"rmse": test_rmse, "steps_rmse": steps_rmse, "mae": test_mae,
    #           "mape": test_mape, "in_feats": config.in_feats, "out_feats": config.out_feats,
    #           "encode_hidden_size": config.encode_hidden_size,"decode_hidden_size": config.decode_hidden_size,
    #           "full_size": config.full_size, "frame": config.frame, "columns": config.columns}

    results = {
        "test": test_targets,
        "prediction": test_outputs,
        "train_loss": epoch_loss,
        "val_loss": val_loss,
        "rmse": test_rmse.tolist(),
        "mae": test_mae.tolist(),
        "mape": test_mape.tolist(),
        "input_dim": config.input_dim,
        "output_dim": config.output_dim,
        "n_in": config.n_in,
        "n_out": config.n_out,
        "num_rnn_layers:": config.num_rnn_layers,
        "run_units": config.rnn_units,
        "max_diffusion_step": config.max_diffusion_step,
        "enc_input_dim": config.enc_input_dim,
        "dec_input_dim": config.dec_input_dim,
        "num_nodes": config.num_nodes,
        "batch_size": config.batch_size
    }

    if not os.path.exists(os.path.join(config.base_dir, config.results_dir)):
        os.mkdir(os.path.join(config.base_dir, config.results_dir))

    with open(
            os.path.join(config.base_dir, config.results_dir,
                         "{}.json".format(config.graph_name)), 'w') as fout:
        fout.write(json.dumps(results))
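
A sketch of how main() might be driven: main() reads attributes off the
config object, so an argparse.Namespace (or any namespace) works. The field
values and the subset shown here are illustrative assumptions.

from argparse import Namespace

config = Namespace(base_dir='.', log_dir='logs', results_dir='results',
                   dataset_dir='data', graph_pkl_filename='adj_mx.pkl',
                   graph_name='dcrnn', batch_size=64, base_lr=0.01,
                   epsilon=1e-8, lr_milestones=[20, 30, 40],
                   lr_decay_ratio=0.1, device='cpu')
# ...plus the remaining fields logged above (num_rnn_layers, rnn_units,
# max_diffusion_step, n_in, n_out, epochs, n_gpu, input_dim, output_dim,
# num_nodes, enc_input_dim, dec_input_dim).
main(config)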
Example #5
args.cuda = not args.no_cuda and torch.cuda.is_available()

np.random.seed(args.seed)
torch.manual_seed(args.seed)
if args.cuda:
    torch.cuda.manual_seed(args.seed)

# =========
# Load data
# =========
lossfun = 1  # 0: null_loss(); 1: cross_entropy(); 2: MSE
n = 8

filename = 'Catbox{}_xi_norm5%'.format(n)
# imgs_data = (feature, label); adj = (indices, values)
imgs_data = np.array([load_data(image='../data/{}/H1.txt'.format(filename))])
adj = load_graph_data('../data/{}/Edge{}.txt'.format(filename, n),
                      '../data/{}/H1.txt'.format(filename))

# imgs_data[0][0] holds the features and imgs_data[0][1] the labels;
# shape[1] is the number of feature columns (c = 3 for R, G, B).
n_feature = imgs_data[0][0].shape[1]
n_class = 2
col = 1

train_idx = range(0, 1)
test_idx = range(0, 1)

# ===================
# Model and optimizer
# ===================
model = GCN(nfeat=n_feature,
            nhid=args.hidden,
            nclass=n_class,
            dropout=args.dropout)
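
A typical follow-up for this setup (a sketch; args.lr and args.weight_decay
are assumed to exist on the argument parser, as in standard pygcn-style
scripts):

optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.lr,
                             weight_decay=args.weight_decay)
if args.cuda:
    model.cuda()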