Example 1
    import glob

    # The first column of the DataFrame holds the labels; the rest is the feature matrix.
    label = data[0].to_numpy()
    dataset = data.drop(columns=[0]).to_numpy()

    # Input size is the number of features; the output size, learning rate and
    # training algorithm come from the configuration, while the stopping error
    # is fixed at 0.001.
    model = UnsupervisedModel(
        dataset,
        dataset.shape[-1],
        config["output"],
        error=0.001,
        max_epochs=config["max_epochs"],
        lr=float(config["lr"]),
        algorithm=config["algorithm"],
        normalize=True,
        normal_params=(config["normal_params"]["mean"],
                       config["normal_params"]["var"]))

    # Train only if no previously saved model with this name exists on disk.
    model_name = config["model_name"] + "_" + config["algorithm"]
    train = model_name + ".npy" not in glob.glob("*.npy")

    if train or config["force_train"]:
        # Train from scratch, then persist the weights and the convergence plot.
        print(model)
        model.train()
        model.save_model(model_name)
        model.plot_convergence(save=True, filename='sanger_ort.png')
    else:
        # Reuse the previously saved weights and visualise the projected data.
        model.load_model(model_name)
        plot_data(model, dataset, label)
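
This snippet is a fragment: it assumes that data is a pandas DataFrame whose first column contains the labels, that config is a dictionary read from a configuration file, and that UnsupervisedModel and plot_data are imported from the project. A minimal sketch of how those inputs might be prepared (the file names and configuration keys below are assumptions, not taken from the original project):

    import json

    import pandas as pd

    # Hypothetical inputs; adjust the paths and keys to the actual project layout.
    data = pd.read_csv("dataset.csv", header=None)  # labels in column 0, features after
    with open("config.json") as fh:
        config = json.load(fh)  # expects keys such as "output", "max_epochs", "lr",
                                # "algorithm", "normal_params", "model_name", ...
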
Example 2
    import argparse

    def main():
        parser = argparse.ArgumentParser(
            description='Arguments for the unsupervised GraphSAGE model.')
        parser.add_argument('train_prefix',
                            help='Name of the object file that stores the ' +
                            'training data.')
        parser.add_argument("model_name",
                            choices=[
                                "graphsage_mean", "gcn", "graphsage_seq",
                                "graphsage_maxpool", "graphsage_meanpool"
                            ],
                            help="Model names.")
        parser.add_argument('--model_size',
                            choices=["small", "big"],
                            default="small",
                            help="Can be big or small; selects " +
                            "model-specific definitions.")
        parser.add_argument('--learning_rate',
                            type=float,
                            default=0.00001,
                            help='Initial learning rate.')
        parser.add_argument('--epochs',
                            type=int,
                            default=10,
                            help='Number of epochs to train.')
        parser.add_argument('--dropout',
                            type=float,
                            default=0.0,
                            help='Dropout rate (1 - keep probability).')
        parser.add_argument('--weight_decay',
                            type=float,
                            default=0.0,
                            help='Weight for l2 loss on embedding matrix.')
        parser.add_argument('--max_degree',
                            type=int,
                            default=100,
                            help='Maximum node degree.')
        parser.add_argument('--samples_1',
                            type=int,
                            default=25,
                            help='Number of samples in layer 1.')
        parser.add_argument('--samples_2',
                            type=int,
                            default=10,
                            help='Number of samples in layer 2.')
        parser.add_argument('--dim_1',
                            type=int,
                            default=128,
                            help='Size of output dim ' +
                            '(final is 2x this, if using concat)')
        parser.add_argument('--dim_2',
                            type=int,
                            default=128,
                            help='Size of output dim ' +
                            '(final is 2x this, if using concat)')
        parser.add_argument('--random_context',
                            action="store_false",
                            default=True,
                            help='Pass this flag to disable random context ' +
                            'and use direct edges instead.')
        parser.add_argument('--neg_sample_size',
                            type=int,
                            default=20,
                            help='Number of negative samples.')
        parser.add_argument('--batch_size',
                            type=int,
                            default=512,
                            help='Minibatch size.')
        parser.add_argument('--identity_dim',
                            type=int,
                            default=0,
                            help='Set to positive value to use identity ' +
                            'embedding features of that dimension.')
        parser.add_argument('--save_embeddings',
                            action="store_false",
                            default=True,
                            help='Pass this flag to disable saving the ' +
                            'embeddings for all nodes after training.')
        parser.add_argument('--base_log_dir',
                            default='../../../data/processed/graphsage/',
                            help='Base directory for logging and saving ' +
                            'embeddings')
        parser.add_argument('--validate_iter',
                            type=int,
                            default=5000,
                            help='How often to run a validation minibatch.')
        parser.add_argument('--validate_batch_size',
                            type=int,
                            default=256,
                            help='How many nodes per validation sample.')
        parser.add_argument('--gpu',
                            type=int,
                            default=0,
                            help='Which gpu to use.')
        parser.add_argument('--print_every',
                            type=int,
                            default=50,
                            help='How often to print training info.')
        parser.add_argument('--max_total_steps',
                            type=int,
                            default=10**10,
                            help='Maximum total number of iterations.')
        parser.add_argument('--log_device_placement',
                            action="store_true",
                            default=False,
                            help='Whether to log device placement.')
        args = parser.parse_args()

        print("Starting...")
        print("Loading training data..")
        train_data = load_data(args.train_prefix, load_walks=True)
        print("Done loading training data..\n")
        from unsupervised_model import UnsupervisedModel
        model = UnsupervisedModel(
            args.train_prefix, args.model_name, args.model_size,
            args.learning_rate, args.epochs, args.dropout, args.weight_decay,
            args.max_degree, args.samples_1, args.samples_2, args.dim_1,
            args.dim_2, args.random_context, args.neg_sample_size,
            args.batch_size, args.identity_dim, args.save_embeddings,
            args.base_log_dir, args.validate_iter, args.validate_batch_size,
            args.gpu, args.print_every, args.max_total_steps,
            args.log_device_placement)
        model.train(train_data)
        print("Finished.")


    if __name__ == "__main__":
        main()