Exemple #1
0
    parser.add_argument("--epochs",
                        type=int,
                        default=10,
                        help="Number of epochs during training")
    parser.add_argument("--classifier",
                        type=str,
                        default='SVM',
                        choices={'NN', 'SVM'},
                        help="Downstream Classifier")
    parser.add_argument("--seed", type=int, default=0, help="Random Seed")
    args = parser.parse_args()
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)

    data_path = f'Datasets/{args.data_name}'
    db = Database.load_csv(data_path)

    model_dir = f'models/{args.data_name}/{args.kernel}_{args.depth}_{args.dim}_{args.num_samples}_{args.epochs}_{args.batch_size}_{args.seed}'
    os.makedirs(model_dir, exist_ok=True)

    sample_fct = ek_utlis.ek_sample_fct if args.kernel == 'EK' else mmd_utils.mmd_sample_fct

    Y, rows = db.get_labels()

    scores = []
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):

        samples = get_samples(db, args.depth, args.num_samples, sample_fct)
        row_idx = {r: i for i, r in enumerate(rows)}
        scheme_idx = {s: i for i, s in enumerate(samples.keys())}

def compute_embedding(db, epochs=5):
    """Compute a node2vec embedding for the row/value graph of ``db``.

    Args:
        db: Object providing ``get_row_val_graph_reg()``; the graph it
            returns is handed unchanged to ``get_node2vec_embedding_new``.
        epochs: Number of training epochs forwarded to
            ``get_node2vec_embedding_new``. Defaults to 5, the value that
            used to be hard-coded here, so existing ``compute_embedding(db)``
            calls behave exactly as before.

    Returns:
        The ``(embedding, model)`` pair produced by
        ``get_node2vec_embedding_new``.
    """
    graph = db.get_row_val_graph_reg()
    # Unpack explicitly so the result is always a 2-tuple, whatever
    # two-element sequence type the helper returns.
    embedding, model = get_node2vec_embedding_new(graph, epochs=epochs)
    return embedding, model


if __name__ == "__main__":
    name = 'mutagenesis'
    embedding_name = 'testn3.pckl'

    path = f'Datasets/{name}'
    embedding_path = f'Embeddings/{name}/{embedding_name}'

    db = Database.load_csv(path)
    Y, rows = db.get_labels()

    scores = []
    split = StratifiedShuffleSplit(train_size=0.9, random_state=0, n_splits=10)
    for i, (train_index, test_index) in enumerate(split.split(rows, Y)):
        embedding, _ = io_utils.load_or_compute(f'{embedding_path}_{i}',
                                                lambda: compute_embedding(db))

        X_train = np.float32([embedding[rows[j]] for j in train_index])
        X_test = np.float32([embedding[rows[j]] for j in test_index])
        Y_train, Y_test = [Y[i]
                           for i in train_index], [Y[i] for i in test_index]

        clf = SVC(kernel='rbf', C=1.0)