Example #1
def main():

    parser = argparse.ArgumentParser(description="Run ANI1 neural net training.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--ani-lib', required=True, help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted', default=False, action='store_true', help="Whether to use fitted or self-ixn")
    parser.add_argument('--add-ffdata', default=False, action='store_true', help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")

    parser.add_argument('--save-dir', default='~/work', help="Location where save data is dumped. If the folder does not exist then it will be created.")
    parser.add_argument('--train-dir', default='~/ANI-1_release', help="Location where training data is located")

    parser.add_argument(
        '--reactivity-dir',
        default=None,
        help='location of reactivity data'
    )

    parser.add_argument(
        '--reactivity-test-percent',
        default=0.25,
        type=float,
        help='fraction of reactions to put in the test set'
    )

    parser.add_argument(
        '--deep-network',
        action='store_true',
        help="Use James' super deep network (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)"
    )

    parser.add_argument(
        '--fit-charges',
        action='store_true',
        help='fit charges'
    )

    parser.add_argument(
        '--activation-function',
        type=str,
        choices=activations.get_all_fn_names(),
        help='choice of activation function',
        default="celu"
    )

    parser.add_argument(
        '--convert-checkpoint',
        default=False,
        action='store_true',
        help='Convert a checkpoint file to a numpy file and exit'
    )

    parser.add_argument(
        '--precision',
        default='single',
        type=str,
        choices=PRECISION.keys(),
        help="Floating point precision of NN"
    )

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    # save_dir = os.path.join(ANI_SAVE_DIR, "save")
    save_file = os.path.join(ANI_SAVE_DIR, "save_file.npz")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(all_Xs, all_Ys, test_size=0.25) # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test,  y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    rd_rxn_test, rd_rxn_train, rd_rxn_all, rd_rxn_big = \
        (None, None, None, None)
    if args.reactivity_dir is not None:
        # add training data
        X_rxn_train, Y_rxn_train, X_rxn_test, Y_rxn_test, X_rxn_big, Y_rxn_big = \
            load_reactivity_data(args.reactivity_dir, args.reactivity_test_percent)

        # note: rd_train above was built from X_train / y_train, so extending them here
        # relies on RawDataset keeping references to those lists rather than copies
        X_train.extend(X_rxn_train)
        y_train.extend(Y_rxn_train)

        print("Number of reactivity points in training set {0:d}".format(len(Y_rxn_train)))
        print("Number of reactivity points in test set {0:d}".format(len(Y_rxn_test)))

        # keep reaction test set separate
        rd_rxn_test = RawDataset(X_rxn_test, Y_rxn_test) if X_rxn_test else None
        rd_rxn_train = RawDataset(X_rxn_train, Y_rxn_train) if X_rxn_train else None

        # redundant, can be eliminated
        rd_rxn_all = RawDataset(X_rxn_test + X_rxn_train, Y_rxn_test + Y_rxn_train)
        
        # cannot currently handle this in test either
        # everything over 32 atoms
        rd_rxn_big = RawDataset(X_rxn_big, Y_rxn_big)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:

        # This training code implements cross-validation based training: convergence for a given epoch is
        # determined by the cross-validation error on a held-out validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If more than
        # max_local_epoch_count epochs pass without improvement, we decrease the learning rate and restore the
        # best found parameters.

        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:"+str(i) for i in range(n_gpus)]
        else:
            towers = ["/cpu:"+str(i) for i in range(multiprocessing.cpu_count())]

        layers = (128, 128, 64, 1)
        if args.deep_network:
            layers = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)

        print("Soft placing operations onto towers:", towers)

        activation_fn = activations.get_fn_by_name(args.activation_function)
        precision = PRECISION[args.precision]

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=precision,
            layer_sizes=layers,
            activation_fn=activation_fn,
            fit_charges=args.fit_charges,
        )

        if args.convert_checkpoint:
            print("Converting saved network to numpy")
            save_dir = os.path.join(args.save_dir, "save")
            trainer.load(save_dir)
            trainer.save_numpy(save_file)
            print("Complete, exiting")
            return

        if os.path.exists(save_file):
            print("Restoring existing model from", save_file)
            trainer.load_numpy(save_file)
        else:
            if not os.path.exists(ANI_SAVE_DIR):
                print("Save directory",ANI_SAVE_DIR,"does not existing... creating")
                os.makedirs(ANI_SAVE_DIR)
            trainer.initialize() # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        best_test_score = trainer.eval_abs_rmse(rd_test)

        # Uncomment if you'd like to inspect the gradients
        # all_grads = []
        # for grad in trainer.coordinate_gradients(rd_test):
        #     all_grads.append(grad)
        # assert len(all_grads) == rd_test.num_mols()

        print("------------Starting Training--------------")

        start_time = time.time()

        while sess.run(trainer.learning_rate) > 5e-10: # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                # sess.run(trainer.max_norm_ops) # should this run after every batch instead?

                start_time = time.time()
                train_results = list(trainer.feed_dataset(
                    rd_train,
                    shuffle=True,
                    target_ops=train_ops,
                    batch_size=batch_size,
                    before_hooks=trainer.max_norm_ops))

                global_epoch = train_results[0][0]
                time_per_epoch = time.time() - start_time
                train_abs_rmse = np.sqrt(np.mean(flatten_results(train_results, pos=3))) * HARTREE_TO_KCAL_PER_MOL
                learning_rate = train_results[0][1]
                local_epoch_count = train_results[0][2]

                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                print(time.strftime("%Y-%m-%d %H:%M:%S"), 'tpe:', "{0:.2f}s,".format(time_per_epoch), 'g-epoch', global_epoch, 'l-epoch', local_epoch_count, 'lr', "{0:.0e}".format(learning_rate),
                    'train/test abs rmse:', "{0:.2f} kcal/mol,".format(train_abs_rmse), "{0:.2f} kcal/mol".format(test_abs_rmse), end='')

                if test_abs_rmse < best_test_score:
                    gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)
                    print(' | gdb11 abs rmse', "{0:.2f} kcal/mol | ".format(gdb11_abs_rmse), end='')

                    best_test_score = test_abs_rmse
                    sess.run([trainer.incr_global_epoch_count, trainer.reset_local_epoch_count])

                    # info about reactivity training
                    rxn_pairs = [
                        (rd_rxn_train, "train"),
                        (rd_rxn_test, "test"),
                        (rd_rxn_all, "all"),
                        (rd_rxn_big, "big")
                    ]
                    for rd, name in rxn_pairs: 
                        if rd is not None:
                            rxn_abs_rmse = trainer.eval_abs_rmse(rd)
                            print(
                                ' | reactivity abs rmse ({0:s})'.format(name),
                                "{0:.2f} kcal/mol | ".format(rxn_abs_rmse),
                                end=''
                            )
                            # should really be a weighted average
                            if name == "test":
                                best_test_score += rxn_abs_rmse

                else:
                    sess.run([trainer.incr_global_epoch_count, trainer.incr_local_epoch_count])

                trainer.save_numpy(save_file)

                print('', end='\n')

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            # trainer.load_best_params()

    return
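
A hypothetical invocation of Example #1, shown only as a sketch: the script filename train_ani.py and the library path are placeholders, while the flags map directly to the argparse options defined above (celu activation and single precision are the declared defaults).

python train_ani.py \
    --ani-lib /path/to/ani_lib.so \
    --train-dir ~/ANI-1_release \
    --save-dir ~/work \
    --gpus 2 \
    --deep-network \
    --activation-function celu \
    --precision single
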
Example #2
def main():

    parser = argparse.ArgumentParser(
        description="Run ANI1 neural net training.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument(
        '--ani-lib',
        required=True,
        help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted',
                        default=False,
                        action='store_true',
                        help="Whether or use fitted or self-ixn")
    parser.add_argument('--add_ffdata',
                        default=False,
                        action='store_true',
                        help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")
    parser.add_argument('--train_forces',
                        default=True,
                        help="If we train to the forces")

    parser.add_argument('--save-dir',
                        default='~/work',
                        help="location where save data is dumped")
    parser.add_argument('--train-dir',
                        default='~/ANI-1_release',
                        help="location where work data is dumped")

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    save_dir = os.path.join(ANI_SAVE_DIR, "save")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    # todo: ensure disjunction in train_test_valid
    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(
        all_Xs, all_Ys,
        test_size=0.25)  # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test, y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    all_Xs_f, all_Ys_f, all_Fs_f = data_loader.load_gdb8_forces(
        ANI_TRAIN_DIR)  # todo: figure out how to split this consistently later

    rd_train_forces = RawDataset(all_Xs_f, all_Ys_f, all_Fs_f)

    with tf.Session(config=config) as sess:

        # This training code implements cross-validation based training: convergence for a given epoch is
        # determined by the cross-validation error on a held-out validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If more than
        # max_local_epoch_count epochs pass without improvement, we decrease the learning rate and restore the
        # best found parameters.

        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:" + str(i) for i in range(n_gpus)]
        else:
            towers = [
                "/cpu:" + str(i) for i in range(multiprocessing.cpu_count())
            ]

        print("towers:", towers)

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float32,
            layer_sizes=(128, 128, 64, 1),
            # fit_charges=True,
        )

        # if os.path.exists(save_dir):
        #     print("Restoring existing model from", save_dir)
        #     trainer.load(save_dir)
        # else:
        trainer.initialize()  # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        print("------------Starting Training--------------")

        start_time = time.time()

        train_forces = bool(int(args.train_forces))  # argparse leaves command-line values as strings, so coerce via int (pass 0 or 1)

        # training with forces
        while sess.run(trainer.learning_rate) > 5e-10:  # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                start_time = time.time()
                # train to forces
                if train_forces:
                    train_results_forces = list(
                        trainer.feed_dataset(
                            rd_train_forces,
                            shuffle=True,
                            target_ops=[
                                trainer.train_op_forces,
                                trainer.tower_force_rmses
                            ],
                            batch_size=batch_size,
                            before_hooks=trainer.max_norm_ops))
                    print(train_results_forces, end=" | ")

                #train to energies
                train_results_energies = list(
                    trainer.feed_dataset(rd_train,
                                         shuffle=True,
                                         target_ops=train_ops,
                                         batch_size=batch_size,
                                         before_hooks=trainer.max_norm_ops))

                train_abs_rmse = np.sqrt(
                    np.mean(flatten_results(train_results_energies,
                                            pos=3))) * HARTREE_TO_KCAL_PER_MOL
                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)

                print(time.time() - start_time, train_abs_rmse, test_abs_rmse,
                      gdb11_abs_rmse)

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            trainer.load_best_params()

    return
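
In Example #2, --train_forces is read back with bool(int(args.train_forces)), so command-line values must be 0 or 1 rather than True/False. A hypothetical invocation that disables force training (the script name train_ani_forces.py is a placeholder):

python train_ani_forces.py \
    --ani-lib /path/to/ani_lib.so \
    --train-dir ~/ANI-1_release \
    --save-dir ~/work \
    --train_forces 0
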
Example #3
def main():

    parser = argparse.ArgumentParser(description="Run ANI1 neural net training.", formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--ani-lib', required=True, help="Location of the shared object for GPU featurization")
    parser.add_argument('--fitted', default=False, action='store_true', help="Whether to use fitted or self-ixn")
    parser.add_argument('--add-ffdata', default=False, action='store_true', help="Whether or not to add the forcefield data")
    parser.add_argument('--gpus', default=1, help="Number of gpus we use")

    parser.add_argument('--save-dir', default='~/work', help="Location where save data is dumped. If the folder does not exist then it will be created.")
    parser.add_argument('--train-dir', default='~/ANI-1_release', help="Location where training data is located")

    args = parser.parse_args()

    print("Arguments", args)

    lib_path = os.path.abspath(args.ani_lib)
    print("Loading custom kernel from", lib_path)
    initialize_module(lib_path)

    print("Available activation functions:", activations.get_all_fn_names())

    ANI_TRAIN_DIR = args.train_dir
    ANI_SAVE_DIR = args.save_dir

    # save_dir = os.path.join(ANI_SAVE_DIR, "save")
    save_file = os.path.join(ANI_SAVE_DIR, "save_file.npz")

    use_fitted = args.fitted
    add_ffdata = args.add_ffdata

    data_loader = DataLoader(False)

    all_Xs, all_Ys = data_loader.load_gdb8(ANI_TRAIN_DIR)

    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(all_Xs, all_Ys, test_size=0.25) # stratify by UTT would be good to try here
    rd_train, rd_test = RawDataset(X_train, y_train), RawDataset(X_test,  y_test)

    X_gdb11, y_gdb11 = data_loader.load_gdb11(ANI_TRAIN_DIR)
    rd_gdb11 = RawDataset(X_gdb11, y_gdb11)

    batch_size = 1024

    config = tf.ConfigProto(allow_soft_placement=True)

    with tf.Session(config=config) as sess:

        # This training code implements cross-validation based training: convergence for a given epoch is
        # determined by the cross-validation error on a held-out validation set. When a better cross-validation
        # score is detected, we save the model's parameters as the putative best found parameters. If more than
        # max_local_epoch_count epochs pass without improvement, we decrease the learning rate and restore the
        # best found parameters.

        n_gpus = int(args.gpus)
        if n_gpus > 0:
            towers = ["/gpu:"+str(i) for i in range(n_gpus)]
        else:
            towers = ["/cpu:"+str(i) for i in range(multiprocessing.cpu_count())]

        print("Soft placing operations onto towers:", towers)

        # activation_fn = activations.get_fn_by_name("celu") # if you want to use the command line.
        activation_fn = activations.celu # preferred
        # activation_fn = tf.nn.selu
        # activation_fn = functools.partial(tf.nn.leaky_relu, alpha=0.2)
        # activation_fn = activations.get_fn_by_name("normal", 0.5, 0.2)


        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float32,
            layer_sizes=(128, 128, 64, 1),
            activation_fn=activation_fn,
            fit_charges=False,
        )

        if os.path.exists(save_file):
            print("Restoring existing model from", save_file)
            trainer.load_numpy(save_file)
        else:
            if not os.path.exists(ANI_SAVE_DIR):
                print("Save directory",ANI_SAVE_DIR,"does not existing... creating")
                os.makedirs(ANI_SAVE_DIR)
            trainer.initialize() # initialize to random variables

        max_local_epoch_count = 10

        train_ops = [
            trainer.global_epoch_count,
            trainer.learning_rate,
            trainer.local_epoch_count,
            trainer.unordered_l2s,
            trainer.train_op,
        ]

        best_test_score = trainer.eval_abs_rmse(rd_test)

        # Uncomment if you'd like to inspect the gradients
        # all_grads = []
        # for grad in trainer.coordinate_gradients(rd_test):
        #     all_grads.append(grad)
        # assert len(all_grads) == rd_test.num_mols()

        print("------------Starting Training--------------")

        start_time = time.time()

        while sess.run(trainer.learning_rate) > 5e-10: # this is to deal with a numerical error, we technically train to 1e-9

            while sess.run(trainer.local_epoch_count) < max_local_epoch_count:

                # sess.run(trainer.max_norm_ops) # should this run after every batch instead?

                start_time = time.time()
                train_results = list(trainer.feed_dataset(
                    rd_train,
                    shuffle=True,
                    target_ops=train_ops,
                    batch_size=batch_size,
                    before_hooks=trainer.max_norm_ops))

                global_epoch = train_results[0][0]
                time_per_epoch = time.time() - start_time
                train_abs_rmse = np.sqrt(np.mean(flatten_results(train_results, pos=3))) * HARTREE_TO_KCAL_PER_MOL
                learning_rate = train_results[0][1]
                local_epoch_count = train_results[0][2]

                test_abs_rmse = trainer.eval_abs_rmse(rd_test)
                print(time.strftime("%Y-%m-%d %H:%M:%S"), 'tpe:', "{0:.2f}s,".format(time_per_epoch), 'g-epoch', global_epoch, 'l-epoch', local_epoch_count, 'lr', "{0:.0e}".format(learning_rate),
                    'train/test abs rmse:', "{0:.2f} kcal/mol,".format(train_abs_rmse), "{0:.2f} kcal/mol".format(test_abs_rmse), end='')

                if test_abs_rmse < best_test_score:
                    gdb11_abs_rmse = trainer.eval_abs_rmse(rd_gdb11)
                    print(' | gdb11 abs rmse', "{0:.2f} kcal/mol | ".format(gdb11_abs_rmse), end='')

                    best_test_score = test_abs_rmse
                    sess.run([trainer.incr_global_epoch_count, trainer.reset_local_epoch_count])
                else:
                    sess.run([trainer.incr_global_epoch_count, trainer.incr_local_epoch_count])

                trainer.save_numpy(save_file)

                print('', end='\n')

            print("==========Decreasing learning rate==========")
            sess.run(trainer.decr_learning_rate)
            sess.run(trainer.reset_local_epoch_count)
            # trainer.load_best_params()

    return
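
Example #3 hard-codes activations.celu and lists a few alternatives in the comments above. A minimal sketch of the leaky ReLU variant mentioned there, assuming the functools import is added at the top of the file:

import functools
import tensorflow as tf

# swap this in for activations.celu before constructing TrainerMultiTower;
# alpha sets the slope used for negative inputs
activation_fn = functools.partial(tf.nn.leaky_relu, alpha=0.2)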