Beispiel #1
0
def main():
    """Serve energy and gradient predictions over a socket.

    Parses the command line, initializes the native module found at
    ``args.ani_lib``, restores a ``TrainerMultiTower`` from
    ``<args.work_dir>/save`` and then accepts client connections forever.
    Each non-empty message received on a connection is answered with the
    JSON document produced by ``_build_response``.  An empty message ends
    the current connection and the server goes back to accepting.

    The accepted connection and the listening socket are closed when they
    are no longer used, including when an exception propagates.
    """
    args = parse_args(sys.argv)
    lib_path = os.path.abspath(args.ani_lib)
    initialize_module(lib_path)

    save_dir = os.path.join(args.work_dir, "save")

    config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=config) as sess:

        layer_sizes = (128, 128, 64, 1)
        if args.deep_network:
            layer_sizes = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)
        towers = ["/cpu:0"]
        print("start with layers", layer_sizes)
        trainer = TrainerMultiTower(
            sess,
            towers,
            layer_sizes=layer_sizes,
            fit_charges=args.fit_charges,
            gaussian_activation=args.gaussian_activation)

        trainer.load(save_dir)

        s = client_server.connect_socket(args.host, args.port, server=True)

        try:
            if args.debug:
                print("Server listening on port %d" % args.port)

            while True:

                if args.debug:
                    print("awaiting connection...")

                conn, _addr = s.accept()

                # Always release the per-client socket, whether the client
                # hung up normally or a request failed part-way through.
                try:
                    if args.debug:
                        print("Connection established...")

                    while True:
                        rcv_data = client_server.recieve(conn)

                        print("recieved data", rcv_data)

                        # An empty message means the client is done.
                        if not rcv_data:
                            break

                        send_data = _build_response(rcv_data, trainer, args)

                        print("sending response...")

                        client_server.send(conn, send_data)
                finally:
                    conn.close()
        finally:
            # Reached only via an exception (e.g. KeyboardInterrupt); make
            # sure the listening port is released.
            s.close()


def _build_response(rcv_data, trainer, args):
    """Turn one JSON request into the JSON response string.

    Args:
        rcv_data: JSON text whose top-level object has an ``'X'`` entry.
            Each row of ``X`` is indexed with ``[0]`` to look up a
            self-interaction energy (presumably the atom type -- confirm
            against the client).
        trainer: object providing ``predict`` and ``coordinate_gradients``.
        args: parsed command-line arguments; only ``fdiff_grad`` is read.

    Returns:
        A JSON string with keys ``"energy"`` (float) and ``"gradient"``
        (flat list of floats).
    """
    X = json.loads(rcv_data).get('X')
    X_np = np.array(X, dtype=np.float32)
    rd = RawDataset([X_np], [0.0])

    # should I go back to total energy?
    energy = float(trainer.predict(rd)[0])
    self_interaction = sum(
        data_utils.selfIxnNrgWB97X_631gdp[example[0]] for example in X)
    energy += self_interaction

    gradient = list(trainer.coordinate_gradients(rd))[0]
    natoms, ndim = gradient.shape
    gradient = gradient.reshape(natoms * ndim)

    if args.fdiff_grad:
        fd_gradient = fdiff_grad(X_np, trainer)
        dg = gradient - fd_gradient
        grms = np.sqrt(np.dot(dg, dg) / (natoms * ndim))
        norm_g = np.sqrt(np.dot(gradient, gradient))
        norm_fd = np.sqrt(np.dot(fd_gradient, fd_gradient))
        # Cosine of the angle between analytic and finite-difference
        # gradients.
        dot = np.dot(gradient, fd_gradient) / (norm_fd * norm_g)
        # From here on the finite-difference gradient is what gets sent.
        gradient[:] = fd_gradient[:]
        print("RMS gradient fdiff/analytic %.4e" % grms)
        print("Gradient dot product %.4f" % dot)

    # convert gradient from hartree/angstrom to hartree/bohr
    # and to jsonable format
    gradient = [float(g) * BOHR for g in gradient]

    print("sending gradient")
    print(gradient)

    return json.dumps({
        "energy": energy,
        "gradient": gradient
    })
Beispiel #2
0
def main():
    """Evaluate a saved network on reactivity data and plot error histograms.

    Loads network parameters from ``<args.save_dir>/save_file.npz`` (raising
    ``IOError`` when the file is absent) and the reactivity datasets under
    ``args.reactivity_dir``.  Depending on the command-line flags it then

    * ``args.analyze_reaction_errors``: compares reference and predicted
      forward barriers, reverse barriers and reaction energies for the
      small and big reaction sets, saving one histogram PDF per set;
    * ``args.analyze_raw_errors``: compares predicted and reference values
      on the test and big datasets and saves ``atomization_errors.pdf``.

    Summary statistics are printed to stdout.
    """
    args = parse_args(sys.argv)
    initialize_module(os.path.abspath(args.ani_lib))

    save_file = os.path.join(args.save_dir, "save_file.npz")
    if not os.path.exists(save_file):
        raise IOError("Saved NN numpy file does not exist")

    _, _, X_test, y_test, X_big, y_big = load_reactivity_data(
        args.reactivity_dir, 1.0)
    small_reactions, big_reactions = read_all_reactions(args.reactivity_dir)

    rd_test = RawDataset(X_test, y_test)
    rd_big = RawDataset(X_big, y_big)

    def rmse(errors):
        # Root-mean-square over the leading axis of ``errors``.
        return np.sqrt(sum(errors[:]**2.0) / len(errors))

    session_config = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=session_config) as sess:
        towers = ["/cpu:0"]
        if args.deep_network:
            layers = (256, 256, 256, 256, 256, 256, 256, 128, 64, 8, 1)
        else:
            layers = (128, 128, 64, 1)
        activation_fn = activations.get_fn_by_name(args.activation_function)

        trainer = TrainerMultiTower(
            sess,
            towers=towers,
            precision=tf.float64,
            layer_sizes=layers,
            activation_fn=activation_fn,
            fit_charges=args.fit_charges,
        )
        trainer.load_numpy(save_file)

        if args.analyze_reaction_errors:
            reaction_sets = (("small_reactions", small_reactions),
                             ("big_reactions", big_reactions))

            # Output directories for both sets are created up front.
            for out_dir in ("small_reactions_comparison",
                            "big_reactions_comparison"):
                if not os.path.exists(out_dir):
                    os.mkdir(out_dir)

            for dataname, data in reaction_sets:
                # Geometries / energies of reactant, transition state and
                # product for every reaction in this set.
                reactant_X, reactant_E = [], []
                ts_X, ts_E = [], []
                product_X, product_E = [], []

                for name in data:
                    path_X, path_E = data[name]

                    if args.write_comparison_data:
                        out_path = os.path.join(dataname + "_comparison",
                                                name)
                        write_reaction_data(out_path, path_X, path_E, trainer)

                    # First point is the reactant, last is the product.
                    reactant_X.append(path_X[0])
                    reactant_E.append(path_E[0])
                    product_X.append(path_X[-1])
                    product_E.append(path_E[-1])

                    # The TS is taken as the highest-energy point on the path.
                    ts_index = path_E.index(max(path_E))
                    ts_X.append(path_X[ts_index])
                    ts_E.append(path_E[ts_index])

                rd_r = RawDataset(reactant_X, reactant_E)
                rd_p = RawDataset(product_X, product_E)
                rd_ts = RawDataset(ts_X, ts_E)

                e_r = np.array(reactant_E)
                e_p = np.array(product_E)
                e_ts = np.array(ts_E)

                # Predictions are requested in the order reactant, product, TS.
                pred_r = np.array(trainer.predict(rd_r))
                pred_p = np.array(trainer.predict(rd_p))
                pred_ts = np.array(trainer.predict(rd_ts))

                # Reference minus predicted, after scaling by KCAL.
                barrier_errors = (e_ts - e_r) * KCAL - (pred_ts - pred_r) * KCAL
                reverse_barrier_errors = (
                    (e_ts - e_p) * KCAL - (pred_ts - pred_p) * KCAL)
                rxn_errors = (e_p - e_r) * KCAL - (pred_p - pred_r) * KCAL

                barrier_rmse = rmse(barrier_errors)
                reverse_barrier_rmse = rmse(reverse_barrier_errors)
                rxn_rmse = rmse(rxn_errors)

                # All three histograms are followed by a single savefig.
                bmu, bsigma = histogram(barrier_errors,
                                        "Barrier height errors")
                rbmu, rbsigma = histogram(reverse_barrier_errors,
                                          "Reverse Barrier height errors")
                rmu, rsigma = histogram(rxn_errors, "Reaction energy errors")
                plt.xlabel("Error (kcal/mol)")
                plt.title("Reaction energetic errors for %s" % dataname)
                plt.legend()
                plt.savefig("%s_barrier_height_errors.pdf" % dataname)
                plt.clf()

                print("errors for %s" % dataname)
                print("Barrier RMSE %.2f rxn RMSE %.2f" %
                      (barrier_rmse, rxn_rmse))
                print("Reverse Barrier RMSE %.2f" % reverse_barrier_rmse)
                print("rxn mu %f sigma %f" % (rmu, rsigma))
                print("barrier mu %f sigma %f" % (bmu, bsigma))
                print("reverse barrier mu %f sigma %f" % (rbmu, rbsigma))

        # Distribution of raw prediction errors.
        if args.analyze_raw_errors:
            test_predictions = trainer.predict(rd_test)
            big_predictions = trainer.predict(rd_big)

            small_errors = np.array(test_predictions) - np.array(y_test)
            large_errors = np.array(big_predictions) - np.array(y_big)

            # RMSE is taken on the unscaled errors and scaled when printed.
            small_rmse = rmse(small_errors)
            large_rmse = rmse(large_errors)

            small_errors = small_errors * KCAL
            large_errors = large_errors * KCAL

            print("small rmse %.4f big rmse %.4f" %
                  (small_rmse * KCAL, large_rmse * KCAL))

            smu, ssigma = histogram(
                small_errors, "Atomization energy errors for small systems")
            bmu, bsigma = histogram(
                large_errors, "Atomization energy errors for large systems")
            plt.xlabel("Error (kcal/mol)")
            plt.title("Atomization energy errors")
            plt.legend()
            plt.savefig("atomization_errors.pdf")
            plt.clf()

            print("small atomization mu %f sigma %f" % (smu, ssigma))
            print("big atomization mu %f sigma %f" % (bmu, bsigma))