Example #1
def train_add(func=lambda a, b: a + b, results_dir=None, reg_weight=5e-2, learning_rate=1e-2, n_epochs=10001):
    """Addition of two MNIST digits with a symbolic regression network."""
    tf.reset_default_graph()

    # Symbolic regression network to combine the conv net outputs
    PRIMITIVE_FUNCS = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product()] * 2,
    ]
    sr_net = symbolic_network.SymbolicNet(2, funcs=PRIMITIVE_FUNCS, init_stddev=0.1)  # Symbolic regression network
    # Overall architecture
    sym_digit_network = SymbolicDigit(sr_net=sr_net, normalize=normalize)
    # Set up regularization term and training
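    # l12_smooth applies a smoothed L1/2 sparsity penalty that pushes most of the symbolic network's weights toward zero.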
    penalty = regularization.l12_smooth(sr_net.get_weights())
    penalty = reg_weight * penalty
    sym_digit_network.set_training(reg=penalty)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True   # Take up variable amount of memory on GPU
    sess = tf.Session(config=config)

    batch = batch_generator(batch_size=100)

    # Train, and restart training if loss goes to NaN
    loss_i = np.nan
    while np.isnan(loss_i):
        sess.run(tf.global_variables_initializer())
        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate)
        if np.isnan(loss_i):
            continue

        # Freezing weights
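        # MaskedSymbolicNet sets weights below the threshold to zero and keeps them fixed during the regularization-free fine-tuning stage below.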
        sr_net = symbolic_network.MaskedSymbolicNet(sess, sr_net, threshold=0.01)
        sym_digit_network = SymbolicDigitMasked(sym_digit_network, sr_net, normalize=normalize)
        sym_digit_network.set_training()

        # Training with frozen weights. Regularization is 0
        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate/10)

    # Print out human-readable equation (with regularization)
    weights = sess.run(sr_net.get_weights())
    expr = pretty_print.network(weights, PRIMITIVE_FUNCS, ["z1", "z2"])
    expr = normalize(expr)
    print(expr)

    # Calculate accuracy on test dataset
    acc_test, error_test = sym_digit_network.calc_accuracy(X_test, y_test, func, sess)
    result_str = 'Test accuracy: %g\n' % acc_test
    print(result_str)

    sym_digit_network.save_result(sess, results_dir, expr, result_str)
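A minimal invocation sketch for the example above (it assumes the module-level imports and globals used by train_add, e.g. normalize, batch_generator, X_test and y_test, are in scope; the output directory name is a placeholder, not taken from the original code):

if __name__ == '__main__':
    # Run the MNIST-addition experiment with the defaults defined above;
    # 'results/mnist_add' is a hypothetical output directory.
    train_add(results_dir='results/mnist_add')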
Example #2
    def train(self, func, func_name='', trials=1, func_dir='results/test'):
        """Train the network to find a given function"""

        x_dim = len(signature(func).parameters)  # Number of input arguments to the function
        # Generate training data and test data
        x, y = generate_data(func, N_TRAIN)
        # x_val, y_val = generate_data(func, N_VAL)
        x_test, y_test = generate_data(func, N_TEST, range_min=DOMAIN_TEST[0], range_max=DOMAIN_TEST[1])

        # Setting up the symbolic regression network
        x_placeholder = tf.compat.v1.placeholder(shape=(None, x_dim), dtype=tf.float32)
        width = len(self.activation_funcs)
        n_double = functions.count_double(self.activation_funcs)
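        # Activation functions that take two inputs (counted by count_double) consume an extra input column, hence the width + n_double hidden dimension below.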
        sym = SymbolicNetL0(self.n_layers, funcs=self.activation_funcs,
                            initial_weights=[
                                                 tf.random.truncated_normal([x_dim, width + n_double], stddev=init_sd_first, dtype=tf.float32),
                                                 tf.random.truncated_normal([width, width + n_double], stddev=init_sd_middle, dtype=tf.float32),
                                                 tf.random.truncated_normal([width, width + n_double], stddev=init_sd_middle, dtype=tf.float32),
                                                 tf.random.truncated_normal([width, 1], stddev=init_sd_last, dtype=tf.float32)
                                             ], )
        y_hat = sym(x_placeholder)

        # Label and errors
        error = tf.keras.losses.mean_squared_error(y_true=y, y_pred=y_hat)
        error_test = tf.keras.losses.mean_squared_error(y_true=y_test, y_pred=y_hat)
        # Regularization penalty provided by the SymbolicNetL0 network (sym.get_loss()).
        reg_loss = sym.get_loss()
        loss = error + self.reg_weight * reg_loss

        # Training
        learning_rate = tf.compat.v1.placeholder(tf.float32)
        opt = tf.compat.v1.train.RMSPropOptimizer(learning_rate=learning_rate)
        train = opt.minimize(loss)

        loss_list = []  # Total loss (MSE + regularization)
        error_list = []     # MSE
        reg_list = []       # Regularization
        error_test_list = []    # Test error

        error_test_final = []
        eq_list = []

        # Only take GPU memory as needed - allows multiple jobs on a single GPU
        config = tf.compat.v1.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.compat.v1.Session(config=config) as sess:
            for trial in range(trials):
                print("Training on function " + func_name + " Trial " + str(trial+1) + " out of " + str(trials))

                loss_val = np.nan
                # Restart training if loss goes to NaN (which happens when gradients blow up)
                while np.any(np.isnan(loss_val)):
                    sess.run(tf.compat.v1.global_variables_initializer())
                    # 1st stage of training with oscillating regularization weight
                    for i in range(self.n_epochs1):
                        feed_dict = {x_placeholder: x, learning_rate: self.learning_rate}
                        _ = sess.run(train, feed_dict=feed_dict)
                        if i % self.summary_step == 0:
                            loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                            error_test_val = sess.run(error_test, feed_dict={x_placeholder: x_test})

                            loss_val_avg = sum(loss_val) / len(loss_val)
                            error_test_val_avg = sum(error_test_val) / len(error_test_val)
                            error_val_avg = sum(error_val) / len(error_val)

                            print(error_val_avg)

                            print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (i, loss_val_avg, error_test_val_avg))
                            loss_list.append(loss_val_avg)
                            error_list.append(error_val_avg)
                            reg_list.append(reg_val)
                            error_test_list.append(error_test_val_avg)
                            if np.any(np.isnan(loss_val)):  # If loss goes to NaN, restart training
                                break

                # Print the expressions
                weights = sess.run(sym.get_weights())
                expr = pretty_print.network(weights, self.activation_funcs, var_names[:x_dim])
                print(expr)

                # Save results
                trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)

                results = {
                    "weights": weights,
                    "loss_list": loss_list,
                    "error_list": error_list,
                    "reg_list": reg_list,
                    "error_test": error_test_list,
                    "expr": expr
                }

                with open(trial_file, "wb+") as f:
                    pickle.dump(results, f)

                error_test_final.append(error_test_list[-1])
                eq_list.append(expr)

        return eq_list, error_test_final
Example #3
def main(results_dir='results/sho/test', trials=1, learning_rate=1e-2, reg_weight=2e-4, timesteps=25, batch_size=129,
         n_epochs1=2001, n_epochs2=5001, n_epochs3=5001):
    # Hyperparameters
    summary_step = 500
    timesteps0 = 1

    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]

    # Import simple harmonic oscillator (SHO) data
    data = np.load('dataset/sho.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    omega2_data = data["omega2"]
    N = data["N"]

    # Prepare data
    x = np.stack((x_d, x_v), axis=2)    # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)   # Initial conditions for prediction y, fed into propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)     # shape(NG, LENGTH, 2)

    # Tensorflow placeholders for x, y0, y
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="prop_input")  # input is d, v
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    length_input = tf.placeholder(dtype=tf.int32, shape=())

    # Dynamics encoder
    encoder = helpers.Encoder()
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    z_data = omega2_data[:, np.newaxis]

    # Propagating decoders
    prop_d = SymbolicNet(2, funcs=primitive_funcs)
    prop_v = SymbolicNet(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)
    # Building recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True)
    y0_rnn = tf.concat([tf.expand_dims(y0_input, axis=1), tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))],
                       axis=1)
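    # RNN input at each timestep: the initial (d, v) at t=0 (zeros afterwards), the repeated latent z, and a constant 1.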
    prop_input = tf.concat([y0_rnn, tf.keras.backend.repeat(z, length_input),
                            tf.ones((tf.shape(y0_input)[0], length_input, 1))], axis=2)
    prop_output = rnn(prop_input)

    epoch = tf.placeholder(tf.float32)
    reg_freq = np.pi / (n_epochs1 + n_epochs2) / 1.1
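    # sin^2 schedule: the regularization strength ramps up from zero and back down again over the first two training stages.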
    reg_loss = tf.sin(reg_freq * epoch) ** 2 * regularization.l12_smooth(prop_d.get_weights()) + \
               tf.sin(reg_freq * epoch) ** 2 * regularization.l12_smooth(prop_v.get_weights())
    # reg_loss = regularization.l12_smooth(prop_d.get_weights()) + regularization.l12_smooth(prop_v.get_weights())

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    reg_weight_ph = tf.placeholder(tf.float32)
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=prop_output)
    loss = error + reg_weight_ph * reg_loss
    train = tf.group([opt.minimize(loss), encoder.bn.updates])

    batch = helpers.batch_generator([x, y_data, y0, z_data], N=N, batch_size=batch_size)

    # Training session
    with tf.Session() as sess:
        for _ in range(trials):
            loss_i = np.nan

            while np.isnan(loss_i):
                loss_list = []
                error_list = []
                reg_list = []

                sess.run(tf.global_variables_initializer())

                for i in range(n_epochs1 + n_epochs2):
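                    # Stage 1: lighter regularization, full learning rate, and a propagation window
                    # that lengthens gradually; stage 2: full regularization, reduced learning rate.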
                    if i < n_epochs1:
                        reg_weight_i = reg_weight / 5
                        learning_rate_i = learning_rate
                        length_i = min(i // 500 * 2 + timesteps0, timesteps)
                    else:
                        reg_weight_i = reg_weight
                        learning_rate_i = learning_rate / 5
                        length_i = timesteps

                    x_batch, y_batch, y0_batch, z_batch = next(batch)
                    feed_dict = {x_input: x_batch, y0_input: y0_batch, y_input: y_batch,
                                 epoch: i, learning_rate_ph: learning_rate_i, training: True,
                                 reg_weight_ph: reg_weight_i, length_input: length_i}
                    _ = sess.run(train, feed_dict=feed_dict)

                    if i % summary_step == 0 or i == n_epochs1 - 1:
                        feed_dict[training] = False
                        loss_i, error_i, reg_i = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                        z_arr = sess.run(z, feed_dict=feed_dict)
                        r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                        loss_list.append(loss_i)
                        error_list.append(error_i)
                        reg_list.append(reg_i)
                        print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f\tCorrelation: %f"
                              % (i, loss_i, error_i, reg_i, r))
                        if np.isnan(loss_i):
                            break

            # Setting small weights to 0 and freezing them
            prop_d_masked = MaskedSymbolicNet(sess, prop_d, threshold=0.01)
            prop_v_masked = MaskedSymbolicNet(sess, prop_v, threshold=0.01)
            # Keep track of currently existing variables. When we rebuild the rnn, it makes new variables that we need
            # to initialize. Later, we will use this to figure out what the uninitialized variables are.
            temp = set(tf.global_variables())
            # Rebuilding the decoding propagator. Remove regularization
            rnn = tf.keras.layers.RNN(SymbolicCell(prop_d_masked, prop_v_masked), return_sequences=True)
            prop_output = rnn(prop_input)
            loss = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=prop_output)
            train = tf.group([opt.minimize(loss), encoder.bn.updates])

            weights_d = sess.run(prop_d_masked.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v_masked.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
            print(expr_v)

            print("Frozen weights. Next stage of training.")

            # Initialize only the uninitialized variables.
            sess.run(tf.variables_initializer(set(tf.global_variables()) - temp))

            for i in range(n_epochs3):
                x_batch, y_batch, y0_batch, z_batch = next(batch)
                feed_dict = {x_input: x_batch, y0_input: y0_batch, y_input: y_batch,
                             epoch: 0, learning_rate_ph: learning_rate / 10, training: True, reg_weight_ph: 0,
                             length_input: length_i}
                _ = sess.run(train, feed_dict=feed_dict)
                if i % summary_step == 0:
                    feed_dict[training] = False
                    loss_i, error_i, reg_i = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    z_arr = sess.run(z, feed_dict=feed_dict)
                    r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                    loss_list.append(loss_i)
                    error_list.append(error_i)
                    reg_list.append(reg_i)
                    print("Epoch %d\tError: %g\tCorrelation: %f" % (i, error_i, r))

            weights_d = sess.run(prop_d_masked.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v_masked.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
            print(expr_v)

            # Save results
            results = {
                "summary_step": summary_step,
                "learning_rate": learning_rate,
                "n_epochs1": n_epochs1,
                "n_epochs2": n_epochs2,
                "reg_weight": reg_weight,
                "timesteps": timesteps,
                "timesteps0": timesteps0,
                "weights_d": weights_d,
                "weights_v": weights_v,
                "loss_plot": loss_list,
                "error_plot": error_list,
                "reg_plot": reg_list,
                "expr_d": expr_d,
                "expr_v": expr_v
            }

            trial_dir = helpers.get_trial_path(results_dir)  # Get directory in which to save trial results

            tf.saved_model.simple_save(sess, trial_dir,
                                       inputs={"x": x_input, "y0": y0_input, "training": training},
                                       outputs={"z": z, "y": prop_output})

            # Save a summary of the parameters and results
            with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
                pickle.dump(results, f)

            with open(os.path.join(results_dir, 'eq_summary.txt'), 'a') as f:
                f.write(str(expr_d) + "\n")
                f.write(str(expr_v) + "\n")
                f.write("Error: %f\n\n" % error_list[-1])
    def train(self, func, func_name='', trials=1, func_dir='results/test'):
        """Train the network to find a given function"""

        x, y = generate_data(func, N_TRAIN)
        # x_val, y_val = generate_data(func, N_VAL)
        x_test, y_test = generate_data(func,
                                       N_TEST,
                                       range_min=DOMAIN_TEST[0],
                                       range_max=DOMAIN_TEST[1])

        # Setting up the symbolic regression network
        x_dim = len(signature(
            func).parameters)  # Number of input arguments to the function

        # x_placeholder = tf.placeholder(shape=(None, x_dim), dtype=tf.float32)
        width = len(self.activation_funcs)
        n_double = functions.count_double(self.activation_funcs)

        # Arrays to keep track of various quantities as a function of epoch
        loss_list = []  # Total loss (MSE + regularization)
        error_list = []  # MSE
        reg_list = []  # Regularization
        error_test_list = []  # Test error

        error_test_final = []
        eq_list = []

        for trial in range(trials):
            print("Training on function " + func_name + " Trial " +
                  str(trial + 1) + " out of " + str(trials))

            # reinitialize for each trial
            net = SymbolicNet(
                self.n_layers,
                funcs=self.activation_funcs,
                initial_weights=[
                    # kind of a hack for truncated normal
                    torch.fmod(
                        torch.normal(0,
                                     init_sd_first,
                                     size=(x_dim, width + n_double)), 2),
                    torch.fmod(
                        torch.normal(0,
                                     init_sd_middle,
                                     size=(width, width + n_double)), 2),
                    torch.fmod(
                        torch.normal(0,
                                     init_sd_middle,
                                     size=(width, width + n_double)), 2),
                    torch.fmod(torch.normal(0, init_sd_last, size=(width, 1)),
                               2)
                ])

            criterion = nn.MSELoss()
            optimizer = optim.RMSprop(
                net.parameters(),
                lr=self.learning_rate * 10,
                momentum=0.0,
                # weight_decay=7
            )

            # adaptive learning rate schedule
            lmbda = lambda epoch: 0.1 * epoch
            scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer,
                                                            lr_lambda=lmbda)

            for param_group in optimizer.param_groups:
                print("Learning rate: %f" % param_group['lr'])

            loss_val = np.nan
            # Restart training if loss goes to NaN (which happens when gradients blow up)
            while np.isnan(loss_val):
                t0 = time.time()

                # First stage of training, preceded by 0th warmup stage
                for epoch in range(self.n_epochs1 + 2000):
                    inputs, labels = x, y

                    # zero the parameter gradients
                    optimizer.zero_grad()
                    # forward + backward + optimize
                    outputs = net(inputs)
                    # TODO
                    regularization = L12Smooth()

                    mse_loss = criterion(outputs, labels)
                    reg_loss = regularization(net.get_weights_tensor())
                    loss = mse_loss + self.reg_weight * reg_loss

                    loss.backward()
                    optimizer.step()

                    if epoch % self.summary_step == 0:
                        error_val = mse_loss.item()
                        reg_val = reg_loss.item()
                        loss_val = error_val + self.reg_weight * reg_val
                        print(
                            "Epoch: %d\tTotal training loss: %f\tReg loss: %f"
                            % (epoch, loss_val, reg_val))
                        error_list.append(error_val)
                        reg_list.append(reg_val)
                        loss_list.append(loss_val)

                        # TODO: error test val
                        # loss_val, error_val, reg_val, = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                        # error_test_val = sess.run(error_test, feed_dict={x_placeholder: x_test})
                        # print("Epoch: %d\tTotal training loss: %f\tTest error: %f" % (i, loss_val, error_test_val))

                        # error_list.append(error_val)
                        # error_test_list.append(error_test_val)
                        if np.isnan(loss_val):  # If loss goes to NaN, restart training
                            break

                    if epoch == 2000:
                        scheduler.step()  # lr /= 10
                        for param_group in optimizer.param_groups:
                            print(param_group['lr'])

                # scheduler.step()  # lr /= 10 again
                for param_group in optimizer.param_groups:
                    print("Learning rate: %f" % param_group['lr'])

                t1 = time.time()

            tot_time = t1 - t0
            print(tot_time)

            # Print the expressions
            with torch.no_grad():
                weights = net.get_weights()
                expr = pretty_print.network(weights, self.activation_funcs,
                                            var_names[:x_dim])
                print(expr)

            # Save results
            trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
            results = {
                "weights": weights,
                "loss_list": loss_list,
                "error_list": error_list,
                "reg_list": reg_list,
                "error_test": error_test_list,
                "expr": expr,
                "runtime": tot_time
            }
            with open(trial_file, "wb+") as f:
                pickle.dump(results, f)

            # error_test_final.append(error_test_list[-1])
            eq_list.append(expr)

        return eq_list, error_test_final
Example #5
def main(results_dir='results/kinematics/test',
         learning_rate=1e-2,
         reg_weight=1e-3,
         n_epochs1=5001,
         n_epochs2=5001,
         timesteps=5):
    # Hyperparameters
    summary_step = 500
    timesteps0 = 1

    # Import kinematics data
    data = np.load('dataset/kinematic.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    a_data = np.asarray(data["g"])

    # Prepare data
    # The first few time steps are reserved for the symbolic regression propagator
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]),
                  axis=1)  # Input into the symbolic propagator
    label_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]),
                          axis=2)  # shape(NG, timesteps, 2)

    # Encoder
    encoder = helpers.Encoder()  # the encoder's final layer has width 1, which is its output
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]),
                             dtype=tf.float32,
                             name="enc_input")
    y_input = tf.placeholder(shape=(None, timesteps, 2),
                             dtype=tf.float32,
                             name="label_input")
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    # z = np.array(a_data)[:, np.newaxis]  # uncomment to ignore the autoencoder

    # Propagating decoder
    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]
    prop_d = SymbolicNet(2, funcs=primitive_funcs)
    prop_v = SymbolicNet(2, funcs=primitive_funcs)
    prop_input = tf.placeholder(shape=(None, 2),
                                dtype=tf.float32,
                                name="prop_input")  # input is d, v

    def rec_sr(y0_input, enc_output, length, prop1=prop_d, prop2=prop_v):
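        # Unroll the two symbolic propagators for `length` steps; each step's (d, v) output is
        # fed back in together with the encoder output and a constant 1.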
        rec_input = [y0_input]
        for i in range(length):
            full_input = tf.concat(
                [rec_input[i], enc_output,
                 tf.ones_like(enc_output)],
                axis=1,
                name="full_input")  # d, v, z
            rec_input.append(
                tf.concat(
                    [prop1(full_input), prop2(full_input)],
                    axis=1,
                    name="c_prop_input"))
        output = tf.stack(rec_input[1:], axis=1)  # Ignore initial conditions
        return output

    y_hat_start = rec_sr(prop_input, z, timesteps0, prop_d, prop_v)
    y_hat_full = rec_sr(prop_input, z, timesteps, prop_d, prop_v)

    # Label and errors
    epoch = tf.placeholder(tf.float32)
    reg_weight_ph = tf.placeholder(tf.float32)
    reg_loss = regularization.l12_smooth(
        prop_d.get_weights()) + regularization.l12_smooth(prop_v.get_weights())

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)

    def define_loss(prop_output, length):
        error = tf.losses.mean_squared_error(
            labels=y_input[:, :length, :],
            predictions=prop_output[:, :length, :])
        loss = error + reg_weight_ph * reg_loss
        train = opt.minimize(loss)
        train = tf.group([train, encoder.bn.updates])
        return error, loss, train

    error_start, loss_start, train_start = define_loss(y_hat_start, timesteps0)
    error_full, loss_full, train_full = define_loss(y_hat_full, timesteps)

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True  # Take up variable amount of memory on GPU
    with tf.Session(config=config) as sess:

        loss_i = np.nan
        while np.isnan(loss_i):

            loss_list = []
            error_list = []
            reg_list = []
            error, loss, train = error_start, loss_start, train_start

            sess.run(tf.global_variables_initializer())

            for i in range(n_epochs1):
                feed_dict = {
                    x_input: x,
                    prop_input: y0,
                    y_input: label_data,
                    epoch: 0,
                    learning_rate_ph: learning_rate,
                    training: True,
                    reg_weight_ph: reg_weight
                }
                _ = sess.run(train, feed_dict=feed_dict)
                if i % summary_step == 0:
                    feed_dict[training] = False
                    print_loss, print_error, print_l12 = sess.run(
                        (loss, error, reg_loss), feed_dict=feed_dict)
                    loss_list.append(print_loss)
                    error_list.append(print_error)
                    reg_list.append(print_l12)
                    print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f" %
                          (i, print_loss, print_error, print_l12))
                    loss_i = print_loss
                    if i > 2000:
                        error, loss, train = error_full, loss_full, train_full
                    if np.isnan(loss_i):
                        break

        # Setting small weights to 0 and freezing them
        prop_d_masked = MaskedSymbolicNet(sess, prop_d, threshold=0.1)
        prop_v_masked = MaskedSymbolicNet(sess, prop_v, threshold=0.1)

        # Rebuilding the decoding propagator
        prop_output_masked = rec_sr(prop_input, z, timesteps, prop_d_masked,
                                    prop_v_masked)
        error, loss, train = define_loss(prop_output_masked, timesteps)

        weights_d = sess.run(prop_d_masked.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs,
                                      ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v_masked.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs,
                                      ["d", "v", "z", 1])
        print(expr_v)

        print("Frozen weights. Next stage of training.")

        for i in range(n_epochs2):
            feed_dict = {
                x_input: x,
                prop_input: y0,
                y_input: label_data,
                epoch: 0,
                learning_rate_ph: learning_rate / 10,
                training: True,
                reg_weight_ph: 0
            }
            _ = sess.run(train, feed_dict=feed_dict)
            if i % summary_step == 0:
                feed_dict[training] = False
                print_loss, print_error, print_l12 = sess.run(
                    (loss, error, reg_loss), feed_dict=feed_dict)
                loss_list.append(print_loss)
                error_list.append(print_error)
                reg_list.append(print_l12)
                print("Epoch %d\tError: %g" % (i, print_error))

        weights_d = sess.run(prop_d_masked.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs,
                                      ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v_masked.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs,
                                      ["d", "v", "z", 1])
        print(expr_v)

        # Save results
        results = {
            "timesteps": timesteps,
            "summary_step": summary_step,
            "learning_rate": learning_rate,
            "n_epochs1": n_epochs1,
            "n_epochs2": n_epochs2,
            "reg_weight_ph": reg_weight,
            "weights_d": weights_d,
            "weights_v": weights_v,
            "loss_plot": loss_list,
            "error_plot": error_list,
            "l12_plot": reg_list,
            "expr_d": expr_d,
            "expr_v": expr_v
        }

        trial_dir = helpers.get_trial_path(
            results_dir)  # Get directory in which to save trial results

        tf.saved_model.simple_save(sess,
                                   trial_dir,
                                   inputs={
                                       "x": x_input,
                                       "y0": prop_input,
                                       "training": training
                                   },
                                   outputs={
                                       "z": z,
                                       "y": y_hat_full
                                   })

        # Save a summary of the parameters and results
        with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
            pickle.dump(results, f)
Example #6
def main(results_dir='results/kinematics/test', learning_rate=1e-2, reg_weight=1e-3, n_epochs=10001,
         timesteps=5):
    tf.reset_default_graph()

    # Hyperparameters
    summary_step = 1000
    # tf.set_random_seed(0)

    # Import kinematics data
    data = np.load('dataset/kinematic.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    a_data = np.asarray(data["g"])

    # Prepare data
    # The first few time steps are reserved for the symbolic regression propagator
    x = np.stack((x_d, x_v), axis=2)    # Shape (N, NT, 2)
    y0 = np.stack((y_d[:, 0], y_v[:, 0]), axis=1)  # Input into the symbolic propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]), axis=2)     # shape(NG, LENGTH, 2)

    # Encoder
    encoder = helpers.Encoder()     # the encoder's final layer has width 1, which is its output
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]), dtype=tf.float32, name="enc_input")
    y_input = tf.placeholder(shape=(None, timesteps, 2), dtype=tf.float32, name="label_input")
    y0_input = tf.placeholder(shape=(None, 2), dtype=tf.float32, name="y_input")  # input is d, v
    length_input = tf.placeholder(dtype=tf.int32, shape=())
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)
    # enc_output = np.array(g_data)[:, np.newaxis]  # uncomment to ignore the autoencoder

    # Build EQL network for the propagating decoder
    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]
    prop_d = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_v = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)
    # Build recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v), return_sequences=True)
    y0_rnn = tf.concat([tf.expand_dims(y0_input, axis=1), tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))], axis=1)
    prop_input = tf.concat([y0_rnn, tf.keras.backend.repeat(z, length_input),
                            tf.ones((tf.shape(y0_input)[0], length_input, 1))], axis=2)
    y_hat = rnn(prop_input)

    # Label and errors
    reg_loss = prop_d.get_loss() + prop_v.get_loss()

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :], predictions=y_hat)
    loss = error + reg_weight * reg_loss
    train = opt.minimize(loss)
    train = tf.group([train, encoder.bn.updates])

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True   # Take up variable amount of memory on GPU
    with tf.Session(config=config) as sess:
        loss_i = np.nan
        while np.isnan(loss_i):
            loss_list = []
            error_list = []
            reg_list = []

            sess.run(tf.global_variables_initializer())
            length_i = 1
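            # Start by propagating a single timestep; the full sequence length is used once i > 3000 (see below).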

            for i in range(n_epochs):
                lr_i = learning_rate

                feed_dict = {x_input: x, y0_input: y0, y_input: y_data,
                             learning_rate_ph: lr_i, training: True, length_input: length_i}
                _ = sess.run(train, feed_dict=feed_dict)
                if i % summary_step == 0:
                    feed_dict[training] = False
                    loss_val, error_val, reg_val = sess.run((loss, error, reg_loss), feed_dict=feed_dict)
                    loss_list.append(loss_val)
                    error_list.append(error_val)
                    reg_list.append(reg_val)
                    print("Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f" % (i, loss_val, error_val, reg_val))
                    loss_i = loss_val

                    if i > 3000:
                        length_i = timesteps
                    if np.isnan(loss_i):
                        break

        weights_d = sess.run(prop_d.get_weights())
        expr_d = pretty_print.network(weights_d, primitive_funcs, ["d", "v", "z", 1])
        print(expr_d)
        weights_v = sess.run(prop_v.get_weights())
        expr_v = pretty_print.network(weights_v, primitive_funcs, ["d", "v", "z", 1])
        print(expr_v)

        # z_arr = sess.run(enc_output, feed_dict=feed_dict)

        # Save results
        results = {
            "timesteps": timesteps,
            "summary_step": summary_step,
            "learning_rate": learning_rate,
            "N_EPOCHS": n_epochs,
            "reg_weight": reg_weight,
            "weights_d": weights_d,
            "weights_v": weights_v,
            "loss_plot": loss_list,
            "error_plot": error_list,
            "l12_plot": reg_list,
            "expr_d": expr_d,
            "expr_v": expr_v
        }

        trial_dir = helpers.get_trial_path(results_dir)     # Get directory in which to save trial results
        tf.saved_model.simple_save(sess, trial_dir,
                                   inputs={"x": x_input, "y0": y0_input, "training": training},
                                   outputs={"z": z, "y": y_hat})

        # Save a summary of the parameters and results
        with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
            pickle.dump(results, f)
Example #7
def train_add_test(func=lambda a, b: a+b, results_dir=None, reg_weight=5e-2, learning_rate=1e-2, n_epochs=10001):
    """Addition of two MNIST digits with a symbolic regression network.
    Withhold sums >= 15 for test data."""
    tf.reset_default_graph()

    # Symbolic regression network to combine the conv net outputs
    PRIMITIVE_FUNCS = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        # *[functions.Product()] * 2,
    ]
    sr_net = symbolic_network.SymbolicNet(2, funcs=PRIMITIVE_FUNCS)  # Symbolic regression network
    # Overall architecture
    sym_digit_network = SymbolicDigit(sr_net=sr_net, normalize=normalize)
    # Set up regularization term and training
    epoch = tf.placeholder_with_default(0.0, [])
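    # The penalty follows a sin^2 schedule over the epochs: it ramps up from zero and decays
    # back down towards the end of training.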
    penalty = tf.sin(np.pi / n_epochs / 1.1 * epoch) ** 2 * regularization.l12_smooth(sr_net.get_weights())
    penalty = reg_weight * penalty
    sym_digit_network.set_training(reg=penalty)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True   # Take up variable amount of memory on GPU
    sess = tf.Session(config=config)

    batch = batch_generator(batch_size=100)

    def train_fun(y):
        return y < 15

    def test_fun(y):
        return np.logical_not(train_fun(y))
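    # train_fun keeps digit pairs whose sum is below 15 for training; test_fun selects the withheld larger sums to test extrapolation.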

    # Train, and restart training if loss goes to NaN
    loss_i = np.nan
    while np.isnan(loss_i):
        sess.run(tf.global_variables_initializer())
        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, epoch, lr_val=learning_rate, train_fun=train_fun)
        if np.isnan(loss_i):
            continue

        # Freezing weights
        sr_net_masked = symbolic_network.MaskedSymbolicNet(sess, sr_net, threshold=0.01)
        sym_digit_network = SymbolicDigitMasked(sym_digit_network, sr_net_masked, normalize=normalize)
        sym_digit_network.set_training()

        loss_i = sym_digit_network.train(sess, n_epochs, batch, func, lr_val=learning_rate/10, train_fun=train_fun)

    # Print out human-readable equation (with regularization)
    weights = sess.run(sr_net.get_weights())
    expr = pretty_print.network(weights, PRIMITIVE_FUNCS, ["z1", "z2"])
    expr = normalize(expr)
    print(expr)

    # Calculate accuracy on test dataset
    acc_train, error_train = sym_digit_network.calc_accuracy(X_train, y_train, func, sess)
    acc_train1, error_train1 = sym_digit_network.calc_accuracy(X_train, y_train, func, sess, filter_fun=train_fun)
    acc_train2, error_train2 = sym_digit_network.calc_accuracy(X_train, y_train, func, sess, filter_fun=test_fun)
    acc_test, error_test = sym_digit_network.calc_accuracy(X_test, y_test, func, sess)
    acc_test1, error_test1 = sym_digit_network.calc_accuracy(X_test, y_test, func, sess, filter_fun=train_fun)
    acc_test2, error_test2 = sym_digit_network.calc_accuracy(X_test, y_test, func, sess, filter_fun=test_fun)
    result_str = "Train digits overall accuracy: %.3f\ttrain sum accuracy: %.3f\t test sum accuracy: %.3f\n" \
                 "Train digits overall error: %.3f\ttrain sum error: %.3f\t test sum error: %.3f\n" \
                 "Test digits overall accuracy: %.3f\ttrain sum accuracy: %.3f\t test sum accuracy: %.3f\n" \
                 "Test digits overall error: %.3f\ttrain sum error: %.3f\t test sum error: %.3f\n" % \
                 (acc_train, acc_train1, acc_train2, error_train, error_train1, error_train2,
                  acc_test, acc_test1, acc_test2, error_test, error_test1, error_test2)
    print(result_str)

    sym_digit_network.save_result(sess, results_dir, expr, result_str)
Example #8
    def meta_learn(self, func_names, trials, val_func_names=None):
        """Meta-train the EQL network on data generated by the given functions.
        Arguments:
            func_names: list of strings describing the functions
            trials: number of trials to train from scratch. Will save the results for each trial.
            val_func_names: optional list of function names used for validation.
        """
        opt = optim.Adam(self.net.parameters(), self.outer_learning_rate)

        equations = dict()
        train_losses = dict()
        val_eq = dict()
        val_losses = dict()
        for func_name in func_names:
            equations[func_name] = []
            train_losses[func_name] = []
        if val_func_names is not None:
            for val_func_name in val_func_names:
                val_eq[val_func_name] = []
                val_losses[val_func_name] = []

        if self.train_mode == "maml":
            # ------------- each iteration is one MAML outer loop
            for counter in range(self.n_epochs1):
                verbose = (counter + 1) % 250 == 0
                opt.zero_grad()
                eval_loss = 0
                for func_name in func_names:
                    func = self.equation_dict[func_name]
                    assert self.x_dim == len(signature(func).parameters)
                    # adapt to func
                    eql_for_func = self.adapt(func, func_name, verbose,
                                              equations, train_losses)
                    # eval task performance
                    x, y = generate_data(func, N_QUERY)
                    inputs, labels = x, y
                    eval_loss += self.get_loss(eql_for_func, inputs, labels)
                eval_loss.backward()
                # Average the accumulated gradients and optimize
                for p in self.net.parameters():
                    p.grad.data.mul_(1.0 / len(func_names))
                opt.step()
                if val_func_names is not None:
                    # Validation step
                    for val_func_name in val_func_names:
                        func = self.equation_dict[val_func_name]
                        eql_for_func = self.adapt(func, val_func_name, verbose)
                        x, y = generate_data(func, N_QUERY)
                        inputs, labels = x, y
                        eval_loss += self.get_loss(eql_for_func, inputs,
                                                   labels)
                        val_losses[val_func_name].append(eval_loss.item())
        if self.train_mode == "joint":
            # -------------------- joint training
            for counter in range(self.n_epochs1):
                verbose = (counter + 1) % 250 == 0
                for func_name in func_names:
                    # get function, do fwd pass, compute loss
                    func = self.equation_dict[func_name]
                    inputs, labels = generate_data(func, N_SUPPORT + N_QUERY)
                    loss = self.get_loss(self.net, inputs, labels)
                    # bwd pass
                    opt.zero_grad()
                    loss.backward()
                    opt.step()
                    if verbose:
                        with torch.no_grad():
                            weights = self.net.get_weights()
                            expr = pretty_print.network(
                                weights, self.activation_funcs,
                                var_names[:self.x_dim])
                            print(expr)
                            equations[func_name].append(expr)
                            train_losses[func_name].append(loss)
                # validate
                if val_func_names is not None:  # Validation step
                    for val_func_name in val_func_names:
                        # deep copy self.net so that we don't see val functions during training
                        model = copy.deepcopy(self.net)
                        func = self.equation_dict[val_func_name]
                        inputs, labels = generate_data(func, N_SUPPORT)
                        # adapt
                        loss = self.get_loss(model, inputs, labels)
                        # bwd pass
                        opt.zero_grad()
                        loss.backward()
                        opt.step()
                        # eval
                        inputs, labels = generate_data(func, N_QUERY)
                        eql_val = self.get_loss(model, inputs, labels)
                        val_losses[val_func_name].append(eql_val.item())
        for func_name in func_names:
            # ----------------------------- write results to disk
            fi = open(
                os.path.join(self.results_dir,
                             'eq_summary_{}.txt'.format(func_name)), 'w')
            fi.write("\n{}\n".format(func_name))
            for expr in equations[func_name]:
                fi.write("%s\n" % (str(expr)))
            fi.close()
            np.save(
                os.path.join(self.results_dir,
                             'train_curve_{}'.format(func_name)),
                train_losses[func_name])
        if val_func_names is not None:
            for val_func_name in val_func_names:
                np.save(
                    os.path.join(self.results_dir,
                                 'val_curve_{}'.format(val_func_name)),
                    val_losses[val_func_name])
Example #9
    def adapt(self,
              func,
              func_name='',
              verbose=False,
              equations=None,
              train_losses=None):
        if verbose:
            print("****adapting to function {}****".format(func_name))

        # These should probably be command-line arguments
        first_order = False
        allow_unused = False
        allow_nograd = False
        second_order = True

        x, y = generate_data(func, N_SUPPORT)
        inputs, labels = x, y

        # clone module and specify adaptation params
        learner = clone_module(self.net)
        diff_params = [p for p in learner.parameters() if p.requires_grad]

        # ---------------------------------begin learn2learn excerpt to compute gradients
        for _ in range(0, self.inner_steps):
            loss = self.get_loss(learner, inputs, labels)
            if allow_nograd:
                # Compute relevant gradients
                diff_params = [
                    p for p in learner.parameters() if p.requires_grad
                ]
                grad_params = grad(loss,
                                   diff_params,
                                   retain_graph=second_order,
                                   create_graph=second_order,
                                   allow_unused=allow_unused)
                gradients = []
                grad_counter = 0

                # Handles gradients for non-differentiable parameters
                for param in learner.parameters():
                    if param.requires_grad:
                        gradient = grad_params[grad_counter]
                        grad_counter += 1
                    else:
                        gradient = None
                    gradients.append(gradient)
            else:
                try:
                    gradients = grad(loss,
                                     learner.parameters(),
                                     retain_graph=second_order,
                                     create_graph=second_order,
                                     allow_unused=allow_unused)
                except RuntimeError:
                    traceback.print_exc()
                    print(
                        'learn2learn: Maybe try with allow_nograd=True and/or allow_unused=True ?'
                    )

            # Update the module
            learner = self.maml_update(learner, self.inner_learning_rate,
                                       gradients)
        adapted_learner = learner
        # -------------------------------------------------------------------------------end learn2learn excerpt
        if verbose:
            with torch.no_grad():
                weights = learner.get_weights()
                expr = pretty_print.network(weights, self.activation_funcs,
                                            var_names[:self.x_dim])
                print(expr)
                if equations is not None:
                    equations[func_name].append(expr)
                if train_losses is not None:
                    train_losses[func_name].append(loss)

        return adapted_learner
Example #10
def main(results_dir='results/sho/test',
         trials=20,
         learning_rate=1e-3,
         reg_weight=1e-3,
         timesteps=25,
         batch_size=128,
         n_epochs1=10001,
         n_epochs2=10001):

    # Hyperparameters
    summary_step = 1000

    primitive_funcs = [
        *[functions.Constant()] * 2,
        *[functions.Identity()] * 4,
        *[functions.Square()] * 4,
        *[functions.Sin()] * 2,
        *[functions.Exp()] * 2,
        *[functions.Sigmoid()] * 2,
        *[functions.Product(norm=0.1)] * 2,
    ]

    # Import simple harmonic oscillator (SHO) data
    data = np.load('dataset/sho.npz')
    x_d = np.asarray(data["x_d"])
    x_v = np.asarray(data["x_v"])
    y_d = np.asarray(data["y_d"])
    y_v = np.asarray(data["y_v"])
    omega2_data = data["omega2"]
    N = data["N"]

    # Prepare data
    x = np.stack((x_d, x_v), axis=2)  # Shape (N, NT, 2)
    y0 = np.stack(
        (y_d[:, 0], y_v[:, 0]),
        axis=1)  # Initial conditions for prediction y, fed into propagator
    y_data = np.stack((y_d[:, 1:timesteps + 1], y_v[:, 1:timesteps + 1]),
                      axis=2)  # shape(NG, timesteps, 2)
    z_data = omega2_data[:, np.newaxis]

    # Tensorflow placeholders for x, y0, y
    x_input = tf.placeholder(shape=(None, x.shape[1], x.shape[2]),
                             dtype=tf.float32,
                             name="enc_input")
    y0_input = tf.placeholder(shape=(None, 2),
                              dtype=tf.float32,
                              name="prop_input")  # input is d, v
    y_input = tf.placeholder(shape=(None, timesteps, 2),
                             dtype=tf.float32,
                             name="label_input")
    length_input = tf.placeholder(dtype=tf.int32, shape=())

    # Dynamics encoder
    encoder = helpers.Encoder(n_filters=[16, 16, 16, 16])
    training = tf.placeholder_with_default(False, [])
    z = encoder(x_input, training=training)

    # Propagating decoders
    prop_d = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_v = SymbolicNetL0(2, funcs=primitive_funcs)
    prop_d.build(4)
    prop_v.build(4)
    # Building recurrent structure
    rnn = tf.keras.layers.RNN(SymbolicCell(prop_d, prop_v),
                              return_sequences=True)
    y0_rnn = tf.concat([
        tf.expand_dims(y0_input, axis=1),
        tf.zeros((tf.shape(y0_input)[0], length_input - 1, 2))
    ],
                       axis=1)
    prop_input = tf.concat([
        y0_rnn,
        tf.keras.backend.repeat(z, length_input),
        tf.ones((tf.shape(y0_input)[0], length_input, 1))
    ],
                           axis=2)
    y_hat = rnn(prop_input)
    length_list = [1, 2, 3, 4, 5, 7, 10, 15,
                   25]  # Slowly increase the length of propagation

    # Training
    learning_rate_ph = tf.placeholder(tf.float32)
    opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate_ph)
    reg_weight_ph = tf.placeholder(tf.float32)
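    # Regularization penalty reported by the two SymbolicNetL0 propagators.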
    reg_loss = prop_d.get_loss() + prop_v.get_loss()
    error = tf.losses.mean_squared_error(labels=y_input[:, :length_input, :],
                                         predictions=y_hat)
    loss = error + reg_weight_ph * reg_loss
    train = tf.group([opt.minimize(loss), encoder.bn.updates])

    batch = helpers.batch_generator([x, y_data, y0, z_data],
                                    N=N,
                                    batch_size=batch_size)

    # Training session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        for _ in range(trials):
            loss_i = np.nan

            while np.isnan(loss_i):
                loss_list = []
                error_list = []
                reg_list = []

                sess.run(tf.global_variables_initializer())
                length_i = 1

                for i in range(n_epochs1 + n_epochs2):
                    if i < n_epochs1:
                        lr_i = learning_rate
                    else:
                        lr_i = learning_rate / 10

                    x_batch, y_batch, y0_batch, z_batch = next(batch)
                    feed_dict = {
                        x_input: x_batch,
                        y0_input: y0_batch,
                        y_input: y_batch,
                        learning_rate_ph: lr_i,
                        training: True,
                        reg_weight_ph: reg_weight,
                        length_input: length_i
                    }

                    _ = sess.run(train, feed_dict=feed_dict)

                    if i % summary_step == 0:
                        feed_dict[training] = False
                        loss_i, error_i, reg_i, z_arr = sess.run(
                            (loss, error, reg_loss, z), feed_dict=feed_dict)
                        r = np.corrcoef(z_batch[:, 0], z_arr[:, 0])[1, 0]
                        loss_list.append(loss_i)
                        error_list.append(error_i)
                        reg_list.append(reg_i)
                        print(
                            "Epoch %d\tTotal loss: %f\tError: %f\tReg loss: %f\tCorrelation: %f"
                            % (i, loss_i, error_i, reg_i, r))
                        if np.isnan(loss_i):
                            break

                        i_length = min(i // 1000, len(length_list) - 1)
                        length_i = length_list[i_length]

            weights_d = sess.run(prop_d.get_weights())
            expr_d = pretty_print.network(weights_d, primitive_funcs,
                                          ["d", "v", "z", 1])
            print(expr_d)
            weights_v = sess.run(prop_v.get_weights())
            expr_v = pretty_print.network(weights_v, primitive_funcs,
                                          ["d", "v", "z", 1])
            print(expr_v)

            print("Done. Saving results.")

            # z_arr = sess.run(z, feed_dict=feed_dict)

            # Save results
            results = {
                "summary_step": summary_step,
                "learning_rate": learning_rate,
                "n_epochs1": n_epochs1,
                "reg_weight": reg_weight,
                "timesteps": timesteps,
                "weights_d": weights_d,
                "weights_v": weights_v,
                "loss_plot": loss_list,
                "error_plot": error_list,
                "reg_plot": reg_list,
                "expr_d": expr_d,
                "expr_v": expr_v
            }

            trial_dir = helpers.get_trial_path(
                results_dir)  # Get directory in which to save trial results

            tf.saved_model.simple_save(sess,
                                       trial_dir,
                                       inputs={
                                           "x": x_input,
                                           "y0": y0_input,
                                           "training": training
                                       },
                                       outputs={
                                           "z": z,
                                           "y": y_hat
                                       })

            # Save a summary of the parameters and results
            with open(os.path.join(trial_dir, 'summary.pickle'), "wb+") as f:
                pickle.dump(results, f)

            with open(os.path.join(results_dir, 'eq_summary.txt'), 'a') as f:
                f.write(str(expr_d) + "\n")
                f.write(str(expr_v) + "\n")
                f.write("Error: %f\n\n" % error_list[-1])
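
Everything needed to inspect a finished trial (hyperparameters, weight arrays, loss curves, and the recovered expressions) is pickled into summary.pickle, so a run can be examined later without rebuilding the TensorFlow graph. A minimal reload sketch, assuming trial_dir still points at the directory returned by helpers.get_trial_path:

import os
import pickle

# Sketch only: reload the pickled trial summary for offline inspection.
# trial_dir is assumed to be the same trial directory used above.
with open(os.path.join(trial_dir, 'summary.pickle'), 'rb') as f:
    results = pickle.load(f)

print(results['expr_d'])         # expression recovered by the d-network
print(results['expr_v'])         # expression recovered by the v-network
print(results['loss_plot'][-1])  # last logged value of the total training loss
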
                   #                  torch.zeros(size=(width, width + n_double)),
                   #                  torch.zeros(size=(width, 1))],
                   # initial_weights=[torch.ones(size=(x_dim, width + n_double)),  # kind of a hack for truncated normal
                   #                  torch.ones(size=(width, width + n_double)),
                   #                  torch.ones(size=(width, width + n_double)),
                   #                  torch.ones(size=(width, 1))],
                   # initial_weights=[torch.fmod(torch.normal(0, 1, size=(x_dim, width + n_double)), 2),  # kind of a hack for truncated normal
                   #                  torch.fmod(torch.normal(0, 1, size=(width, width + n_double)), 2),
                   #                  torch.fmod(torch.normal(0, 1, size=(width, width + n_double)), 2),
                   #                  torch.fmod(torch.normal(0, 1, size=(width, 1)), 2)
                   #                  ]
)

with torch.no_grad():
    weights = sym.get_weights()
    expr = pretty_print.network(weights, activation_funcs, var_names[:x_dim])
    print(expr)

optimizer = torch.optim.Adam(sym.parameters(), lr=0.01)
loss_func = torch.nn.MSELoss()
y = func(x)
for i in range(1000):
    yhat = sym(x)
    reg = torch.tensor(0.)
    for param in sym.parameters():
        reg = reg + 0.01*torch.norm(param, 0.5)
    loss = loss_func(yhat, y) + reg

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
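
The loop above only fits the weights; the other examples on this page prune small weights afterwards (via MaskedSymbolicNet) before printing a final expression. A comparable PyTorch sketch is given below; the 0.01 threshold is an assumption, not a value taken from this example.

with torch.no_grad():
    # Sketch only: zero out near-zero weights in place, then re-print the (sparser) expression.
    # The 0.01 threshold is an assumption.
    for param in sym.parameters():
        param[param.abs() < 0.01] = 0.0
    expr_pruned = pretty_print.network(sym.get_weights(), activation_funcs, var_names[:x_dim])
    print(expr_pruned)
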
Example #12
0
    def train(self, func, func_name='', trials=1, func_dir='results/test'):
        """Train the network to find a given function"""

        x, y = generate_data(func, N_TRAIN)
        # x_val, y_val = generate_data(func, N_VAL)
        x_test, y_test = generate_data(func,
                                       N_TEST,
                                       range_min=DOMAIN_TEST[0],
                                       range_max=DOMAIN_TEST[1])

        # Setting up the symbolic regression network
        x_dim = len(signature(
            func).parameters)  # Number of input arguments to the function
        x_placeholder = tf.placeholder(shape=(None, x_dim), dtype=tf.float32)
        width = len(self.activation_funcs)
        n_double = functions.count_double(self.activation_funcs)
        sym = SymbolicNet(self.n_layers,
                          funcs=self.activation_funcs,
                          initial_weights=[
                              tf.truncated_normal([x_dim, width + n_double],
                                                  stddev=init_sd_first),
                              tf.truncated_normal([width, width + n_double],
                                                  stddev=init_sd_middle),
                              tf.truncated_normal([width, width + n_double],
                                                  stddev=init_sd_middle),
                              tf.truncated_normal([width, 1],
                                                  stddev=init_sd_last)
                          ])
        # sym = SymbolicNet(self.n_layers, funcs=self.activation_funcs)
        y_hat = sym(x_placeholder)

        # Label and errors
        error = tf.losses.mean_squared_error(labels=y, predictions=y_hat)
        error_test = tf.losses.mean_squared_error(labels=y_test,
                                                  predictions=y_hat)
        reg_loss = l12_smooth(sym.get_weights())
        loss = error + self.reg_weight * reg_loss

        # Set up TensorFlow graph for training
        learning_rate = tf.placeholder(tf.float32)
        opt = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        train = opt.minimize(loss)

        # Arrays to keep track of various quantities as a function of epoch
        loss_list = []  # Total loss (MSE + regularization)
        error_list = []  # MSE
        reg_list = []  # Regularization
        error_test_list = []  # Test error

        error_test_final = []
        eq_list = []

        # Only take GPU memory as needed - allows multiple jobs on a single GPU
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            for trial in range(trials):
                print("Training on function " + func_name + " Trial " +
                      str(trial + 1) + " out of " + str(trials))

                loss_val = np.nan
                # Restart training if loss goes to NaN (which happens when gradients blow up)
                while np.isnan(loss_val):
                    sess.run(tf.global_variables_initializer())

                    t0 = time.time()
                    # First stage of training, preceded by 0th warmup stage
                    for i in range(self.n_epochs1 + 2000):
                        if i < 2000:
                            lr_i = self.learning_rate * 10
                        else:
                            lr_i = self.learning_rate

                        feed_dict = {x_placeholder: x, learning_rate: lr_i}
                        _ = sess.run(train, feed_dict=feed_dict)
                        if i % self.summary_step == 0:
                            loss_val, error_val, reg_val = sess.run(
                                (loss, error, reg_loss), feed_dict=feed_dict)
                            error_test_val = sess.run(
                                error_test, feed_dict={x_placeholder: x_test})
                            print(
                                "Epoch: %d\tTotal training loss: %f\tTest error: %f"
                                % (i, loss_val, error_test_val))
                            loss_list.append(loss_val)
                            error_list.append(error_val)
                            reg_list.append(reg_val)
                            error_test_list.append(error_test_val)
                            if np.isnan(loss_val):  # If loss goes to NaN, restart training
                                break

                    t1 = time.time()

                    # Masked network - weights below a threshold are set to 0 and frozen. This is the fine-tuning stage
                    sym_masked = MaskedSymbolicNet(sess, sym)
                    y_hat_masked = sym_masked(x_placeholder)
                    error_masked = tf.losses.mean_squared_error(
                        labels=y, predictions=y_hat_masked)
                    error_test_masked = tf.losses.mean_squared_error(
                        labels=y_test, predictions=y_hat_masked)
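                    # Second-stage objective is plain MSE on the masked network; the regularization term is dropped here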
                    train_masked = opt.minimize(error_masked)

                    # 2nd stage of training
                    t2 = time.time()
                    for i in range(self.n_epochs2):
                        feed_dict = {
                            x_placeholder: x,
                            learning_rate: self.learning_rate / 10
                        }
                        _ = sess.run(train_masked, feed_dict=feed_dict)
                        if i % self.summary_step == 0:
                            loss_val, error_val = sess.run(
                                (loss, error_masked), feed_dict=feed_dict)
                            error_test_val = sess.run(
                                error_test_masked,
                                feed_dict={x_placeholder: x_test})
                            print(
                                "Epoch: %d\tTotal training loss: %f\tTest error: %f"
                                % (i, loss_val, error_test_val))
                            loss_list.append(loss_val)
                            error_list.append(error_val)
                            error_test_list.append(error_test_val)
                            if np.isnan(loss_val):  # If loss goes to NaN, restart training
                                break
                    t3 = time.time()
                tot_time = t1 - t0 + t3 - t2
                print(tot_time)

                # Print the expressions
                weights = sess.run(sym_masked.get_weights())
                expr = pretty_print.network(weights, self.activation_funcs,
                                            var_names[:x_dim])
                print(expr)

                # Save results
                trial_file = os.path.join(func_dir, 'trial%d.pickle' % trial)
                results = {
                    "weights": weights,
                    "loss_list": loss_list,
                    "error_list": error_list,
                    "reg_list": reg_list,
                    "error_test": error_test_list,
                    "expr": expr,
                    "runtime": tot_time
                }
                with open(trial_file, "wb+") as f:
                    pickle.dump(results, f)

                error_test_final.append(error_test_list[-1])
                eq_list.append(expr)

        return eq_list, error_test_final
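
Both training examples call a generate_data helper that is not shown on this page. The sketch below is a hypothetical stand-in consistent with how it is called here (a sample count plus an optional sampling range, returning N x x_dim inputs and N x 1 targets); the default [-1, 1] domain is an assumption.

import numpy as np
from inspect import signature

def generate_data(func, N, range_min=-1.0, range_max=1.0):
    """Hypothetical sketch of the data generator assumed by train() above."""
    x_dim = len(signature(func).parameters)  # number of input arguments to func
    x = np.random.uniform(range_min, range_max, size=(N, x_dim)).astype(np.float32)
    y = np.array([func(*row) for row in x], dtype=np.float32).reshape(N, 1)
    return x, y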