def train_model(graph, seq_params):
    nb_iter = 1
    nb_epochs = 10000
    len_seq = 20
    num_seqs = 100000

    X_train = np.zeros((num_seqs, len_seq, 60), dtype=np.bool)
    y_train = np.zeros((num_seqs, 60), dtype=np.bool)

    from monitoring import LossHistory
    history = LossHistory()
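    # Callbacks: the custom LossHistory tracks losses during training, and
    # ModelCheckpoint keeps only the best weights seen so far (lowest val_loss).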
    checkpointer = ModelCheckpoint(filepath=save_model_path,
                                   verbose=1,
                                   save_best_only=True)

    for e in range(nb_iter):
        print('-' * 40)
        print('Iteration', e)
        print('-' * 40)

        print("Generating training data...")
        get_random_batch(X_train, y_train, seq_params)
        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss',
                                     patience=25,
                                     verbose=2)
        graph.fit({
            'input': X_train,
            'out1': y_train[:, :60]
        },
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
Example #2
def train_model(graph):
    nb_iter = 1
    nb_epochs = 10000
    len_seq=20
    num_seqs=100000

    seq_params = {
        "freqs":{'S':1},
        "add_noise": False,
        "mult": 59,
        "time_repr":time_representation,
        "label_repr":label_representation
    }
    X_train = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=np.float)
    y_train = np.zeros((num_seqs, 120), dtype=np.bool)

    from monitoring import LossHistory
    history = LossHistory()
    checkpointer = ModelCheckpoint(filepath=save_model_path, verbose=1, save_best_only=True)

    for e in range(nb_iter):
        print('-'*40)
        print('Iteration', e)
        print('-'*40)

        print("Generating training data...")
        get_random_batch(X_train, y_train, seq_params)
        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss', patience=100, verbose=2)
        graph.fit({'input': X_train, 'out1': y_train[:, :120]},
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
def train_model(graph):
    nb_iter = 1
    nb_epochs = 10000
    len_seq=20
    num_seqs=100000

    seq_params = {
        "freqs":{'S':1},
        "add_noise": False,
        "mult": 30,
        "time_repr":time_representation,
        "label_repr": label_representation
    }
    X_train = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=np.float)
    y_train = np.zeros((num_seqs, 60), dtype=np.bool)

    from monitoring import LossHistory
    history = LossHistory()
    checkpointer = ModelCheckpoint(filepath=save_model_path, verbose=1, save_best_only=True)

    for e in range(nb_iter):
        print('-'*40)
        print('Iteration', e)
        print('-'*40)

        print("Generating training data...")
        get_random_batch(X_train, y_train, seq_params)
        print("Fitting data...")
        earlystopper = EarlyStopping(monitor='val_loss', patience=25, verbose=2)
        graph.fit({'input': X_train, 'out1': y_train[:, :60]},
                  validation_split=0.3,
                  batch_size=128,
                  nb_epoch=nb_epochs,
                  callbacks=[checkpointer, earlystopper, history])
Example #4
def train_aenet_model(config):
    tf.reset_default_graph()
    tf_config = tf.ConfigProto()

    if RUN_IN_GPU:
        tf_config.gpu_options.allow_growth = True

    sess = tf.Session(config=tf_config)

    train_lbs, _ = DataHandler.load_labels(config['train_lbs_file'])
    print('Loading training data...done')

    aenet = AENet(sess, config, 'AENet', is_train=True)
    print('Building AENet model...done')

    print('Training...')
    for i in range(config['iterations']):
        batch_lbs, _ = get_random_batch(train_lbs, config['batch_size'])
        cur_loss = aenet.fit(batch_lbs)
        print('Iteration {:>8d}/{}: Loss: {}'.format(i + 1,
                                                     config['iterations'],
                                                     cur_loss))

    aenet.save(config['ckpt_dir'])
    print('Saving current AENet model...done')

    print('Training...done')

    tf.reset_default_graph()
    sess.close()
Example #5
def demo(graph):
    plt.ion()
    fig_size = plt.rcParams["figure.figsize"]
    fig_size[0] = 16
    fig_size[1] = 12
    plt.rcParams["figure.figsize"] = fig_size
    f, (ax3) = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))

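    # The line shows the predicted probability for each of the 60 seconds;
    # the bars mark the ground-truth second of the current sequence.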
    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60),
                           np.zeros(60),
                           width=0.5,
                           color='lightpink',
                           align='center')

    num_seqs = 200
    len_seq = 10
    X = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=np.float)
    y = np.zeros((num_seqs, 60), dtype=np.bool)
    seq_params = {
        #"freqs":{'D':86400,'H':3600,'T':60,'S':1, 'B':86400, 'W-SUN':86400*7 },
        "freqs": {
            'S': 1
        },
        "add_noise": False,
        "mult": 59,
        "time_repr": time_representation,
        "label_repr": label_representation
    }

    freqs, data = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s + 1, :, :])

        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))

        for i, b in enumerate(true_seconds):
            if i == secs:
                b.set_height(1)
            else:
                b.set_height(0)

        f.canvas.draw()
        sleep(2)
def demo(graph):
    plt.ion()
    fig_size = plt.rcParams["figure.figsize"]
    fig_size[0] = 16
    fig_size[1] = 12
    plt.rcParams["figure.figsize"] = fig_size
    f, (ax3) = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))

    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5, color='lightpink', align='center')

    num_seqs = 100
    len_seq = 10
    X = np.zeros((num_seqs, len_seq, len_circ_repr), dtype=np.float)
    y = np.zeros((num_seqs, 120), dtype=np.bool)

    seq_params = {
        #"freqs":{'D':86400,'H':3600,'T':60,'S':1, 'B':86400, 'W-SUN':86400*7 },
        "freqs":{'S':1},
        "add_noise": False,
        "mult": 59,
        "time_repr":time_representation,
        "label_repr": label_representation,
    }

    freqs, data = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        f.suptitle("freqency: " + str(freqs[s+len_seq]), fontsize=20)
        secs = np.argmax(y[s])%60
        preds = next_prediction(graph, X[s:s+1, :, :])
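        # The 120-dim prediction holds two 60-dim halves; combine them
        # element-wise so a single per-second probability curve can be drawn.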
        pred_probs = np.maximum(preds[:60], preds[60:])

        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        
        for i,b in enumerate(true_seconds):
            if i == secs:
                b.set_height(1)
            else:
                b.set_height(0)

        f.canvas.draw()
        sleep(2)
def demo(graph, seq_params):
    plt.ion()
    fig_size = plt.rcParams["figure.figsize"]
    fig_size[0] = 16
    fig_size[1] = 12
    plt.rcParams["figure.figsize"] = fig_size
    f, (ax3) = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))

    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60),
                           np.zeros(60),
                           width=0.5,
                           color='lightpink',
                           align='center')

    num_seqs = 200
    len_seq = 10
    X = np.zeros((num_seqs, len_seq, 60), dtype=np.bool)
    y = np.zeros((num_seqs, 60), dtype=np.bool)

    freqs, data = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        f.suptitle("freqency: " + str(freqs[s + len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s + 1, :, :])

        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))

        for i, b in enumerate(true_seconds):
            if i == secs:
                b.set_height(1)
            else:
                b.set_height(0)

        f.canvas.draw()
        sleep(2)
Example #8
def test_acregnet_model(config):
    tf.reset_default_graph()
    sess = tf.Session()

    test_ims, _ = DataHandler.load_images(config['test_ims_file'])
    print('Loading test data...done')

    config['batch_size'] = test_ims.shape[0] * 2
    config['image_size'] = [256, 256]

    acregnet = ACRegNet(sess, config, 'ACRegNet', is_train=False)
    print('Building AC-RegNet model...done')
    acregnet.restore(config['ckpt_dir'])
    print('Loading trained AC-RegNet model...done')

    batch_ims_x, batch_ims_y = get_random_batch(test_ims, config['batch_size'])

    print('Testing...')
    acregnet.deploy(config['result_dir'], batch_ims_x, batch_ims_y, True)

    print('Testing...done')
Example #9
def train(model_path, data, sess, saver, placeholders, model, opt, args):

    for epoch in range(args.epochs):
        t = time.time()
        batch, labels = get_random_batch(args.batch_size, data)
        outs = sess.run(
            [opt.opt_op, opt.cost],
            feed_dict={
                placeholders['inputs']: batch,
                placeholders['dropout']: args.dropout,
                placeholders['labels']: labels
            })
        avg_cost = outs[1]

        if epoch % 100 == 0:
            print("Epoch:", '%04d' % (epoch + 1), "train_loss=",
                  "{:.5f}".format(avg_cost), "time=",
                  "{:.3f}".format(time.time() - t))

        if epoch % 1000 == 0 and epoch != 0:
            save_path = saver.save(sess, model_path)
            print('saving checkpoint at', save_path)
def demo(graph, seq_params):
    plt.ion()
    fig_size = plt.rcParams["figure.figsize"]
    fig_size[0] = 16
    fig_size[1] = 12
    plt.rcParams["figure.figsize"] = fig_size
    f, (ax3) = plt.subplots(1, 1, sharey=True)
    ax3.set_title('S')
    ax3.set_ylim(ymax=1, ymin=0)
    ax3.set_xlim(xmax=59, xmin=0)
    plt.xticks(np.arange(0, 60, 1.0))

    prob_seconds, = ax3.plot([], [], linewidth=2.0)
    true_seconds = ax3.bar(range(60), np.zeros(60), width=0.5, color='lightpink', align='center')

    num_seqs = 200
    len_seq = 10
    X = np.zeros((num_seqs, len_seq, 60), dtype=np.bool)
    y = np.zeros((num_seqs, 60), dtype=np.bool)

    freqs, data = get_random_batch(X, y, seq_params)

    for s in range(num_seqs):
        f.suptitle("freqency: " + str(freqs[s+len_seq]), fontsize=20)
        secs = np.argmax(y[s])
        pred_probs = next_prediction(graph, X[s:s+1, :, :])

        prob_seconds.set_ydata(pred_probs)
        prob_seconds.set_xdata(range(60))
        
        for i,b in enumerate(true_seconds):
            if i == secs:
                b.set_height(1)
            else:
                b.set_height(0)

        f.canvas.draw()
        sleep(2)
Example #11
def train(epochs, batch_size, input_dir, model_save_dir):

    # Make an instance of the VGG class
    vgg_model = VGG_MODEL(image_shape)

    # Get High-Resolution (HR) images ([148, 148, 3] in this case) and the corresponding Low-Resolution (LR) images
    x_train_lr, x_train_hr = utils.load_training_data(input_dir, [148, 148, 3])

    # Based on the batch size, get the total number of batches
    batch_count = int(x_train_hr.shape[0] / batch_size)

    # Get the downscaled image shape based on the downscale factor
    image_shape_downscaled = utils.get_downscaled_shape(
        image_shape, downscale_factor)

    # Initialize the generator network with the input image shape as the downscaled image shape (shape of LR images)
    generator = networks.Generator(input_shape=image_shape_downscaled)

    # Initialize the discriminator with the input image shape as the original image shape (HR image shape)
    discriminator = networks.Discriminator(image_shape)

    # Get the optimizer to tweak parameters based on loss
    optimizer = vgg_model.get_optimizer()

    # Compile the three models: generator, discriminator, and GAN (the combined network trains the generator while the discriminator's weights stay fixed)
    generator.compile(loss=vgg_model.vgg_loss, optimizer=optimizer)
    discriminator.compile(loss="binary_crossentropy", optimizer=optimizer)
    gan = networks.GAN_Network(generator, discriminator,
                               image_shape_downscaled, optimizer,
                               vgg_model.vgg_loss)

    # Run training for the number of epochs defined
    for e in range(1, epochs + 1):
        print('-' * 15, 'Epoch %d' % e, '-' * 15)
        for _ in tqdm(range(batch_count)):

            # Get the next batch of LR and HR images
            image_batch_lr, image_batch_hr = utils.get_random_batch(
                x_train_lr, x_train_hr, x_train_hr.shape[0], batch_size)

            generated_images_sr = generator.predict(image_batch_lr)
            print(generated_images_sr.shape)

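            # Soft labels: real targets drawn from [0.8, 1.0], fake targets from [0.0, 0.2]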
            real_data_Y = np.ones(
                batch_size) - np.random.random_sample(batch_size) * 0.2
            fake_data_Y = np.random.random_sample(batch_size) * 0.2

            discriminator.trainable = True
            print(real_data_Y.shape)
            d_loss_real = discriminator.train_on_batch(image_batch_hr,
                                                       real_data_Y)
            d_loss_fake = discriminator.train_on_batch(generated_images_sr,
                                                       fake_data_Y)
            discriminator_loss = 0.5 * np.add(d_loss_fake, d_loss_real)

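            # Re-sample a batch and train the combined GAN with the
            # discriminator frozen, so only the generator is updated here.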
            rand_nums = np.random.randint(0,
                                          x_train_hr.shape[0],
                                          size=batch_size)
            image_batch_hr = x_train_hr[rand_nums]
            image_batch_lr = x_train_lr[rand_nums]

            gan_Y = np.ones(
                batch_size) - np.random.random_sample(batch_size) * 0.2
            discriminator.trainable = False
            gan_loss = gan.train_on_batch(image_batch_lr,
                                          [image_batch_hr, gan_Y])

        print("discriminator_loss : %f" % discriminator_loss)
        print("gan_loss :", gan_loss)
        gan_loss = str(gan_loss)

        if e % 50 == 0:
            generator.save_weights(model_save_dir + 'gen_model%d.h5' % e)
            discriminator.save_weights(model_save_dir + 'dis_model%d.h5' % e)

    networks.save_model(gan)
batches_train = images2batches(images_train)

# Scale pixel values to [0, 1]
batches_train = batches_train / 255

# Create neural network
neural_network = EncDecNetLite()
# Initialize weights
neural_network.init()

losses = []

# Main cycle
for i in range(UPDATES_NUM):
    # Get random batch for Stochastic Gradient Descent
    X_batch_train = get_random_batch(batches_train, BATCH_SIZE)

    # Forward pass, calculate the network's outputs
    Y_batch = neural_network.forward(X_batch_train)

    # Calculate sum squared loss
    loss = get_loss(Y_batch, X_batch_train)

    # Backward pass, calculate derivatives of loss w.r.t. weights
    dw = neural_network.backprop(Y_batch, X_batch_train)

    # Correct the neural network's weights
    neural_network.apply_dw(dw)

    # Print the loss every 10 iterations
    if i % 10 == 0:
        # (the original snippet is truncated here; a minimal log line is assumed)
        print('Update {}: loss = {}'.format(i, loss))
Example #13
    return repres


X_test = np.zeros((test_size, seq_size, len_circ_repr), dtype=np.float)
y_test = np.zeros((test_size, len_circ_repr), dtype=np.float)

params = {
#    "freqs":{'D':86400,'H':3600,'T':60,'S':1, 'B':86400, 'W-SUN':86400*7 },
    "freqs":{'S':1, 'T':60},
    "add_noise": False,
    "mult": 59,
    "time_repr":time_representation
}

freqs = get_random_batch(X_test, y_test, params)



## build the model: 
print('Build model...')
graph = Graph()
graph.add_input(name='input', ndim=3)
graph.add_node(GRU(len_circ_repr, 128, return_sequences=True), name='gru1', input='input')
graph.add_node(GRU(128, 128, return_sequences=False), name='gru2', input='gru1')
graph.add_node(Dense(128, 2, activation='tanh'), name='split1', input='gru2')
graph.add_node(Dense(128, 2, activation='tanh'), name='split2', input='gru2')
graph.add_node(Dense(128, 2, activation='tanh'), name='split3', input='gru2')
#graph.add_node(GRU(32, 2, return_sequences=False), name='split1', input='tdd')
#graph.add_node(TimeDistributedDense(32, 2, activation='tanh'), name='split2', input='gru')
#graph.add_node(TimeDistributedDense(32, 2, activation='tanh'), name='split3', input='gru')
Example #14
    adj_norm_batch, adj_orig_batch, adj_idx = get_consecutive_batch(
        0, args.batch_size, adj, adj_norm)
    features = features_batch
    feed_dict = construct_feed_dict(adj_norm_batch, adj_orig_batch, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: args.dropout})
    outs = sess.run([model.reconstructions], feed_dict=feed_dict)

    reconstructions = outs[0].reshape([args.batch_size, 180, 180])

    # Visualize the full matrices of the original, normalized, and reconstructed batches.
    for i in range(adj_orig_batch.shape[0]):
        visualize_matrix(adj_orig_batch, i, model_name, 'original_' + str(i))
        visualize_matrix(adj_norm_batch, i, model_name, 'normalized_' + str(i))
        visualize_matrix(reconstructions, i, model_name,
                         'reconstruction_' + str(i))

    adj_norm_batch, adj_orig_batch, adj_idx = get_random_batch(
        args.batch_size, adj, adj_norm)
    features = features_batch
    feed_dict = construct_feed_dict(adj_norm_batch, adj_orig_batch, features,
                                    placeholders)
    feed_dict.update({placeholders['dropout']: args.dropout})
    outs = sess.run([model.z_mean], feed_dict=feed_dict)

    z = outs[0]

    # Visualize Latent Space
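    # Binary label per graph (index < 203 vs. the rest), passed to
    # visualize_latent_space to distinguish the two groups.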
    onehot = np.array([0 if idx < 203 else 1 for idx in adj_idx])
    visualize_latent_space(z, onehot, model_name)
Example #15
def main(args, silent_mode=False):
    if not silent_mode:
        print 'Using the following settings:'
        for arg, value in args.__dict__.items():
            print arg, ':', value

    # hyperparameters/settings
    optimizer = tf.train.RMSPropOptimizer
    #optimizer = tf.train.AdamOptimizer
    BMLP_ACTIVATION = tf.nn.relu
    EPS = 1e-3
    hidden_dim = args.hidden_dim
    source_dim = args.source_dim
    input_dim = args.mix_dim

    if args.visdom:
        vis = visdom.Visdom(server=args.vd_server,
                            port=args.vd_port,
                            env='main')

    if not args.backprop and not silent_mode:
        print 'Not backpropagating through product distribution'

    plot_size = args.plot_truncate

    #################### get the data ####################
    if args.data == 'synthetic':
        all_x, all_y, A = synthetic.get_data(seed=101,
                                             task_type=args.task,
                                             mix_dim=input_dim)
        val_x = all_x[:, :args.n_validation]
        val_y = all_y[:, :args.n_validation]
        train_x = all_x[:, args.n_validation:]
        train_y = all_y[:, args.n_validation:]
        plot_size = 500
    elif args.data == 'audio':
        linear_mix, pnl_mix, A, sources = audio.get_data()
        if args.task == 'linear':
            all_y = linear_mix
        elif args.task == 'pnl':
            all_y = pnl_mix
        else:
            raise ValueError('task not supported for data set')
        all_x = sources
        val_x = all_x[:, :args.n_validation]
        val_y = all_y[:, :args.n_validation]
        train_x = all_x[:, args.n_validation:]
        train_y = all_y[:, args.n_validation:]

    if args.blind:
        train_x = train_y.copy()
    ######################################################

    # construct the parts or the graph which contain trainable parameters
    with tf.variable_scope('separator'):
        if args.separator_type == 'linear':
            separator = MLP([input_dim, source_dim], [None],
                            stddev=args.sep_stddev)
        elif args.separator_type == 'pnl':
            linear_separator = MLP([input_dim, source_dim], [None],
                                   stddev=args.sep_stddev)
            in_block = MLPBlock(input_dim,
                                32,
                                n_layers=2,
                                stddev=args.sep_stddev)

            def separator(x):
                return linear_separator(in_block(x,
                                                 activation=BMLP_ACTIVATION))
        elif args.separator_type == 'mlp':
            separator = MLP([input_dim, hidden_dim, hidden_dim, source_dim],
                            [tf.nn.relu, tf.nn.relu, None],
                            stddev=args.sep_stddev)

        if args.mixer_type == 'linear':
            mixer = MLP([source_dim, input_dim], [None],
                        stddev=args.mix_stddev)
        elif args.mixer_type == 'pnl':
            linear_mixer = MLP([source_dim, input_dim], [None],
                               stddev=args.mix_stddev)
            out_block = MLPBlock(input_dim,
                                 16,
                                 n_layers=2,
                                 bias_value=0.0,
                                 stddev=args.mix_stddev)

            def mixer(x):
                return out_block(linear_mixer(x), activation=BMLP_ACTIVATION)
        else:
            mixer = MLP([source_dim, hidden_dim, hidden_dim, input_dim],
                        [tf.nn.relu, tf.nn.relu, None],
                        stddev=args.mix_stddev)

        if args.prior == 'trainable':
            prior_bmlp = MLPBlock(source_dim,
                                  32,
                                  n_layers=2,
                                  stddev=args.prior_stddev)

        if args.normalize:
            initial_gamma = tf.constant(.1, shape=(source_dim, ))
            gamma = tf.Variable(initial_gamma, name='gamma')
            initial_beta = tf.constant(0.0, shape=(source_dim, ))
            beta = tf.Variable(initial_beta, name='beta')

    with tf.variable_scope('discriminator'):
        if args.task == 'mlp':
            discriminator = MLP([source_dim, hidden_dim, hidden_dim, 1],
                                [tf.nn.relu, tf.nn.relu, None],
                                stddev=args.disc_stddev)
        else:
            discriminator = MLP([source_dim, 64, 1], [tf.nn.relu, None],
                                stddev=args.disc_stddev)

    sep_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope='separator')
    disc_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                  scope='discriminator')

    y = tf.placeholder(tf.float32, shape=[None, input_dim])
    if not args.blind:
        x = tf.placeholder(tf.float32, shape=[None, source_dim])

    prediction = separator(y)

    prediction_processed = prediction

    if args.normalize:
        prediction_mean = tf.reduce_mean(prediction, 0)
        # note that we don't want to use -= here.
        prediction_processed = prediction_processed - prediction_mean
        prediction_sd = tf.sqrt(
            tf.reduce_mean(prediction_processed**2, 0) + EPS)
        prediction_processed /= prediction_sd

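    # For the 'anica' prior, resample_rows_per_column presumably shuffles each
    # dimension of the batch independently, yielding samples from the product
    # of marginals that the discriminator contrasts with the joint samples.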
    if args.prior == 'anica':
        prediction_perm = resample_rows_per_column(prediction_processed)
    elif args.prior == 'gaussian':
        prediction_perm = tf.random_normal(tf.shape(prediction))
    elif args.prior == 'uniform':
        prediction_perm = tf.random_uniform(tf.shape(prediction))
    elif args.prior == 'trainable':
        prior_samp = tf.random_normal(tf.shape(prediction))
        prediction_perm = prior_bmlp(prior_samp, activation=BMLP_ACTIVATION)
        if args.normalize:
            prediction_perm_mean = tf.reduce_mean(prediction_perm, 0)
            prediction_perm_norm = prediction_perm - prediction_perm_mean
            prediction_perm_sd = tf.sqrt(
                tf.reduce_mean(prediction_perm_norm**2, 0) + EPS)
            prediction_perm_norm /= prediction_perm_sd
            prediction_perm = prediction_perm_norm

    else:
        raise ValueError("Unknown 'prior'")

    joint_logit = discriminator(prediction_processed)
    marg_logit = discriminator(prediction_perm)

    if args.gan_type == 'default':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = -disc_cost
        else:
            gen_cost = -tf.reduce_mean(tf.nn.softplus(joint_logit))
    elif args.gan_type == 'kl':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        # there is no grad wrt the marginals by definition for this loss
        gen_cost = -tf.reduce_mean(joint_logit)
    elif args.gan_type == 'bgan':
        disc_cost = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                     tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = (tf.reduce_mean(marg_logit**2) +
                        tf.reduce_mean(joint_logit**2))
        else:
            gen_cost = tf.reduce_mean(joint_logit**2)
    elif args.gan_type == 'wgan-gp':
        joint_term = tf.reduce_mean(joint_logit)
        marg_term = tf.reduce_mean(marg_logit)
        disc_cost_mon = joint_term - marg_term
        if args.backprop:
            gen_cost = -disc_cost_mon
        else:
            gen_cost = -joint_term
        # compute gradient penalty
        alpha = tf.random_uniform(shape=(tf.shape(prediction)[0], 1))
        interpolates = alpha * (prediction_perm - prediction_processed)
        interpolates += prediction_processed
        gradients = tf.gradients(discriminator(interpolates),
                                 [interpolates])[0]
        slopes = tf.sqrt(
            tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
        gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
        disc_cost = disc_cost_mon + args.gp_scaling * gradient_penalty
    elif args.gan_type == 'gan-gp':
        # same cost as default but with gradient penalty
        disc_cost_mon = (tf.reduce_mean(tf.nn.softplus(-marg_logit)) +
                         tf.reduce_mean(tf.nn.softplus(joint_logit)))
        if args.backprop:
            gen_cost = -disc_cost_mon
        else:
            gen_cost = -tf.reduce_mean(tf.nn.softplus(joint_logit))
        gradients_joint = tf.gradients(joint_logit, [prediction_processed])[0]
        gradients_marg = tf.gradients(marg_logit, [prediction_perm])[0]
        ss_joint = tf.reduce_sum(tf.square(gradients_joint),
                                 reduction_indices=[1])
        ss_marg = tf.reduce_sum(tf.square(gradients_marg),
                                reduction_indices=[1])
        gp_marg = tf.reduce_mean(ss_marg * (1 - tf.nn.sigmoid(marg_logit))**2)
        gp_joint = tf.reduce_mean(ss_joint * tf.nn.sigmoid(joint_logit)**2)
        disc_cost = disc_cost_mon + args.gp_scaling * (gp_joint + gp_marg)
    else:
        raise ValueError('Unknown GAN type')

    prediction_norm = prediction - tf.reduce_mean(
        prediction, 0, keep_dims=True)
    cov_mat = (tf.matmul(tf.transpose(prediction_norm), prediction_norm) /
               tf.cast(tf.shape(prediction)[0], prediction.dtype))

    # This computes the average absolute value of the correlation matrix.
    # It can be an interesting value to monitor to see if the model is at least
    # able to remove the linear dependencies.
    diag = tf.diag_part(cov_mat)
    cor_mat = cov_mat / tf.sqrt(diag[:, None] * diag[None, :])
    total_corr = (
        (tf.reduce_sum(tf.abs(cor_mat)) - tf.cast(source_dim, 'float32')) /
        (source_dim * (source_dim - 1)))

    if args.normalize_rec:
        reconstruction = mixer(prediction_processed * gamma + beta)
    else:
        reconstruction = mixer(prediction)

    rec_cost = tf.abs(reconstruction - y)
    rec_cost = tf.reduce_mean(rec_cost)

    tot_cost = args.rec_scaling * rec_cost + args.ind_scaling * gen_cost

    train_step_sep = optimizer(args.learning_rate).minimize(tot_cost,
                                                            var_list=sep_vars)
    train_step_disc = optimizer(args.learning_rate).minimize(
        disc_cost, var_list=disc_vars)
    max_corr = get_max_corr(x, prediction)

    summary_vars = OrderedDict({
        'total_corr': total_corr,
        'total_cost': tot_cost,
        'gen_cost': gen_cost,
        'disc_cost': disc_cost,
        'rec_cost': rec_cost
    })
    if not args.blind:
        summary_vars['max_corr'] = max_corr

    if args.gan_type == 'wgan-gp':
        summary_vars['disc_cost_mon'] = disc_cost_mon

    # initialize session
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    def fig2rgb_array(fig, expand=True):
        fig.canvas.draw()
        buf = fig.canvas.tostring_rgb()
        ncols, nrows = fig.canvas.get_width_height()
        shape = (nrows, ncols, 3) if not expand else (1, nrows, ncols, 3)
        return np.fromstring(buf, dtype=np.uint8).reshape(shape)

    prediction_np = sess.run(prediction, feed_dict={y: train_y.T})

    if args.plot_dim is None:
        num_signals = source_dim
    else:
        num_signals = args.plot_dim

    if args.blind:
        plot_fig = plot_signals(plt,
                                prediction_np.T[:, :plot_size],
                                prediction_np.T[:, :plot_size],
                                n=num_signals)
    else:
        plot_fig = plot_signals(plt,
                                train_x[:, :plot_size],
                                prediction_np.T[:, :plot_size],
                                n=num_signals)

    if args.folder is not None and not silent_mode:
        if not os.path.isdir(args.folder):
            os.makedirs(args.folder)
        print 'Saving logs to:', args.folder

    summary_lists = OrderedDict((key, []) for key in summary_vars)

    iteration_indices = []

    if args.source_dim > 7 and not args.blind:
        warnings.warn('source_dim > 7. Using approximate corr evaluation.')

    for i in range(args.iterations):
        if i % 1000 == 0:
            feed_dict = {y: val_y.T}
            if not args.blind:
                feed_dict[x] = val_x.T
            summary = sess.run(summary_vars.values(), feed_dict=feed_dict)

            prediction_np = sess.run(prediction, feed_dict={y: train_y.T})

            if np.isnan(prediction_np[0, 0]):
                if silent_mode:
                    return np.nan
                else:
                    raise ValueError('NAN!')

            plt.gcf().clear()
            if args.blind:
                plot_fig = plot_signals(plt,
                                        prediction_np.T[:, :plot_size],
                                        prediction_np.T[:, :plot_size],
                                        n=num_signals)
            else:
                plot_fig = plot_signals(plt,
                                        train_x[:, :plot_size],
                                        prediction_np.T[:, :plot_size],
                                        n=num_signals)

            fig_rgb = fig2rgb_array(plot_fig.gcf(), expand=False)
            if args.visdom and not silent_mode:
                vis.image(fig_rgb.transpose(2, 0, 1),
                          win='predictions',
                          env=args.vd_env)

            if args.folder is not None:
                np.save(os.path.join(args.folder, 'output' + str(i) + '.npy'),
                        prediction_np)

            iteration_indices.append(i)
            for summ_val, summ_name in zip(summary, summary_vars.keys()):
                if summ_name == 'max_corr' and not args.blind:
                    if args.source_dim < 8:
                        max_corr_np = get_max_corr_perm(
                            prediction_np, train_x.T)
                    else:
                        max_corr_np = summ_val
                    summary_lists['max_corr'].append(max_corr_np)
                else:
                    if summ_name == 'total_cost':
                        total_cost_np = summ_val
                    summary_lists[summ_name].append(summ_val)
                if args.visdom and not silent_mode:
                    vis.line(Y=np.asarray(summary_lists[summ_name]),
                             X=np.asarray(iteration_indices),
                             win=summ_name,
                             env=args.vd_env,
                             opts=dict(title=summ_name))

            if not args.blind and not silent_mode:
                print i, 'Current max corr:', max_corr_np

        train_y_batch = get_random_batch(train_y.T, args.batch_size)
        train_step_sep.run(feed_dict={y: train_y_batch}, session=sess)

        for j in range(args.disc_updates):
            train_y_batch = get_random_batch(train_y.T, args.batch_size)
            train_step_disc.run(feed_dict={y: train_y_batch}, session=sess)

    # store final result somewhere in home folder together with the config
    if args.results_file is not None and not silent_mode:
        with open(args.results_file, 'w') as fout:
            fout.write(str(total_cost_np))
            if not args.blind:
                fout.write(' ' + str(max_corr_np) + '\n')
            else:
                fout.write('\n')
            for arg, value in args.__dict__.items():
                fout.write('{} : {}\n'.format(arg, value))

    return total_cost_np
Example #16
def main(args):
    # hyperparameters/settings
    EXP = False
    n_hidden = args.hidden_dim
    batch_size = 256
    source_dim = 6 if args.data == 'synthetic' else 3
    input_dim = source_dim

    with tf.variable_scope('separator'):
        separator = PNLMISEP(input_dim, n_hidden, args.block_weight_scaling,
                             args.weight_stddev)

    sep_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                 scope='separator')

    y = tf.placeholder(tf.float32, shape=[None, input_dim])

    prediction = separator.forward(y)[-1]

    prediction_norm = prediction - tf.reduce_mean(
        prediction, 0, keep_dims=True)
    cov_mat = (tf.matmul(tf.transpose(prediction_norm), prediction_norm) /
               tf.cast(tf.shape(prediction)[0], prediction.dtype))

    tot_cost = -tf.reduce_mean(separator.get_log_det_jacobian2(y))
    optimizer = tf.train.RMSPropOptimizer
    train_step_sep = optimizer(args.learning_rate).minimize(tot_cost,
                                                            var_list=sep_vars)

    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)

    plot_size = 500
    if args.data == 'synthetic':
        all_x, all_y, A = synthetic.get_data(seed=101,
                                             task_type='pnl',
                                             mix_dim=input_dim)
        val_x = all_x[:, :args.n_validation]
        val_y = all_y[:, :args.n_validation]
        train_x = all_x[:, args.n_validation:]
        train_y = all_y[:, args.n_validation:]
    elif args.data == 'audio':
        linear_mix, pnl_mix, A, sources = audio.get_data()
        all_y = pnl_mix
        all_x = sources
        val_x = all_x[:, :args.n_validation]
        val_y = all_y[:, :args.n_validation]
        train_x = all_x[:, args.n_validation:]
        train_y = all_y[:, args.n_validation:]
        plot_size = None
    else:
        raise ValueError('No data set specified')

    prediction_np = sess.run(prediction, feed_dict={y: train_y.T})

    for i in range(500000):
        if i % 1000 == 0:
            prediction_np = sess.run(prediction, feed_dict={y: train_y.T})
            if np.isnan(prediction_np[0, 0]):
                raise ValueError('NAN!')
        train_y_batch = get_random_batch(train_y.T, batch_size)
        train_step_sep.run(feed_dict={y: train_y_batch}, session=sess)

    tot_cost_np = sess.run(tot_cost, feed_dict={y: val_y.T})
    max_corr_np = get_max_corr_perm(prediction_np, train_x.T)

    if args.results_file is not None:
        with open(args.results_file, 'w') as fout:
            fout.write(str(tot_cost_np))
            fout.write(' ' + str(max_corr_np) + '\n')
            for arg, value in args.__dict__.items():
                fout.write('{} : {}\n'.format(arg, value))
Example #17
def train(sess, args, adjTrainable, nlayers, use_sparse, bsize, nbatches,
          learningRate, nhidden, nsteps, maskLen, rank):

    seed = 123
    np.random.seed(seed)
    tf.set_random_seed(seed)

    # Load data
    adj, x_train, y_train, x_val, y_val, x_test, y_test = utils.load_data()
    train_var, val_var, test_var = np.var(y_train), np.var(y_val), np.var(
        y_test)

    # Build model
    model = gconv.GraphConvLSTM(adj,
                                x_train.shape[1],
                                x_train.shape[2],
                                num_layers=nlayers,
                                n_steps=nsteps,
                                n_hidden=nhidden,
                                adj_trainable=adjTrainable,
                                use_sparse=use_sparse,
                                mask_len=maskLen,
                                learning_rate=learningRate,
                                rank=rank)

    init = tf.global_variables_initializer()

    # Initialize tensorflow variables
    sess.run(init)

    best_val = 99999999.
    best_test = 99999999.
    best_batch = 0
    last_lr_update = 0

    # Display parameters
    display_step = 1000
    between_lr_updates = 500
    lr_factor = 0.9

    learningRate = sess.run(model.learning_rate_variable)

    cost_val = []

    train_mse = 0
    denom = 0.

    batches_complete = sess.run(model.global_step)

    saved_test_mse = 9999

    # Train model
    while batches_complete < nbatches:
        x_train_b, y_train_b = utils.get_random_batch(x_train, y_train, nsteps,
                                                      maskLen, bsize)

        t = time.time()
        # Construct feed dictionary
        feed_dict = utils.construct_feed_dict(x_train_b, y_train_b, model)
        feed_dict[model.learning_rate_variable] = learningRate

        # Training step
        _, batch_mse, batches_complete = sess.run(
            [model.opt_op, model.mse, model.global_step], feed_dict=feed_dict)
        train_mse += batch_mse

        batch_time = time.time() - t
        denom += 1

        # Periodically compute validation and test loss
        if batches_complete % display_step == 0 or batches_complete == nbatches:
            # Validation
            val_mse, duration = utils.evaluate(x_val, y_val, model, sess,
                                               nsteps, maskLen)
            cost_val.append(val_mse)

            test_mse, duration = utils.evaluate(x_test, y_test, model, sess,
                                                nsteps, maskLen)

            # Print results
            print("Batch Number:%04d" % (batches_complete),
                  "train_mse={:.5f}".format(train_mse / denom),
                  "val_mse={:.5f}".format(val_mse),
                  "test_mse={:.5f}".format(test_mse),
                  "test_rsq={:.5f}".format(1 - (test_mse / test_var)),
                  "time={:.5f}".format(batch_time),
                  "lr={:.8f}".format(learningRate))
            train_mse = 0
            denom = 0.

            # Check if val loss is the best encountered so far
            if val_mse < best_val:
                best_val = val_mse
                saved_test_mse = test_mse
                best_batch = batches_complete - 1

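            # Decay the learning rate when validation loss has not improved for
            # `between_lr_updates` batches, and at most once per such interval.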
            if (batches_complete - best_batch > between_lr_updates) and (
                    batches_complete - last_lr_update > between_lr_updates):
                learningRate = learningRate * lr_factor
                last_lr_update = batches_complete

    print('best val mse: {0}, test mse: {1}, test rsq: {2}'.format(
        best_val, saved_test_mse, 1 - (saved_test_mse / test_var)))