Beispiel #1
0
    def compile(self, model):
        """Create the optimizer step and loss functions for this layer.

        An RMSProp optimizer (learning rate 0.001) minimises
        ``model.total_loss`` with respect to ``self.D0`` only. Two backend
        functions are stored on the instance, both fed with the model input,
        its first target and its first sample-weight tensor:

        * ``self.fit_op`` runs one optimizer step,
        * ``self.loss`` returns ``model.total_loss``.

        A short summary is printed once everything is built.
        """
        rmsprop = tf.train.RMSPropOptimizer(0.001)
        self.optimizer = rmsprop
        self.opt = rmsprop.minimize(model.total_loss, var_list=[self.D0])

        def feeds():
            # A fresh list per backend function, as each one owns its inputs.
            return [model.input, model.targets[0], model.sample_weights[0]]

        self.fit_op = K.Function(feeds(), [self.opt])
        self.loss = K.Function(feeds(), [model.total_loss])

        banner = ("Reduction Layer Compiled, batch %d"
                  % self.patch_layer.patch_size)
        out_shape = self.compute_output_shape(self.input_shape_t)
        print(banner, "\n", "Output shape:", out_shape)
def running(options):
    """Visualise the per-event bottleneck features of a trained model.

    Loads fold 1 of ``options.dataset``, restores the last saved weights of
    the attention-based supervised beta-VAE and, for each of
    ``options.num_events`` events, evaluates the output of layer ``15 + i``
    on the first 300 shuffled test sequences. The stacked outputs are handed
    to ``Dataset.visualization``.

    Args:
        options: run configuration; this function reads ``dataset``,
            ``num_events``, ``num_samples``, ``time_step`` and
            ``result_path`` from it.
    """
    fold = 1
    dt = Dataset(options.dataset)

    # Load data; only the test features are needed here.
    *_, test_datas, _test_labels = dt.load_data(nevents=options.num_events,
                                                nsamples=options.num_samples,
                                                fold=fold)
    sequential_test_datas = dt.sequentialize_data(test_datas,
                                                  timestep=options.time_step)
    # random.shuffle swaps items by tuple assignment; on a multi-dimensional
    # ndarray those items are views, so rows can end up duplicated.
    # np.random.shuffle permutes the first axis in place and also accepts a
    # plain list. NOTE(review): the return type of sequentialize_data is not
    # visible here - confirm.
    np.random.shuffle(sequential_test_datas)

    # Build the model and load the weights saved for this fold.
    att_s_beta_vae = AttSBetaVAE(options)
    K.reset_uids()
    sbvae = att_s_beta_vae.build_model(options)
    sbvae.load_weights(options.result_path + sbvae.name + '/fold_' +
                       str(fold) + '_last_weight.h5')

    # Get the bottleneck features, one backend function per event.
    num_to_plot = 300
    batch = sequential_test_datas[:num_to_plot]
    features = []
    for i in range(options.num_events):
        h_fnc = K.Function([sbvae.input], [sbvae.layers[15 + i].output])
        features.append(h_fnc([batch])[0])

    # Concatenate once instead of growing an array seeded with an
    # uninitialised placeholder row.
    h_out = np.concatenate(features) if features else np.empty((0, 15))

    # visualization
    dt.visualization(datas=h_out, name='Att_s_beta_VAE')
Beispiel #3
0
    def generate_data(self, model, event_index, input_data):
        """Decode the target-event code of ``input_data`` into generated data.

        The checkpoint ``cp_weight.h5`` is loaded into ``model`` first. Two
        backend functions are then chained:

        * the first maps the model input to the output of layer
          ``8 + self.nevents + event_index + 1`` (the z* of the target event,
          per the original author's note);
        * the second maps the output tensor of layer 6 to ``model.output[0]``,
          i.e. the decoder reconstruction.

        Returns:
            The decoder output for the extracted code.
        """
        weights_file = (self.op.result_path + self.op.name + '_' +
                        str(self.op.nevents) + '/cp_weight.h5')
        model.load_weights(weights_file)

        target_layer = model.layers[8 + self.nevents + event_index + 1]
        e_fnc = K.Function([model.input], [target_layer.output])
        decoder_fnc = K.Function([model.layers[6].output], [model.output[0]])

        z_star = e_fnc([input_data])[0]
        return decoder_fnc([z_star])[0]
    def create_funtions(self):
        """Build ``self.encoder`` and ``self.decoder`` around the middle layer.

        The model is compiled first if needed. The latent layer is chosen as
        the integer mean of the indices of all layers named
        ``'variationaldense'`` (when ``self.is_var``) or ``'dense'``; if the
        layer right after it is named ``'batchnormalization'`` that layer is
        used instead.

        ``self.encoder`` maps the input of the first layer to the output of
        the chosen layer, ``self.decoder`` maps the input of the following
        layer to the output of the last layer (both with ``train=False``).

        Raises:
            ValueError: if the model has no layer with the expected name.
        """
        if not self.is_compiled:
            self.compile_model()

        target_name = 'variationaldense' if self.is_var else 'dense'
        indices = [i for i, layer in enumerate(self.model.layers)
                   if layer.name == target_name]
        if not indices:
            # Previously this surfaced as "cannot convert float NaN to
            # integer" from the mean of an empty list.
            raise ValueError(
                "model has no layer named '{}'".format(target_name))

        # np.int was a plain alias of the builtin and was removed in
        # NumPy 1.24; the builtin truncates the same way.
        index = int(np.mean(indices))
        if self.model.layers[index + 1].name == 'batchnormalization':
            index = index + 1
            logging.info('Middle layer followed by batchnorm. This layer ({})'
                         ' is chosen as latent space'.format(index))

        logging.info('Creating encoder and decoder function')
        layers = self.model.layers
        # NOTE(review): the original author was unsure whether `index` really
        # is the latent space - confirm against the model definition.
        self.encoder = K.Function([layers[0].get_input(train=False)],
                                  [layers[index].get_output(train=False)])

        # The decoder does not use every graph input, hence the relaxed
        # unused-input policy.
        self.decoder = K.Function([layers[index + 1].get_input(train=False)],
                                  [layers[-1].get_output(train=False)],
                                  on_unused_input='warn')
Beispiel #5
0
 def setup_model(self):
     """Set up placeholders, Q-value tensors and a training function.

     NOTE(review): this method looks unfinished - see the notes below on
     ``target_Q`` / ``target_q`` and on the optimizer class.
     """

     # One placeholder per element of a transition; the state placeholders
     # take the environment's observation shape with a free batch axis.
     current_state = K.placeholder(shape=(None,)+self.env.observation_space.shape)
     next_state = K.placeholder(shape=(None,)+self.env.observation_space.shape)
     action = K.placeholder(ndim=1)
     terminated = K.placeholder(ndim=1)
     reward = K.placeholder(ndim=1)
     # The same network is applied to the current and to the next state.
     current_Q = self.model.net(current_state)
     next_Q = self.model.net(next_state)

     # NOTE(review): `target_Q` is never assigned in this method; this bare
     # expression raises NameError unless a global of that name exists.
     target_Q

     # NOTE(review): TF 1.x names this class `RMSPropOptimizer` - confirm
     # that `tf.train.RMSProp` exists in the targeted version.
     optimizer = tf.train.RMSProp()

     # NOTE(review): `target_q` (lower-case q) is not defined in this method
     # and differs in case from `target_Q` above.
     loss = K.mean(K.square(target_q-current_Q))
     # NOTE(review): `op` is a local that is neither stored on `self` nor
     # returned, so the built function is not reachable by callers.
     op = K.Function([current_state,next_state,action,reward,terminated],[optimizer.minimize(loss)])
Beispiel #6
0
    def _make_train_function(self):
        # pylint: disable=attribute-defined-outside-init
        """
        Build ``self.train_function`` on top of a tensorflow optimizer.

        This override lets tensorflow optimisers be used directly, which is
        desirable because tensorflow handles gradients of sparse tensors
        efficiently. Nothing is done when the function already exists.

        Raises ``RuntimeError`` if the model has not been compiled and
        ``ConfigurationError`` for an unknown gradient clipping type.
        """
        if not hasattr(self, 'train_function'):
            raise RuntimeError('You must compile your model before using it.')
        if self.train_function is not None:
            return

        feeds = self._feed_inputs + self._feed_targets + self._feed_sample_weights
        if self.uses_learning_phase and not isinstance(
                K.learning_phase(), int):
            feeds.append(K.learning_phase())

        # From here on Keras' own optimizer machinery is bypassed.
        self.global_step = K.variable(0., name='global_step')
        grads = tensorflow.gradients(self.total_loss,
                                     self._collected_trainable_weights)

        clipping = self.gradient_clipping
        if clipping is not None:
            # Read with .get (not .pop): the dict must survive repeated
            # calls to fit.
            clip_type = clipping.get("type")
            clip_value = clipping.get("value")
            if clip_type not in ('clip_by_norm', 'clip_by_value'):
                raise ConfigurationError(
                    "{} is not a supported type of gradient clipping.".
                    format(clip_type))
            if clip_type == 'clip_by_norm':
                grads, _ = tensorflow.clip_by_global_norm(grads, clip_value)
            else:
                grads = [
                    tensorflow.clip_by_value(grad, -clip_value, clip_value)
                    for grad in grads
                ]

        grads_and_weights = zip(grads, self._collected_trainable_weights)
        # pylint: disable=no-member
        training_updates = self.optimizer.apply_gradients(
            grads_and_weights, global_step=self.global_step)
        # pylint: enable=no-member
        updates = self.updates + [training_updates]

        # Returns loss and metrics; every call also applies the updates.
        fetches = [self.total_loss] + self.metrics_tensors
        self.train_function = K.Function(feeds, fetches, updates=updates)
Beispiel #7
0
	def attention_visuliaze():
		"""Print the attention weights of the first 10000 test samples.

		The outputs of the layers named ``alpha_prob``, ``beta_prob`` and
		``final_att`` are evaluated on ``X_test`` split at ``maxSeqLen``.
		For every sample the true and predicted label, the decoded sentence
		(without ``<PAD>``), the decoded tokens after ``maxSeqLen`` and each
		weight together with ``255 - weight * 255`` are printed.
		"""
		attention_names = ['alpha_prob', 'beta_prob', 'final_att']
		model_inputs = mm.input
		weights = {}
		for layer in mm.layers:
			if layer.name not in attention_names:
				continue
			probe = K.Function(model_inputs, [layer.output])
			weights[layer.name] = probe(
				[X_test[:, :maxSeqLen], X_test[:, maxSeqLen:]])[0]

		for line in range(10000):
			sent, con = [], []
			for pos, token in enumerate(X_test[line]):
				if pos >= maxSeqLen:
					con.append(id2con[int(token)])
					continue
				word = id2w[int(token)]
				if word != '<PAD>':
					sent.append(word)
			print(id2label[y_test[line]], id2label[y_pred[line]], ''.join(sent))
			print(con)
			for name in ('alpha_prob', 'beta_prob', 'final_att'):
				att = weights[name][line]
				print(att, '||', (255-att*255))
			print()
Beispiel #8
0
    def setup_model(self):
        """Build and compile ``self.net``.

        The graph is: input -> ReductionLayer -> Flatten -> Dense(256, tanh)
        -> RBFLayer(512) -> four Dense layers -> linear output of size
        ``self.output_n``. The RBF layer is initialised from the Dense(256)
        activations of 1000 states sampled from ``agent.memory``. The model
        is compiled with RMSprop and an MSE loss and its summary is printed.
        """
        inputs = layers.Input(shape=self.input_dim)

        reduced = nn.ReductionLayer(8, 64, 0.001)(inputs)
        flat = layers.Flatten()(reduced)
        self.y = layers.Dense(256, activation="tanh")(flat)

        # Evaluate the embedding on sampled states to seed the RBF centers.
        embed = K.Function([inputs], [self.y])
        X = embed([agent.memory.sample(1000)["state"]])[0]
        self.init = nn.InitCentersRandom(X)

        hidden = nn.RBFLayer(512, initializer=self.init)(self.y)
        dense_stack = ((128, "tanh"), (128, "tanh"), (128, "relu"), (64, "relu"))
        for units, act in dense_stack:
            hidden = layers.Dense(units, activation=act)(hidden)

        outputs = layers.Dense(self.output_n, activation='linear')(hidden)
        self.net = keras.models.Model(inputs, outputs)
        optim = keras.optimizers.RMSprop(lr=0.00025, rho=0.95, epsilon=0.01)
        self.net.compile(optimizer=optim, loss='mse')
        # (disabled by the original author) self.reducer.compile(self.model)
        print(self.net.summary())
Beispiel #9
0
# Sort the training pairs by X_tr so the curves below are drawn in order.
idx = np.argsort(X_tr)
X_tr, Z_tr = X_tr[idx], Z_tr[idx]

# Learned model: the mean over 100 terms exp(-coef * x**2).
coefs = K.random_normal_variable((100, ), 0, 0.1)
pows = K.arange(100, dtype='float32')
X = K.placeholder((None, 1))
Y = K.mean(K.exp(-K.square(X) * coefs), axis=1)
Z = K.placeholder(ndim=1)

# Mean squared error between the model output and the target placeholder.
loss = K.mean(K.square(Y - Z))

# Only `coefs` is trained.
optimizer = tf.train.AdamOptimizer()
opt = optimizer.minimize(loss, var_list=[coefs])
grad = K.Function([X, Z], [opt])

get_loss = K.Function([X, Z], [loss])
get_Y = K.Function([X], [Y])

# Column view of the inputs, as expected by the (None, 1) placeholder.
X_col = X_tr.reshape(-1, 1)
for i in range(1000):
    grad([X_col, Z_tr])
    if i % 100 == 0:
        # Every 100 steps: report the loss and plot the fit against the data.
        print(get_loss([X_col, Z_tr]))

        plt.plot(X_tr, Z_tr)
        plt.plot(X_tr, get_Y([X_col])[0])
        plt.show()

# Change Z_tt
Z_tt = Z_tr.copy()
# Repeated K-fold cross-validation of the wide-and-deep network on raw
# features; one row of scores is written to `results` per drug.
# NOTE(review): within the visible code `results_index` is never incremented,
# so every write below targets row 0 - confirm it is advanced further down.
results_index = 0
for r in range(repeats):
    cross_val_split = KFold(n_splits=cv_splits, shuffle=True)
    for train, val in cross_val_split.split(X):
        X_train = X[train]
        X_val = X[val]
        # y_train is not used below; the network is fit on alpha_matrix.
        y_train = y[train]
        y_val = y[val]
        # ------- Train the wide and deep neural network -------
        wdnn = get_wide_deep_raw_features()
        wdnn.fit(X_train,
                 alpha_matrix[train],
                 epochs=100,
                 verbose=False,
                 validation_data=[X_val, alpha_matrix[val]])
        # Backend function taking the learning phase as an extra input.
        # NOTE(review): unused in the visible code because the ensemble call
        # below is commented out.
        mc_dropout = K.Function(wdnn.inputs + [K.learning_phase()],
                                wdnn.outputs)
        #wdnn_probs = ensemble(X_val, y_val, mc_dropout)
        wdnn_probs = wdnn.predict(X_val)
        for i, drug in enumerate(drugs):
            # Rows labelled -1 are excluded from scoring (presumably they
            # mark missing phenotypes - verify against the data loader).
            non_missing_val = np.where(y_val[:, i] != -1)[0]
            auc_y = np.reshape(y_val[non_missing_val, i],
                               (len(non_missing_val), 1))
            auc_preds = np.reshape(wdnn_probs[non_missing_val, i],
                                   (len(non_missing_val), 1))
            val_auc = roc_auc_score(auc_y, auc_preds)
            # Average precision is computed on the flipped labels and
            # scores (1 - value), i.e. with class 0 as the positive class.
            val_auc_pr = average_precision_score(
                1 - y_val[non_missing_val, i],
                1 - wdnn_probs[non_missing_val, i])
            results.loc[results_index] = [
                'WDNN Raw Features', drug, val_auc, val_auc_pr
            ]
Beispiel #11
0

# Custom loss function - we only care about (cos,sin) outputs at the
# non-zero positions in the sparse y_true vector.  To avoid driving the
# other samples to 0 we use a sparse loss function.  The normalisation
# term accounts for the time-varying number of non-zero samples.
def sparse_loss(y_true, y_pred):
    """Squared error summed over the non-zero positions of ``y_true``,
    divided by the number of such positions.

    When ``y_true`` has no non-zero entry the loss is 0 rather than the
    NaN produced by a 0/0 division.
    """
    mask = K.cast(K.not_equal(y_true, 0), dtype='float32')
    # The count is integer-valued, so clamping it to 1 only changes the
    # empty case, where the numerator is 0 as well.
    n = K.maximum(K.sum(mask), 1.0)
    return K.sum(K.square((y_pred - y_true) * mask)) / n


# Sanity-check the custom loss on hand-computed cases.
y_true = Input(shape=(None, ))
y_pred = Input(shape=(None, ))
loss_func = K.Function([y_true, y_pred], [sparse_loss(y_true, y_pred)])
for truth, pred, expected in (
        ([[0, 1, 0]], [[2, 2, 2]], 1),
        ([[1, 1, 0]], [[3, 2, 2]], 2.5),
        ([[0, 1, 0]], [[0, 2, 0]], 1),
):
    assert loss_func([truth, pred]) == np.array([expected])

# Compile the model with the sparse loss and SGD with Nesterov momentum.
from keras import optimizers
sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=sparse_loss, optimizer=sgd)

# Training proper with real phase data; 10% is held out for validation.
history = model.fit(amp_train, phase_train_rect,
                    batch_size=nb_batch,
                    epochs=args.epochs,
                    validation_split=0.1)
Beispiel #12
0
def running(options):
    """Train, augment the first category with generated data, and retrain.

    Steps:
      1. Optionally build the polyphonic (unbalanced) dataset.
      2. Load fold 1 and turn the features into sequences.
      3. Train the attention-based supervised beta-VAE.
      4. Evaluate segment-based F1 and error rate.
      5-7. Run the training samples labelled ``[1, 0, 0, 0, 0]`` through the
         z* layer (layer 14) and the decoder, and append the generated data
         with those labels to the training set.
      8. Retrain on the augmented set and evaluate again.

    Args:
        options: run configuration; this function reads ``dataset``,
            ``mix_data``, ``num_events``, ``num_samples``, ``time_step``,
            ``result_path`` and ``name`` from it.

    Returns:
        A DataFrame with columns ``F1`` and ``ER`` and a single row
        ``'AVER'`` holding the scores after augmentation; the same frame is
        written to ``K_Folds_results.csv``.
    """
    dt = Dataset(options.dataset)
    # 1. First construct polyphonic datasets by mixing single event sounds,
    #    and extract MFCC features.
    if options.mix_data:
        dt.mix_data(nevents=options.num_events,
                    nsamples=options.num_samples,
                    isUnbalanced=True)

    f1_list, er_list, fold_list = [], [], []

    att_s_beta_vae = AttSBetaVAE(options)
    # 2. Load data.
    train_datas, train_labels, test_datas, test_labels = dt.load_data(
        nevents=options.num_events,
        nsamples=options.num_samples,
        fold=1,
        isUnbalanced=True)
    sequential_train_datas = dt.sequentialize_data(train_datas,
                                                   timestep=options.time_step)
    sequential_test_datas = dt.sequentialize_data(test_datas,
                                                  timestep=options.time_step)

    # 3. Create the attention-based supervised beta-VAE model and train it
    #    with the unbalanced data.
    K.reset_uids()
    model = att_s_beta_vae.build_model(options)
    att_s_beta_vae.train_model(model,
                               x_train=sequential_train_datas,
                               y_train=train_labels,
                               fold=1,
                               new_weights=options.result_path + model.name +
                               '/fold_1_last_weight_DA.h5')

    # 4. Evaluate the segment-based F1 score and ER.
    #    `supervised` is True for the supervised beta-VAE, False for others.
    f1_score, error_rate = att_s_beta_vae.metric_model(
        model,
        sequential_test_datas,
        test_labels,
        supervised=True,
        new_weight_path='fold_1_last_weight_DA.h5')

    print(
        'Before data augmentation '
        '>>> nevents {nevents}, nsamples {nsamples} ==> error_rate: {error_rate}, f1_score: {f1_score}'
        .format(nevents=options.num_events,
                nsamples=options.num_samples,
                error_rate=error_rate,
                f1_score=f1_score))

    # 5. Define the function to get z^*; the under-represented category is
    #    the first event.
    z_star_fnc = K.Function([model.input], [model.layers[14].output])
    # x collects the input features of the first category, y_addition the
    # matching labels.
    x = []
    y_addition = []
    for sample, y in zip(sequential_train_datas, train_labels):
        if (y == [1, 0, 0, 0, 0]).all():
            x.append(sample)
            y_addition.append(y)
    z_star = z_star_fnc([x])[0]

    # 6. Define the decoder.
    decoder_fnc = K.Function([model.layers[6].output], [model.output[0]])
    generated_data = decoder_fnc([z_star])[0]

    # 7. Augment the training set with the generated data.
    sequential_train_datas = np.concatenate(
        [sequential_train_datas, generated_data])
    train_labels = np.concatenate([train_labels, y_addition])

    # 8. Retrain the model.
    K.reset_uids()
    model = att_s_beta_vae.build_model(options)
    att_s_beta_vae.train_model(model,
                               x_train=sequential_train_datas,
                               y_train=train_labels,
                               fold=1,
                               new_weights=options.result_path + model.name +
                               '/fold_1_last_weight_DA_after.h5')
    f1_score_DA, error_rate_DA = att_s_beta_vae.metric_model(
        model,
        sequential_test_datas,
        test_labels,
        supervised=True,
        new_weight_path='fold_1_last_weight_DA_after.h5')

    print(
        'nevents {nevents}, nsamples {nsamples} ==> error_rate: {error_rate}, f1_score: {f1_score}'
        .format(nevents=options.num_events,
                nsamples=options.num_samples,
                error_rate=error_rate_DA,
                f1_score=f1_score_DA))

    # Record the final scores: the score lists used to stay empty while the
    # index received one entry, so building the DataFrame failed on the
    # length mismatch.
    f1_list.append(f1_score_DA)
    er_list.append(error_rate_DA)
    fold_list.append('AVER')
    result_df = pd.DataFrame({'F1': f1_list, 'ER': er_list}, index=fold_list)
    result_df.to_csv(options.result_path + options.name + '_' +
                     str(options.num_events) + '/K_Folds_results.csv')
    return result_df
Beispiel #13
0
 # Per-drug evaluation of a single-task MLP. This is the body of a loop
 # whose header is outside this excerpt; `i`, `j` and `drug` come from it.
 # Get feature and label data for current drug
 # NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0 - confirm
 # the pinned pandas version.
 X_mlp = df_X[num_snp_indiv_val[i]].as_matrix()
 # Label data for current drug
 y_true_drug = y_true[:, j]
 # Disregard rows for which no resistance data exists (label == -1)
 y_true_small = y_true_drug[y_true_drug != -1]
 X_small = X_mlp[y_true_drug != -1]
 # Get test data for current drug and proper SNPs
 y_test_drug = y_test[:, j]
 y_test_small = y_test_drug[y_test_drug != -1]
 X_test_small = full_validation_df[num_snp_indiv_val[i]].as_matrix()
 X_test_small = X_test_small[y_test_drug != -1]
 # Train on MLP
 clf1 = get_mlp_single()
 # NOTE(review): `nb_epoch` is the Keras 1 spelling of `epochs` - confirm
 # the targeted Keras version accepts it.
 clf1.fit(X_small, y_true_small, nb_epoch=50)
 # Backend function that takes the learning phase as an extra input.
 clf_do = K.Function(clf1.inputs + [K.learning_phase()], clf1.outputs)
 y_pred_strat_test = ensemble(X_test_small,
                              np.expand_dims(y_test_small, axis=1), clf_do)
 y_pred_strat_train = ensemble(X_small, np.expand_dims(y_true_small,
                                                       axis=1), clf_do)
 # Compute AUC scores for validation set
 auc_strat_data_test[i] = roc_auc_score(y_test_small, y_pred_strat_test)
 # Get sensitivity and specificity for validation set
 # NOTE(review): the get_threshold result is overwritten by the next
 # statement without being read; the threshold actually applied is
 # strat_thresh_from_cv[j].
 strat_data_indiv = get_threshold(y_true_small, y_pred_strat_train,
                                  y_test_small, y_pred_strat_test)
 strat_data_indiv = get_sens_spec_from_threshold(y_test_small,
                                                 y_pred_strat_test,
                                                 strat_thresh_from_cv[j])
 spec_strat_data_test[i] = strat_data_indiv['spec']
 sens_strat_data_test[i] = strat_data_indiv['sens']
 plot_fpr_tpr = plot_roc_auc(drug, y_test_small, y_pred_strat_test)
Beispiel #14
0
# code in "Keras backend" language
def n0_dft(n0_scaled):
    """Return cos and sin terms of ``n0 * k * pi / N`` for ``k = 0..N-1``.

    ``n0_scaled`` is multiplied by the module-level ``gain`` to obtain
    ``n0`` and ``N`` is the module-level ``width``. The cosine terms and the
    sine terms (of the negated phase) are concatenated on the last axis.
    Both the scaled and the unscaled value are printed via
    ``K.print_tensor``.
    """
    n0_scaled = K.print_tensor(n0_scaled, "n0_scaled is: ")
    # Per the original note, n0_scaled = n0 / P_max so that it stays within
    # [0..1]; here the module-level `gain` is the scale factor.
    n0 = K.print_tensor(n0_scaled * gain, "n0 is: ")

    N = width
    harmonics = K.cast(K.arange(N), dtype='float32')
    cos_term = K.cos(n0 * harmonics * np.pi / N)
    sin_term = K.sin(-n0 * harmonics * np.pi / N)
    return K.concatenate([cos_term, sin_term], axis=-1)


# Check the custom layer against the numpy implementation (e_rect).

a = layers.Input(shape=(None, ))
custom_layer = K.Function([a], [n0_dft(a)])
for i in range(10):
    scaled_input = [[[n0[i] / gain]]]
    e_test = np.array(custom_layer(scaled_input))
    # e_test is continuous; only compare at the nonzero harmonic points.
    ind = np.nonzero(e_rect[i, :])
    err = (e_rect[i, ind] - e_test[0, 0, ind])
    # A small error is expected as the GPU and host don't always agree.
    err_std = np.std(err)
    print(i, L[i], n0[i], err.shape, err_std)
    assert (np.mean(err_std) < 1E-4)
print("n0_dft custom layer tested")


# custom loss function
def sparse_loss(y_true, y_pred):
    mask = K.cast(K.not_equal(y_pred, 0), dtype='float32')
    #mask = K.print_tensor(mask, "mask is: ")
Beispiel #15
0
model.summary()

# Random inputs; the targets are the per-image spatial means.
X = np.random.normal(0, 1, (1000, 64, 64, 1))
Y = np.mean(X, axis=(1, 2))
W = np.random.normal(0, 1, (1000, 64, 64, 1))
Z = np.mean(W, axis=(1, 2))

reducer.compile(model)

model.fit(X, Y)
reducer.fit(X, Y)
print(reducer.display_update())
model.evaluate(W, Z)

# Backend functions exposing the model's targets, output and total loss.
targets = K.Function(model.targets, model.targets)
output = K.Function([model.input], [model.output])
W = output([X])
Z = targets([Y])
loss = K.Function([model.input, model.targets[0], model.sample_weights[0]],
                  [model.total_loss])
sess.run(model.total_loss,
         feed_dict={
             model.input: X,
             model.targets[0]: list(Y)
         })
model.fit(X, Y, batch_size=len(X))
# A stray bare name `f` and an argument-less `K.eval()` used to sit here;
# the latter raises TypeError, so the loss evaluation below was never
# reached.
loss([X, Y, np.ones(len(X))])
def f_score_diff(y_true, y_pred, eps=1e-6):
    """Differentiable negative F2 score (in percent), averaged over the batch.

    Per sample, with soft counts summed over axes 1-3::

        F2 = (5 * TP + eps) / (5 * TP + 4 * FN + FP + eps)

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor with the same layout as ``y_true``.
        eps: smoothing term that avoids 0/0 when all counts are zero.

    Returns:
        ``-100 * mean(F2)`` so that minimising the loss maximises F2.
    """
    reduce_axes = [1, 2, 3]
    true_positive_count = K.sum(y_true * y_pred, axis=reduce_axes)
    # A false negative is a positive target that was not predicted; a false
    # positive is a prediction without a positive target. The two
    # definitions used to be swapped, which put the weight of 4 on the
    # false positives instead of the false negatives.
    false_negative_count = K.sum(y_true * (1 - y_pred), axis=reduce_axes)
    false_positive_count = K.sum((1 - y_true) * y_pred, axis=reduce_axes)
    return -K.mean(((5 * true_positive_count + eps) * 100) /
                   (5 * true_positive_count + 4 * false_negative_count +
                    false_positive_count + eps),
                   axis=0)


from keras import layers
from keras import losses

# Backend function evaluating `iou` on a pair of inputs with one channel.
x = layers.Input(shape=(None, None, None, 1))
y = layers.Input(shape=(None, None, None, 1))
loss_func = K.Function([x, y], [iou(x, y, 1e-6)])

# Ground truth: three 2x3 slices with a trailing channel axis; only the
# last one contains positives.
a1 = np.expand_dims(np.array([[0, 0, 0], [0, 0, 0]]), -1)
a2 = np.expand_dims(np.array([[0, 0, 0], [0, 0, 0]]), -1)
a3 = np.expand_dims(np.array([[1, 1, 0], [0, 0, 0]]), -1)
a_true = np.stack((a1, a2, a3))

# Prediction: the same layout, all zeros.
b1 = np.expand_dims(np.array([[0, 0, 0], [0, 0, 0]]), -1)
b2 = np.expand_dims(np.array([[0, 0, 0], [0, 0, 0]]), -1)
b3 = np.expand_dims(np.array([[0, 0, 0], [0, 0, 0]]), -1)
b_pred = np.stack((b1, b2, b3))
Beispiel #17
0
        loaded_model = model_from_json(fhl.read())

    loaded_model.load_weights(arg.weights)

    # Get test data
    df_X_test = pd.read_csv(arg.genotype_data)
    df_y_test = pd.read_csv(arg.phenotype_data)

    X_test = df_X_test.as_matrix()
    y_test = df_y_test.as_matrix()

    # Ensembling
    def ensemble(X, y, function, n_iter=100):
        """Average ``n_iter`` evaluations of ``function`` on ``X``.

        Args:
            X: input batch, passed to ``function`` as ``[X, 1]`` (the ``1``
                is the value fed for the learning-phase input).
            y: array whose shape fixes the shape of the result.
            function: callable returning a one-element list of predictions.
            n_iter: number of evaluations to average (default 100).

        Returns:
            Float array shaped like ``y`` holding the mean prediction.
        """
        # np.float was a plain alias of the builtin and was removed in
        # NumPy 1.24.
        preds = np.zeros_like(y, dtype=float)
        for _ in range(n_iter):
            preds += np.squeeze(np.array(function([X, 1])), axis=0)
        return preds / n_iter

    clf_dom = K.Function(loaded_model.inputs + [K.learning_phase()],
                         loaded_model.outputs)

    ## NOTE: A PREDICTION OF 0 CORRESPONDS TO A "RESISTANT" PHENOTYPE AND
    ## A PREDICTION OF 1 CORREPONDS TO A "SUSCEPTIBLE" PHENOTYPE.
    y_pred = ensemble(X_test, y_test, clf_dom)

    np.savetxt(
        sys.stdout,
        y_pred,
        delimiter=",",
        header="rif, inh, pza, emb, str, cip, cap, amk, moxi, oflx, kan")
Beispiel #18
0
from keras import Model
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
from keras import backend as K

# custom loss function
def sparse_loss(y_true, y_pred):
    """Squared error summed over the non-zero positions of ``y_pred``,
    divided by the number of such positions.

    When ``y_pred`` has no non-zero entry the loss is 0 rather than the
    NaN produced by a 0/0 division.
    """
    mask = K.cast(K.not_equal(y_pred, 0), dtype='float32')
    # The count is integer-valued, so clamping it to 1 only changes the
    # empty case, where the numerator is 0 as well.
    n = K.maximum(K.sum(mask), 1.0)
    return K.sum(K.square((y_pred - y_true) * mask)) / n

# Sanity-check the custom loss on two hand-computed cases.
x = Input(shape=(None,))
y = Input(shape=(None,))
loss_func = K.Function([x, y], [sparse_loss(x, y)])
assert loss_func([[[1, 1, 1]], [[0, 2, 0]]]) == np.array([1])
assert loss_func([[[0, 1, 0]], [[0, 2, 0]]]) == np.array([1])

# constants

N = 80  # number of time domain samples in frame
nb_samples = 400000
nb_batch = 32
nb_epochs = 10
width = 256
pairs = 2 * width
fo_min = 50
fo_max = 400
Fs = 8000
Beispiel #19
0
    def make(self, theano_kwargs=None):
        '''Build the model and either compile it or create prediction functions.

        Everything is driven by the config object ``self.igor``. The method
        builds the embedding selected by ``igor.embedding_type``, wires the
        five integer inputs into a parent-prediction and a leaf-prediction
        branch, and stores the resulting model on ``self.model``.

        When ``igor.in_training`` is true the model is compiled with Adam and
        categorical cross-entropy. Otherwise dense prediction functions
        (``self.inner_func``, ``self.leaf_func``, ``self.get_ptensor``) are
        created instead.

        Arg:
            theano_kwargs: optional dict forwarded to ``Model(...)`` and
                ``compile(...)``, for debugging theano or submitting
                something custom.

        Raises:
            Exception: for an unknown ``igor.embedding_type``, or when not
                training and no checkpoint is configured.
        '''

        # Each helper receives the config object. NOTE(review): they
        # presumably install `igor.F_embedspine`, which is used below -
        # confirm in their definitions.
        if self.igor.embedding_type == "convolutional":
            make_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "token":
            make_token_embedding(self.igor)
        elif self.igor.embedding_type == "shallowconv":
            make_shallow_convolutional_embedding(self.igor)
        elif self.igor.embedding_type == "minimaltoken":
            make_minimal_token_embedding(self.igor)
        else:
            raise Exception("Incorrect embedding type")

        # Fixed batch shapes: one row of supertag ids per example for the
        # spine sets, a single id for heads and the daughter spine.
        B = self.igor.batch_size
        spine_input_shape = (B, self.igor.max_num_supertags)
        child_input_shape = (B, 1)
        parent_input_shape = (B, 1)

        E, V = self.igor.word_embedding_size, self.igor.word_vocab_size  # for word embeddings

        repeat_N = self.igor.max_num_supertags  # for lex
        mlp_size = self.igor.mlp_size

        ## dropout parameters
        # NOTE(review): p_W and p_U are read here but not used anywhere in
        # this method.
        p_emb = self.igor.p_emb_dropout
        p_W = self.igor.p_W_dropout
        p_U = self.igor.p_U_dropout
        w_decay = self.igor.weight_decay
        p_mlp = self.igor.p_mlp_dropout

        # Returns a fresh kwargs dict on each call, so every Dense built from
        # it gets its own regularizer objects.
        def predict_params():
            return {
                'output_dim': 1,
                'W_regularizer': l2(w_decay),
                'activation': 'relu',
                'b_regularizer': l2(w_decay)
            }

        dspineset_in = Input(batch_shape=spine_input_shape,
                             name='daughter_spineset_in',
                             dtype='int32')
        pspineset_in = Input(batch_shape=spine_input_shape,
                             name='parent_spineset_in',
                             dtype='int32')
        dhead_in = Input(batch_shape=child_input_shape,
                         name='daughter_head_input',
                         dtype='int32')
        phead_in = Input(batch_shape=parent_input_shape,
                         name='parent_head_input',
                         dtype='int32')
        dspine_in = Input(batch_shape=child_input_shape,
                          name='daughter_spine_input',
                          dtype='int32')
        inputs = [dspineset_in, pspineset_in, dhead_in, phead_in, dspine_in]

        ### Layer functions
        ############# Convert the word indices to vectors
        F_embedword = Embedding(input_dim=V,
                                output_dim=E,
                                mask_zero=True,
                                W_regularizer=l2(w_decay),
                                dropout=p_emb)

        if self.igor.saved_embeddings is not None:
            self.logger.info("+ Cached embeddings loaded")
            F_embedword.initial_weights = [self.igor.saved_embeddings]

        ###### Prediction Functions
        ## these functions learn a vector which turns a tensor into a matrix of probabilities

        ### P(Parent supertag | Child, Context)
        F_parent_predict = ProbabilityTensor(
            name='parent_predictions',
            dense_function=Dense(**predict_params()))
        ### P(Leaf supertag)
        F_leaf_predict = ProbabilityTensor(
            name='leaf_predictions', dense_function=Dense(**predict_params()))

        ###### Network functions.
        ##### Input word, correct its dimensions (basically squash in a certain way)
        F_singleword = compose(Fix(), F_embedword)
        ##### Input spine, correct dimensions, broadcast across 1st dimension
        F_singlespine = compose(RepeatVector(repeat_N), Fix(),
                                self.igor.F_embedspine)
        ##### Concatenate and map to a single space
        F_alignlex = compose(
            RepeatVector(repeat_N), Dropout(p_mlp),
            Dense(mlp_size, activation='relu', name='dense_align_lex'), concat)

        F_alignall = compose(
            Distribute(Dropout(p_mlp), name='distribute_align_all_dropout'),
            Distribute(Dense(mlp_size,
                             activation='relu',
                             name='align_all_dense'),
                       name='distribute_align_all_dense'), concat)
        F_alignleaf = compose(
            Distribute(
                Dropout(p_mlp * 0.66), name='distribute_leaf_dropout'
            ),  ### need a separate one because the 'concat' is different for the two situations
            Distribute(Dense(mlp_size, activation='relu', name='leaf_dense'),
                       name='distribute_leaf_dense'),
            concat)

        ### embed and form all of the inputs into their components
        ### note: spines == supertags. early word choice, haven't refactored.
        leaf_spines = self.igor.F_embedspine(dspineset_in)
        pspine_context = self.igor.F_embedspine(pspineset_in)
        dspine_single = F_singlespine(dspine_in)

        dhead = F_singleword(dhead_in)
        phead = F_singleword(phead_in)

        ### combine the lexical material
        lexical_context = F_alignlex([dhead, phead])

        #### P(Parent Supertag | Daughter Supertag, Lexical Context)
        ### we know the daughter spine, want to know the parent spine
        ### size is (batch, num_supertags)
        parent_problem = F_alignall(
            [lexical_context, dspine_single, pspine_context])

        ### we don't have the parent, we just have a leaf
        leaf_problem = F_alignleaf([lexical_context, leaf_spines])

        parent_predictions = F_parent_predict(parent_problem)
        leaf_predictions = F_leaf_predict(leaf_problem)
        predictions = [parent_predictions, leaf_predictions]

        theano_kwargs = theano_kwargs or {}
        ## make it quick so i can load in the weights.
        self.model = Model(input=inputs,
                           output=predictions,
                           preloaded_data=self.igor.preloaded_data,
                           **theano_kwargs)

        #mask_cache = traverse_nodes(parent_prediction)
        #desired_masks = ['merge_3.in.mask.0']
        #self.p_tensor = K.function(inputs+[K.learning_phase()], [parent_predictions, F_parent_predict.inbound_nodes[0].input_masks[0]])

        # Inference without restored weights is rejected outright.
        if self.igor.from_checkpoint:
            self.load_checkpoint_weights()
        elif not self.igor.in_training:
            raise Exception("No point in running this without trained weights")

        if not self.igor.in_training:
            # Inference: expand the leaf spines, the parent context and the
            # lexical context, concatenate them and push the result through
            # the already-built align_all Dense layer and the parent
            # predictor.
            expanded_children = RepeatVector(repeat_N, axis=2)(leaf_spines)
            expanded_parent = RepeatVector(repeat_N, axis=1)(pspine_context)
            expanded_lex = RepeatVector(repeat_N, axis=1)(
                lexical_context
            )  # axis here is arbitrary; it's repeating on 1 and 2, but already repeated once
            huge_tensor = concat(
                [expanded_lex, expanded_children, expanded_parent])
            densely_aligned = LastDimDistribute(
                F_alignall.get(1).layer)(huge_tensor)
            output_predictions = Distribute(
                F_parent_predict, force_reshape=True)(densely_aligned)

            primary_inputs = [phead_in, dhead_in, pspineset_in, dspineset_in]
            leaf_inputs = [phead_in, dhead_in, dspineset_in]

            self.logger.info("+ Compiling prediction functions")
            # NOTE(review): outputs are passed as bare tensors here but as a
            # list for get_ptensor below - confirm K.Function accepts both.
            self.inner_func = K.Function(primary_inputs + [K.learning_phase()],
                                         output_predictions)
            self.leaf_func = K.Function(leaf_inputs + [K.learning_phase()],
                                        leaf_predictions)
            # NOTE(review): debugging leftover - a bare `except` that drops
            # into pdb swallows every error (including KeyboardInterrupt) and
            # leaves self.get_ptensor unset once the debugger exits.
            try:
                self.get_ptensor = K.function(
                    primary_inputs + [K.learning_phase()], [
                        output_predictions,
                    ])
            except:
                import pdb
                pdb.set_trace()
        else:

            # Training: Adam with both norm and value gradient clipping.
            optimizer = Adam(self.igor.LR,
                             clipnorm=self.igor.max_grad_norm,
                             clipvalue=self.igor.grad_clip_threshold)

            # Redundant with the identical default applied before Model(...).
            theano_kwargs = theano_kwargs or {}
            self.model.compile(loss="categorical_crossentropy",
                               optimizer=optimizer,
                               metrics=['accuracy'],
                               **theano_kwargs)
Beispiel #20
0
    def build_generate(self, model):
        """Load a saved model and install the ``self.generate`` callback.

        ``model`` is handed to ``keras.models.load_model`` (uncompiled). A
        decoder function is built from the input of layer ``dense_1`` to the
        model output, along with a TensorFlow graph that fits a signal
        variable to a target magnitude using L-BFGS-B. The resulting closure
        is stored on ``self.generate``; nothing is returned.
        """
        session = K.get_session()

        # ---- Model setup ----
        net = keras.models.load_model(
            model,
            compile=False,
            custom_objects={'NNResize': NNResize},
        )

        # Model input -> 'z_mean'. Built here but not referenced by the
        # closure defined below.
        latent_source = net.input
        latent_target = net.get_layer('z_mean').output
        latent_fn = K.Function([latent_source], [latent_target])

        # 'dense_1' input -> model output.
        decode_entry = net.get_layer('dense_1').input
        decoded_tensor = net.output
        decode_fn = K.Function([decode_entry], [decoded_tensor])

        # Everything except the leading (batch) dimension of the output.
        target_dims = decoded_tensor.get_shape().as_list()[1:]

        # ---- Reconstruction graph ----
        spectrogram_layer = Spectrogram(
            n_fft=self.N_FFT,
            hop_length=self.HOP_LEN,
            freq_format='freq_last',
        )
        target_mag = tf.placeholder(shape=(1, *target_dims), dtype=np.float32)

        consistency_weight = 100
        n_samples = int(self.expected_len)
        start_signal = np.random.randn(1, n_samples).astype(np.float32)
        signal_var = tf.Variable(start_signal)

        signal_mag = spectrogram_layer.call(signal_var)
        signal_stft = tf.contrib.signal.stft(
            signal_var,
            frame_length=self.N_FFT,
            frame_step=self.HOP_LEN,
            pad_end=True,
        )
        round_trip = tf.contrib.signal.inverse_stft(
            signal_stft,
            frame_length=self.N_FFT,
            frame_step=self.HOP_LEN,
            fft_length=self.N_FFT,
        )
        # Trim the STFT -> inverse-STFT round trip back to the variable's length.
        round_trip = round_trip[:, :n_samples]

        consistency_term = tf.reduce_sum(tf.square(signal_var - round_trip))
        magnitude_term = tf.reduce_sum(tf.square(target_mag - signal_mag))
        objective = consistency_weight * consistency_term + magnitude_term
        session.run(signal_var.initializer)

        lbfgs = tf.contrib.opt.ScipyOptimizerInterface(
            objective,
            method='L-BFGS-B',
            options={'maxiter': 500},
            var_list=[signal_var],
        )

        def generate():
            # Copy the current slider positions into the coordinate buffer.
            for slot, slider in enumerate(self.coord_sliders):
                self.current_coords[slot] = slider.get()

            latent = np.asarray([self.current_coords], dtype=np.float32)
            amplitudes = self.to_amp(decode_fn([latent])[0])
            # Zero the first front_padding // HOP_LEN columns along axis 1.
            n_zeroed = self.front_padding // self.HOP_LEN
            amplitudes[:, :n_zeroed] = 0

            feeds = {target_mag: amplitudes}
            lbfgs.minimize(session, feed_dict=feeds)
            print('Recon loss:',
                  objective.eval(session=session, feed_dict=feeds))

            self.audio = signal_var.eval(session=session)[0]

            # Redraw the plot with the new signal.
            self.ax.clear()
            self.ax.plot(self.audio)
            self.canvas.draw()

        self.generate = generate
Beispiel #21
0
 def train_on_batch(self, X, Y):
     """Run the one-time initialisation step shown in this snippet.

     When ``self.initialized`` is falsy, ``self.y`` is evaluated for the
     batch ``X`` through a backend function and the result is stored on
     ``self.init.X``. ``Y`` is not used by the visible code.
     """
     if self.initialized:
         return
     # Backend function mapping self.inputs -> self.y; keep its single output.
     forward = K.Function([self.inputs], [self.y])
     self.init.X = forward([X])[0]
Beispiel #22
0
    def setup_agent(self):
        """Build the policy/value graph and compile the backend functions.

        ``self.pi`` must already exist on entry: it supplies the observation
        input and the action placeholder before being rebound (together with
        ``self.oldpi``) through ``self.policy_func``. The method also sets
        ``self.value_function``, ``self.loss_names`` and the four
        ``self.compute_*`` functions. Nothing is returned.
        """
        # Target advantage function (if applicable).
        adv_ph = K.placeholder(dtype=tf.float32, shape=[None])
        # Empirical return.
        ret_ph = K.placeholder(dtype=tf.float32, shape=[None])
        obs_in = self.pi.observation_input
        act_ph = self.pi.pdtype.sample_placeholder([None])

        # Both policies are built on the same observation input.
        self.pi = self.policy_func(self.env, obs_in)
        self.oldpi = self.policy_func(self.env, obs_in)

        self.value_function = self.value_func(
            self.env, obs_in, lr=self.vf_stepsize)

        kl_term = self.oldpi.kl(self.pi)
        entropy_term = self.pi.entropy()
        kl_mean = K.mean(kl_term)
        entropy_mean = K.mean(entropy_term)
        entropy_bonus = self.entropy_coeff * entropy_mean

        value_error = K.mean(K.square(self.pi.value_pred - ret_ph))

        # exp(logp_new - logp_old), i.e. pnew / pold; weighted by the
        # advantages on the next line.
        log_ratio = self.pi.pd.logp(act_ph) - self.oldpi.pd.logp(act_ph)
        likelihood_ratio = tf.exp(log_ratio)
        surrogate = K.mean(likelihood_ratio * adv_ph)

        objective = surrogate + entropy_bonus
        loss_terms = [
            objective,
            kl_mean,
            entropy_bonus,
            surrogate,
            entropy_mean,
        ]
        # Labels in the same order as loss_terms.
        self.loss_names = [
            "Optim Gain",
            "mean KL",
            "entropy loss",
            "surrogate gain",
            "entropy",
        ]

        kl_grad = self.pi.flatten.gradient(kl_mean)

        tangents = K.placeholder(
            shape=(None, ) + K.eval(self.pi.variables.shape))
        grad_dot_tangent = K.sum(kl_grad * tangents)  #pylint: disable=E1111
        fvp = self.pi.flatten.flatgrad(grad_dot_tangent)

        # Inputs shared by the policy-side functions; a fresh list is
        # built for every call.
        policy_feed = (obs_in, act_ph, adv_ph)

        self.compute_losses = K.Function([*policy_feed], loss_terms)
        self.compute_loss_grad = K.Function(
            [*policy_feed],
            loss_terms + [self.pi.flatten.flatgrad(objective)])
        self.compute_fisher_vector_product = K.Function(
            [tangents, *policy_feed], fvp)
        self.compute_value_function_lossandgrad = K.function(
            [obs_in, ret_ph],
            [self.value_function.flatten.flatgrad(value_error)])

        initial_theta = self.pi.flatten.get_value()

        print("Init param sum", initial_theta.sum(), flush=True)
Beispiel #23
0
 def build(self):
     """Assemble and compile the Keras model and its probe functions.

     Sets ``self.model_`` (compiled with Nadam and categorical
     cross-entropy), ``self.hidden_state_func_`` and, when
     ``self.use_attention`` is true, ``self._attention_func_``.

     Returns:
         self, so that calls can be chained.
     """
     RNN = kl.GRU if self.rnn == "gru" else kl.LSTM
     # Symbol input: an extra feature axis is present only when symbols
     # carry features.
     if self.symbols_has_features:
         symbol_inputs = kl.Input(shape=(None, self.input_symbols_number),
                                  dtype='int32')
     else:
         symbol_inputs = kl.Input(shape=(None, ), dtype='int32')
     symbol_embeddings, symbol_inputs_length = self._build_symbol_layer(
         symbol_inputs)
     if not self.use_attention:
         # Without attention, the history is handled by make_history with
         # flatten=True; the per-step length grows by a factor of history.
         if self.history > 1:
             symbol_inputs_length *= self.history
             pad = kb.zeros_like(symbol_embeddings[0, 0])
             symbol_embeddings = kl.Lambda(
                 make_history,
                 arguments={
                     "h": self.history,
                     "pad": pad,
                     "flatten": True
                 },
                 output_shape=(None,
                               symbol_inputs_length))(symbol_embeddings)
         to_concatenate = [symbol_embeddings]
     else:
         # With attention, encode the sequence with an RNN and let the
         # attention layer produce the memory and attention probabilities.
         encodings = RNN(self.memory_embeddings_size,
                         return_sequences=True)(symbol_embeddings)
         if self.dropout > 0.0:
             encodings = kl.Dropout(self.dropout)(encodings)
         if self.symbols_has_features or self.use_embeddings:
             AttentionLayer, attention_inputs = AttentionCell3D, symbol_embeddings
         else:
             AttentionLayer, attention_inputs = AttentionCell, symbol_inputs
         attention_layer = AttentionLayer(self.history,
                                          symbol_inputs_length,
                                          self.memory_embeddings_size,
                                          use_bias=self.use_attention_bias)
         memory, attention_probs = attention_layer(
             [attention_inputs, encodings])
         to_concatenate = [memory]
     if self.labels_ is not None:
         # Optional second input: a feature vector, optionally passed
         # through a dense layer, then repeated by repeat_ with k set to
         # the second dimension of symbol_embeddings.
         feature_inputs = kl.Input(shape=(self.feature_vector_size, ))
         inputs = [symbol_inputs, feature_inputs]
         feature_inputs_length = self.feature_vector_size
         if self.use_feature_embeddings:
             feature_inputs = kl.Dense(
                 self.feature_embeddings_size,
                 input_shape=(self.feature_vector_size, ),
                 activation="relu",
                 use_bias=False)(feature_inputs)
             feature_inputs_length = self.feature_embeddings_size
         # cannot use kb.repeat_elements because it requires an integer
         feature_inputs = kl.Lambda(
             repeat_,
             arguments={"k": kb.shape(symbol_embeddings)[1]},
             output_shape=(None, feature_inputs_length))(feature_inputs)
         to_concatenate.append(feature_inputs)
     else:
         inputs = [symbol_inputs]
     # Concatenate only when there is more than one tensor to join.
     lstm_inputs = (kl.Concatenate()(to_concatenate)
                    if len(to_concatenate) > 1 else to_concatenate[0])
     if not self.use_attention or self.use_output_rnn:
         lstm_outputs = RNN(self.rnn_size,
                            return_sequences=True)(lstm_inputs)
         if self.dropout > 0.0:
             lstm_outputs = kl.Dropout(self.dropout)(lstm_outputs)
     else:
         # no LSTM over memory blocks
         lstm_outputs = lstm_inputs
     # Softmax over the output symbols, applied through TimeDistributed.
     outputs = kl.TimeDistributed(kl.Dense(self.output_symbols_number,
                                           activation="softmax",
                                           input_shape=(self.rnn_size, )),
                                  name="output")(lstm_outputs)
     compile_args = {
         "optimizer": ko.nadam(clipnorm=5.0),
         "loss": "categorical_crossentropy"
     }
     self.model_ = keras.models.Model(inputs, outputs)
     self.model_.compile(**compile_args)
     if self.verbose > 0:
         # Model.summary() prints the table itself and returns None, so
         # wrapping it in print() would emit a stray "None" line.
         self.model_.summary()
     # Backend functions that take the model inputs plus the learning-phase
     # flag and return intermediate tensors.
     if self.use_attention:
         self._attention_func_ = kb.Function(inputs + [kb.learning_phase()],
                                             [attention_probs])
     self.hidden_state_func_ = kb.Function(inputs + [kb.learning_phase()],
                                           [lstm_outputs])
     return self