def compile(self, model):
    """Create the training op and backend functions tied to ``model``.

    An RMSProp optimizer (learning rate 0.001) minimizes ``model.total_loss``
    with respect to ``self.D0`` only.  The resulting op and the loss tensor
    are wrapped in backend functions stored on ``self.fit_op`` and
    ``self.loss``; both take ``[input, target, sample_weight]``.

    Args:
        model: Object exposing ``input``, ``targets``, ``sample_weights``
            and ``total_loss``.
    """

    def feeds():
        # Build a fresh list for every backend function so the two functions
        # never share one mutable input list.
        return [model.input, model.targets[0], model.sample_weights[0]]

    rmsprop = tf.train.RMSPropOptimizer(0.001)
    self.optimizer = rmsprop
    # Restrict the update to D0; every other variable is left untouched.
    self.opt = rmsprop.minimize(model.total_loss, var_list=[self.D0])

    self.fit_op = K.Function(feeds(), [self.opt])
    self.loss = K.Function(feeds(), [model.total_loss])

    banner = "Reduction Layer Compiled, batch %d" % self.patch_layer.patch_size
    out_shape = self.compute_output_shape(self.input_shape_t)
    print(banner, "\n", "Output shape:", out_shape)
def running(options):
    """Extract per-event bottleneck features on test data and visualize them.

    Loads fold 1 of the dataset, restores the last saved weights of the
    model built by ``AttSBetaVAE``, evaluates the outputs of layers
    ``15 .. 15 + num_events - 1`` on the first 300 test sequences and passes
    the stacked result to ``Dataset.visualization``.

    Args:
        options: Configuration object; ``dataset``, ``num_events``,
            ``num_samples``, ``time_step`` and ``result_path`` are read here.
    """
    fold = 1
    dt = Dataset(options.dataset)

    # Load data; only the test split is used below.
    (train_datas, train_labels, val_datas, val_labels,
     test_datas, test_labels) = dt.load_data(nevents=options.num_events,
                                             nsamples=options.num_samples,
                                             fold=fold)
    sequential_test_datas = dt.sequentialize_data(test_datas,
                                                  timestep=options.time_step)
    # NOTE(review): random.shuffle swaps rows through views when given a
    # multi-dimensional ndarray, which can duplicate rows.  Confirm that
    # sequentialize_data returns a list, or switch to np.random.shuffle.
    random.shuffle(sequential_test_datas)

    # Build model and load weights.
    att_s_beta_vae = AttSBetaVAE(options)
    K.reset_uids()
    sbvae = att_s_beta_vae.build_model(options)
    sbvae.load_weights(options.result_path + sbvae.name + '/fold_' +
                       str(fold) + '_last_weight.h5')

    # Get the bottleneck features.  The chunks are collected and stacked
    # once, instead of growing an array seeded with an uninitialised
    # (1, 15) row: that required every layer output to be exactly 15 wide
    # and re-copied the whole buffer on each iteration.
    num_to_plot = 300
    batch = sequential_test_datas[:num_to_plot]
    features = []
    for i in range(options.num_events):
        h_fnc = K.Function([sbvae.input], [sbvae.layers[15 + i].output])
        features.append(h_fnc([batch])[0])
    # With zero events the original passed an empty (0, 15) array; keep that.
    h_out = np.concatenate(features) if features else np.empty((0, 15))

    # Visualization.
    dt.visualization(datas=h_out, name='Att_s_beta_VAE')
def generate_data(self, model, event_index, input_data):
    """Reconstruct ``input_data`` from the latent output of one event branch.

    Loads ``cp_weight.h5`` into ``model``, evaluates the layer at index
    ``8 + self.nevents + event_index + 1`` on ``input_data`` and feeds that
    result through the sub-graph running from ``model.layers[6].output`` to
    ``model.output[0]``.

    Args:
        model: Keras model to load the weights into and evaluate.
        event_index: Offset of the target event among the event layers.
        input_data: Batch fed to ``model.input``.

    Returns:
        The decoder output for the selected latent representation.
    """
    weight_file = (self.op.result_path + self.op.name + '_' +
                   str(self.op.nevents) + '/cp_weight.h5')
    model.load_weights(weight_file)

    # NOTE(review): the path uses self.op.nevents while the layer index uses
    # self.nevents -- confirm both refer to the same value.
    latent_layer = model.layers[8 + self.nevents + event_index + 1]

    # e_fnc yields the z* of the target event; decoder_fnc maps a latent
    # value back to the reconstruction of the inputs.
    e_fnc = K.Function([model.input], [latent_layer.output])
    decoder_fnc = K.Function([model.layers[6].output], [model.output[0]])

    z_star = e_fnc([input_data])[0]
    return decoder_fnc([z_star])[0]
def create_funtions(self):
    """Create the encoder and decoder backend functions around the latent layer.

    Compiles the model first if that has not happened yet.  The latent layer
    is the one at the (truncated) mean index of all layers named
    ``'variationaldense'`` when ``self.is_var`` is true, otherwise of all
    layers named ``'dense'``.  If the next layer is named
    ``'batchnormalization'`` that layer is used instead.

    Sets:
        self.encoder: model input -> latent layer output.
        self.decoder: input of the layer after the latent one -> model output.

    Raises:
        ValueError: If no layer carries the expected name.
    """
    if not self.is_compiled:
        self.compile_model()

    wanted = 'variationaldense' if self.is_var else 'dense'
    indices = [i for (i, layer) in enumerate(self.model.layers)
               if layer.name == wanted]
    if not indices:
        # The mean of an empty list is NaN, which previously surfaced as an
        # opaque "cannot convert float NaN to integer" ValueError.
        raise ValueError(
            "No layer named {!r} found; cannot locate the latent space"
            .format(wanted))

    # np.int was only an alias of the builtin int and has been removed from
    # NumPy; the builtin truncates the same way.
    index = int(np.mean(indices))

    if self.model.layers[index + 1].name == 'batchnormalization':
        index = index + 1
        logging.info('Middle layer followed by batchnorm. This layer ({})'
                     ' is chosen as latent space'.format(index))

    logging.info('Creating encoder and decoder function')
    layers = self.model.layers
    self.encoder = K.Function([layers[0].get_input(train=False)],
                              [layers[index].get_output(train=False)])
    # The decoder starts right after the latent layer.  The extra keyword is
    # forwarded to the backend so an unused input only warns.
    self.decoder = K.Function([layers[index + 1].get_input(train=False)],
                              [layers[-1].get_output(train=False)],
                              **{'on_unused_input': 'warn'})
def setup_model(self):
    """Build a one-step Q-learning training function for ``self.model.net``.

    The original body was unfinished: ``target_Q`` was referenced but never
    defined, ``tf.train.RMSProp`` does not exist, and the compiled function
    was thrown away.  This version fills the gap with the standard TD target
    ``r + gamma * (1 - terminated) * max_a' Q(s', a')`` regressed against the
    Q-value of the action taken.

    NOTE(review): the following are assumptions to confirm with the author --
      * ``self.gamma`` / ``self.learning_rate`` are used when present,
        otherwise 0.99 / 0.001;
      * ``self.model.net`` returns one Q-value per discrete action on its
        last axis and that axis has a static size;
      * ``action`` holds integer action ids (fed as floats, cast here).

    Returns:
        The backend function taking
        ``[current_state, next_state, action, reward, terminated]`` and
        returning ``[loss]`` while applying one optimizer step.  It is also
        stored on ``self.train_step``.
    """
    state_shape = (None,) + self.env.observation_space.shape
    current_state = K.placeholder(shape=state_shape)
    next_state = K.placeholder(shape=state_shape)
    action = K.placeholder(ndim=1)
    terminated = K.placeholder(ndim=1)
    reward = K.placeholder(ndim=1)

    current_Q = self.model.net(current_state)
    next_Q = self.model.net(next_state)

    gamma = getattr(self, 'gamma', 0.99)
    learning_rate = getattr(self, 'learning_rate', 0.001)

    # No gradient flows through the bootstrap target, and bootstrapping is
    # switched off on terminal transitions.
    target_Q = K.stop_gradient(
        reward + gamma * (1.0 - terminated) * K.max(next_Q, axis=-1))

    # Select Q(s, a) of the action that was actually taken.
    n_actions = K.int_shape(current_Q)[-1]
    action_mask = K.one_hot(K.cast(action, 'int32'), n_actions)
    taken_Q = K.sum(current_Q * action_mask, axis=-1)

    loss = K.mean(K.square(target_Q - taken_Q))

    optimizer = tf.train.RMSPropOptimizer(learning_rate)
    train_op = optimizer.minimize(loss)
    # The train op is passed as an update rather than an output, so the
    # function returns only the loss value.
    op = K.Function([current_state, next_state, action, reward, terminated],
                    [loss],
                    updates=[train_op])
    self.train_step = op
    return op
def _make_train_function(self):
    # pylint: disable=attribute-defined-outside-init
    """
    We override this method so that we can use tensorflow optimisers directly.
    This is desirable as tensorflow handles gradients of sparse tensors
    efficiently.

    Builds ``self.train_function`` once (it is left alone when already set).
    The function takes the feed inputs, targets and sample weights (plus the
    learning phase when it is a tensor), returns the total loss followed by
    the metric tensors, and applies the optimizer update on every call.

    Raises:
        RuntimeError: If the model has not been compiled.
        ConfigurationError: If ``self.gradient_clipping`` names an
            unsupported clipping type.
    """
    if not hasattr(self, 'train_function'):
        raise RuntimeError('You must compile your model before using it.')
    if self.train_function is not None:
        return

    inputs = self._feed_inputs + self._feed_targets + self._feed_sample_weights
    if self.uses_learning_phase and not isinstance(K.learning_phase(), int):
        inputs += [K.learning_phase()]

    # Here we override Keras to use tensorflow optimizers directly.
    self.global_step = K.variable(0., name='global_step')
    gradients = tensorflow.gradients(self.total_loss,
                                     self._collected_trainable_weights)

    if self.gradient_clipping is not None:
        # Don't pop from the gradient clipping dict here as
        # if we call fit more than once we need it to still be there.
        clip_type = self.gradient_clipping.get("type")
        clip_value = self.gradient_clipping.get("value")
        if clip_type == 'clip_by_norm':
            gradients, _ = tensorflow.clip_by_global_norm(gradients, clip_value)
        elif clip_type == 'clip_by_value':
            # tensorflow.gradients yields None for weights that do not
            # influence the loss; clip_by_value cannot convert None, so such
            # entries are passed through untouched (the global-norm branch
            # already tolerates them).
            gradients = [
                None if x is None
                else tensorflow.clip_by_value(x, -clip_value, clip_value)
                for x in gradients
            ]
        else:
            raise ConfigurationError(
                "{} is not a supported type of gradient clipping.".format(
                    clip_type))

    zipped_grads_with_weights = zip(gradients,
                                    self._collected_trainable_weights)
    # pylint: disable=no-member
    training_updates = self.optimizer.apply_gradients(
        zipped_grads_with_weights, global_step=self.global_step)
    # pylint: enable=no-member
    updates = self.updates + [training_updates]

    # Gets loss and metrics. Updates weights at each call.
    self.train_function = K.Function(inputs,
                                     [self.total_loss] + self.metrics_tensors,
                                     updates=updates)
def attention_visuliaze():
    """Print test sentences, concepts, labels and their attention weights.

    Evaluates the outputs of the layers named ``alpha_prob``, ``beta_prob``
    and ``final_att`` of the module-level model ``mm`` on ``X_test`` (split
    at column ``maxSeqLen`` into the two model inputs).  For up to the first
    10000 test rows it prints the true and predicted labels, the sentence
    with ``'<PAD>'`` tokens removed, the concept list, and each weight vector
    next to ``255 - weight * 255``.
    """
    weight_lay = ['alpha_prob', 'beta_prob', 'final_att']
    inp = mm.input
    model_inputs = [X_test[:, :maxSeqLen], X_test[:, maxSeqLen:]]

    weights = {}
    for layer in mm.layers:
        if layer.name in weight_lay:
            func = K.Function(inp, [layer.output])
            weights[layer.name] = func(model_inputs)[0]

    # The original iterated a fixed 10000 rows and raised IndexError on
    # smaller test sets; cap the loop at the data actually available.
    for line in range(min(10000, len(X_test))):
        row = X_test[line]
        # Columns before maxSeqLen are word ids, the remainder concept ids.
        sent = [id2w[int(token)] for token in row[:maxSeqLen]]
        con = [id2con[int(token)] for token in row[maxSeqLen:]]
        sent = [item for item in sent if item != '<PAD>']

        print(id2label[y_test[line]], id2label[y_pred[line]], ''.join(sent))
        print(con)
        for name in weight_lay:
            print(weights[name][line], '||', (255 - weights[name][line] * 255))
        print()
def setup_model(self):
    """Build and compile the network stored on ``self.net``.

    Architecture: input -> ``nn.ReductionLayer(8, 64, 0.001)`` -> flatten ->
    Dense(256, tanh) -> ``nn.RBFLayer(512)`` -> Dense 128/128/128/64 ->
    linear Dense with ``self.output_n`` units.  The RBF layer's initializer
    is built from the Dense(256) features of 1000 states sampled from
    ``agent.memory``.

    Sets ``self.y`` (feature tensor), ``self.init`` (initializer) and
    ``self.net`` (model compiled with RMSprop and MSE loss).
    """
    inputs = layers.Input(shape=self.input_dim)
    y = nn.ReductionLayer(8, 64, 0.001)(inputs)
    y = layers.Flatten()(y)
    self.y = layers.Dense(256, activation="tanh")(y)

    # Evaluate the feature extractor on replay-memory states to seed the RBF
    # initializer.
    # NOTE(review): `agent` is resolved as a module-level name here, not an
    # attribute of self -- confirm that is intended.
    g = K.Function([inputs], [self.y])
    X = g([agent.memory.sample(1000)["state"]])[0]
    self.init = nn.InitCentersRandom(X)

    x = nn.RBFLayer(512, initializer=self.init)(self.y)
    for units, activation in ((128, "tanh"), (128, "tanh"),
                              (128, "relu"), (64, "relu")):
        x = layers.Dense(units, activation=activation)(x)
    outputs = layers.Dense(self.output_n, activation='linear')(x)

    self.net = keras.models.Model(inputs, outputs)
    optim = keras.optimizers.RMSprop(lr=0.00025, rho=0.95, epsilon=0.01)
    self.net.compile(optimizer=optim, loss='mse')

    # summary() prints the table itself and returns None; wrapping it in
    # print() only appended a stray "None" line.
    self.net.summary()
# Sort the training pairs by X so the curves below are drawn left to right.
idx = np.argsort(X_tr)
X_tr = X_tr[idx]
Z_tr = Z_tr[idx]

# Learned model: Y(x) = mean_k exp(-x^2 * coefs[k]) with 100 trainable coefs.
coefs = K.random_normal_variable((100, ), 0, 0.1)
pows = K.arange(100, dtype='float32')
X = K.placeholder((None, 1))
Y = K.mean(K.exp(-K.square(X) * coefs), axis=1)
Z = K.placeholder(ndim=1)
loss = K.mean(K.square(Y - Z))

# Adam step on coefs only, wrapped as backend functions.
optimizer = tf.train.AdamOptimizer()
opt = optimizer.minimize(loss, var_list=[coefs])
grad = K.Function([X, Z], [opt])
get_loss = K.Function([X, Z], [loss])
get_Y = K.Function([X], [Y])

# Column view of the inputs, matching the (None, 1) placeholder.
_X_col = X_tr.reshape(-1, 1)
for i in range(1000):
    grad([_X_col, Z_tr])
    if i % 100 == 0:
        print(get_loss([_X_col, Z_tr]))

# Target curve against the fitted curve.
plt.plot(X_tr, Z_tr)
plt.plot(X_tr, get_Y([_X_col])[0])
plt.show()

# Change Z_tt
Z_tt = Z_tr.copy()
# Repeated K-fold cross-validation of the wide-and-deep network on raw
# features; one row per (fold, drug) is written into `results`.
# NOTE(review): no increment of results_index is visible in this fragment;
# if none follows, every row overwrites index 0 -- confirm against the
# full file.
results_index = 0
for r in range(repeats):
    # A fresh shuffled split for every repeat.
    cross_val_split = KFold(n_splits=cv_splits, shuffle=True)
    for train, val in cross_val_split.split(X):
        X_train = X[train]
        X_val = X[val]
        y_train = y[train]
        y_val = y[val]
        #------- Train the wide and deep neural network ------#
        wdnn = get_wide_deep_raw_features()
        # The network is fitted against alpha_matrix rows, not y.
        wdnn.fit(X_train,
                 alpha_matrix[train],
                 epochs=100,
                 verbose=False,
                 validation_data=[X_val, alpha_matrix[val]])
        # Backend function that also takes the learning phase as an input.
        # It is only consumed by the commented-out ensemble call below.
        mc_dropout = K.Function(wdnn.inputs + [K.learning_phase()], wdnn.outputs)
        #wdnn_probs = ensemble(X_val, y_val, mc_dropout)
        wdnn_probs = wdnn.predict(X_val)
        for i, drug in enumerate(drugs):
            # Rows labelled -1 are excluded from scoring for this drug.
            non_missing_val = np.where(y_val[:, i] != -1)[0]
            auc_y = np.reshape(y_val[non_missing_val, i], (len(non_missing_val), 1))
            auc_preds = np.reshape(wdnn_probs[non_missing_val, i], (len(non_missing_val), 1))
            val_auc = roc_auc_score(auc_y, auc_preds)
            # Average precision is computed on the complemented labels and
            # scores, i.e. class 0 is treated as the positive class.
            val_auc_pr = average_precision_score(
                1 - y_val[non_missing_val, i],
                1 - wdnn_probs[non_missing_val, i])
            results.loc[results_index] = [
                'WDNN Raw Features', drug, val_auc, val_auc_pr
            ]
# custom loss function - we only care about (cos,sin) outputs at the
# non-zero positions in the sparse y_true vector. To avoid driving the
# other samples to 0 we use a sparse loss function. The normalisation
# term accounts for the time varying number of non-zero samples.
def sparse_loss(y_true, y_pred):
    """Mean squared error over the non-zero entries of ``y_true``.

    Args:
        y_true: Target tensor; entries equal to 0 are ignored.
        y_pred: Prediction tensor of the same shape.

    Returns:
        Scalar tensor: the sum of squared errors at the non-zero target
        positions divided by their count, or 0 when there are none.
    """
    mask = K.cast(K.not_equal(y_true, 0), dtype='float32')
    # An all-zero target batch made n == 0 and the loss NaN.  The count is at
    # least 1 whenever any entry is selected, so clamping at epsilon only
    # changes the empty case (0 / eps == 0).
    n = K.maximum(K.sum(mask), K.epsilon())
    return K.sum(K.square((y_pred - y_true) * mask)) / n


# testing custom loss function
y_true = Input(shape=(None, ))
y_pred = Input(shape=(None, ))
loss_func = K.Function([y_true, y_pred], [sparse_loss(y_true, y_pred)])
# allclose instead of == so the checks do not rely on exact float equality.
assert np.allclose(loss_func([[[0, 1, 0]], [[2, 2, 2]]]), np.array([1]))
assert np.allclose(loss_func([[[1, 1, 0]], [[3, 2, 2]]]), np.array([2.5]))
assert np.allclose(loss_func([[[0, 1, 0]], [[0, 2, 0]]]), np.array([1]))

# fit the model
from keras import optimizers
sgd = optimizers.SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss=sparse_loss, optimizer=sgd)

# training proper with real phase data
history = model.fit(amp_train,
                    phase_train_rect,
                    batch_size=nb_batch,
                    epochs=args.epochs,
                    validation_split=0.1)
def running(options):
    """Train, augment the first event class with generated data, and retrain.

    Steps: optionally mix the dataset, load the unbalanced fold 1, train the
    model built by ``AttSBetaVAE`` and score it, generate extra samples for
    the class labelled ``[1, 0, 0, 0, 0]`` by decoding its z* features,
    retrain on the augmented set, score again and write the results to
    ``K_Folds_results.csv``.

    Args:
        options: Configuration object; ``dataset``, ``mix_data``,
            ``num_events``, ``num_samples``, ``time_step``, ``result_path``
            and ``name`` are read here.

    Returns:
        pandas.DataFrame with columns ``F1`` and ``ER``: one ``fold_1`` row
        holding the post-augmentation scores and an ``AVER`` row with the
        mean over the recorded folds.
    """
    dt = Dataset(options.dataset)

    # 1.First construct polyphonic datasets by mixing single event sound,
    # and extract MFCCs features.
    if options.mix_data:
        dt.mix_data(nevents=options.num_events,
                    nsamples=options.num_samples,
                    isUnbalanced=True)

    f1_list, er_list, fold_list = [], [], []
    att_s_beta_vae = AttSBetaVAE(options)

    # 2.Load data.
    train_datas, train_labels, test_datas, test_labels = dt.load_data(
        nevents=options.num_events,
        nsamples=options.num_samples,
        fold=1,
        isUnbalanced=True)
    sequential_train_datas = dt.sequentialize_data(train_datas,
                                                   timestep=options.time_step)
    sequential_test_datas = dt.sequentialize_data(test_datas,
                                                  timestep=options.time_step)

    # 3.Create attention-based supervised beta-VAE model and train it with
    # unbalanced datas.
    K.reset_uids()
    model = att_s_beta_vae.build_model(options)
    att_s_beta_vae.train_model(model,
                               x_train=sequential_train_datas,
                               y_train=train_labels,
                               fold=1,
                               new_weights=options.result_path + model.name +
                               '/fold_1_last_weight_DA.h5')

    # 4.Evaluate the performance on F1 and ER
    # Param: supervised is set to 'False' default. 'True' for supervised
    # beta-VAE and 'False' for others.
    # This function evaluate the segment-based F1 score and ER.
    f1_score, error_rate = att_s_beta_vae.metric_model(
        model,
        sequential_test_datas,
        test_labels,
        supervised=True,
        new_weight_path='fold_1_last_weight_DA.h5')
    print(
        'Before data augmentation '
        '>>> nevents {nevents}, nsamples {nsamples} ==> error_rate: {error_rate}, f1_score: {f1_score}'
        .format(nevents=options.num_events,
                nsamples=options.num_samples,
                error_rate=error_rate,
                f1_score=f1_score))

    # 5.define the function to get z^*, here the inefficient category is the
    # first events.
    z_star_fnc = K.Function([model.input], [model.layers[14].output])

    # x collects the raw input features of the first category.
    target_label = [1, 0, 0, 0, 0]
    x, y_addition = [], []
    for idx, y in enumerate(train_labels):
        if (y == target_label).all():
            x.append(sequential_train_datas[idx])
            y_addition.append(y)
    z_star = z_star_fnc([x])[0]

    # 6.define the decoder
    decoder_fnc = K.Function([model.layers[6].output], [model.output[0]])
    generated_data = decoder_fnc([z_star])[0]

    # 7.augment the training set with generated data
    sequential_train_datas = np.concatenate(
        [sequential_train_datas, generated_data])
    train_labels = np.concatenate([train_labels, y_addition])

    # 8.retrain the model
    K.reset_uids()
    model = att_s_beta_vae.build_model(options)
    att_s_beta_vae.train_model(model,
                               x_train=sequential_train_datas,
                               y_train=train_labels,
                               fold=1,
                               new_weights=options.result_path + model.name +
                               '/fold_1_last_weight_DA_after.h5')
    f1_score_DA, error_rate_DA = att_s_beta_vae.metric_model(
        model,
        sequential_test_datas,
        test_labels,
        supervised=True,
        new_weight_path='fold_1_last_weight_DA_after.h5')
    print(
        'nevents {nevents}, nsamples {nsamples} ==> error_rate: {error_rate}, f1_score: {f1_score}'
        .format(nevents=options.num_events,
                nsamples=options.num_samples,
                error_rate=error_rate_DA,
                f1_score=f1_score_DA))

    # The score lists were never filled, so building the frame with a
    # one-element index raised a length-mismatch ValueError after all the
    # training had finished.  Record the fold result, then the average row.
    f1_list.append(f1_score_DA)
    er_list.append(error_rate_DA)
    fold_list.append('fold_1')

    f1_list.append(np.mean(f1_list))
    er_list.append(np.mean(er_list))
    fold_list.append('AVER')

    result_df = pd.DataFrame({'F1': f1_list, 'ER': er_list}, index=fold_list)
    result_df.to_csv(options.result_path + options.name + '_' +
                     str(options.num_events) + '/K_Folds_results.csv')
    return result_df
# Get feature and label data for current drug.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray and exists in old and new pandas alike.
X_mlp = df_X[num_snp_indiv_val[i]].values
# Label data for current drug
y_true_drug = y_true[:, j]
# Disregard rows for which no resistance data exists (label -1)
y_true_small = y_true_drug[y_true_drug != -1]
X_small = X_mlp[y_true_drug != -1]

# Get test data for current drug and proper SNPs
y_test_drug = y_test[:, j]
y_test_small = y_test_drug[y_test_drug != -1]
X_test_small = full_validation_df[num_snp_indiv_val[i]].values
X_test_small = X_test_small[y_test_drug != -1]

# Train on MLP
clf1 = get_mlp_single()
clf1.fit(X_small, y_true_small, nb_epoch=50)
# Backend function that takes the learning phase as an extra input; it is
# handed to ensemble() for both the test and the training predictions.
clf_do = K.Function(clf1.inputs + [K.learning_phase()], clf1.outputs)
y_pred_strat_test = ensemble(X_test_small,
                             np.expand_dims(y_test_small, axis=1), clf_do)
y_pred_strat_train = ensemble(X_small,
                              np.expand_dims(y_true_small, axis=1), clf_do)

# Compute AUC scores for validation set
auc_strat_data_test[i] = roc_auc_score(y_test_small, y_pred_strat_test)

# Get sensitivity and specificity for validation set.
# NOTE(review): the get_threshold result is overwritten on the next line;
# the call is kept in case it has side effects -- confirm whether it is
# still needed.
strat_data_indiv = get_threshold(y_true_small, y_pred_strat_train,
                                 y_test_small, y_pred_strat_test)
strat_data_indiv = get_sens_spec_from_threshold(y_test_small,
                                                y_pred_strat_test,
                                                strat_thresh_from_cv[j])
spec_strat_data_test[i] = strat_data_indiv['spec']
sens_strat_data_test[i] = strat_data_indiv['sens']
plot_fpr_tpr = plot_roc_auc(drug, y_test_small, y_pred_strat_test)
# code in "Keras backend" language def n0_dft(n0_scaled): n0_scaled = K.print_tensor(n0_scaled, "n0_scaled is: ") n0 = n0_scaled * gain #*P_max n0 = K.print_tensor(n0, "n0 is: ") #note n0_scaled = n0/P_max such that n0_scaled stays betwen [0..1] N = width cos_term = K.cos(n0 * K.cast(K.arange(N), dtype='float32') * np.pi / N) sin_term = K.sin(-n0 * K.cast(K.arange(N), dtype='float32') * np.pi / N) return K.concatenate([cos_term, sin_term], axis=-1) # testing custom layer against numpy implementation a = layers.Input(shape=(None, )) custom_layer = K.Function([a], [n0_dft(a)]) for i in range(10): e_test = np.array(custom_layer([[[n0[i] / gain]]])) # so e_test is continuous, we just want to sample at nonzero harmonic points ind = np.nonzero(e_rect[i, :]) err = (e_rect[i, ind] - e_test[0, 0, ind]) # there will be a small error as the GPU and Host don't always agree print(i, L[i], n0[i], err.shape, np.std(err)) assert (np.mean(np.std(err)) < 1E-4) print("n0_dft custom layer tested") # custom loss function def sparse_loss(y_true, y_pred): mask = K.cast(K.not_equal(y_pred, 0), dtype='float32') #mask = K.print_tensor(mask, "mask is: ")
model.summary()

# Synthetic regression task: the target is the spatial mean of a random
# 64x64 single-channel image.  (X, Y) is used for fitting, (W, Z) for
# evaluation.
X = np.random.normal(0, 1, (1000, 64, 64, 1))
Y = np.mean(X, axis=(1, 2))
W = np.random.normal(0, 1, (1000, 64, 64, 1))
Z = np.mean(W, axis=(1, 2))

reducer.compile(model)
model.fit(X, Y)
reducer.fit(X, Y)
print(reducer.display_update())
model.evaluate(W, Z)

# Backend functions for inspecting the model's target and output tensors.
# Note that W and Z are rebound to their results below.
targets = K.Function(model.targets, model.targets)
output = K.Function([model.input], [model.output])
W = output([X])
Z = targets([Y])

loss = K.Function([model.input, model.targets[0], model.sample_weights[0]],
                  [model.total_loss])
# NOTE(review): the sample-weight placeholder is not fed here; depending on
# the Keras version this run may need model.sample_weights[0] as well.
sess.run(model.total_loss,
         feed_dict={
             model.input: X,
             model.targets[0]: list(Y)
         })
model.fit(X, Y, batch_size=len(X))
# The stray `f` and argument-less `K.eval()` statements that stood here could
# not execute (undefined name / missing argument) and stopped the script
# before the loss evaluation below; they have been removed.
loss([X, Y, np.ones(len(X))])
def f_score_diff(y_true, y_pred, eps=1e-6):
    """Differentiable negative F2 score (scaled by 100) for use as a loss.

    Per sample, soft counts are summed over axes 1-3 and combined as
    ``5*TP / (5*TP + 4*FN + FP)``, the F-beta score with beta = 2; the result
    is averaged over the batch axis and negated.

    Args:
        y_true: Ground-truth mask tensor with values in [0, 1].
        y_pred: Predicted mask tensor of the same shape.
        eps: Added to numerator and denominator to avoid division by zero.

    Returns:
        Tensor holding ``-100 * mean(F2)`` over axis 0.
    """
    true_positive_count = K.sum(y_true * y_pred, axis=[1, 2, 3])
    # The two definitions below were swapped in the original, which put the
    # weight of 4 on false positives and so computed F0.5 instead of F2.
    # A false negative is a true pixel that was not predicted; a false
    # positive is a predicted pixel that is not true.
    false_negative_count = K.sum(y_true * (1 - y_pred), axis=[1, 2, 3])
    false_positive_count = K.sum((1 - y_true) * y_pred, axis=[1, 2, 3])
    weighted_tp = 5 * true_positive_count
    return -K.mean(
        ((weighted_tp + eps) * 100) /
        (weighted_tp + 4 * false_negative_count + false_positive_count + eps),
        axis=0)


from keras import layers
from keras import losses

x = layers.Input(shape=(None, None, None, 1))
y = layers.Input(shape=(None, None, None, 1))
# NOTE(review): this wraps `iou`, not f_score_diff, and the inputs declared
# above have one more dimension than the arrays built below -- confirm
# against the rest of the script.
loss_func = K.Function([x, y], [iou(x, y, 1e-6)])

# Ground truth: three 2x3 masks with a trailing channel axis; only the third
# contains positives.
a1 = np.array([[0, 0, 0], [0, 0, 0]])
a1 = np.expand_dims(a1, -1)
a2 = np.array([[0, 0, 0], [0, 0, 0]])
a2 = np.expand_dims(a2, -1)
a3 = np.array([[1, 1, 0], [0, 0, 0]])
a3 = np.expand_dims(a3, -1)
a_true = np.stack((a1, a2, a3))

# Predictions: all-zero masks of the same shape.
b1 = np.array([[0, 0, 0], [0, 0, 0]])
b1 = np.expand_dims(b1, -1)
b2 = np.array([[0, 0, 0], [0, 0, 0]])
b2 = np.expand_dims(b2, -1)
b3 = np.array([[0, 0, 0], [0, 0, 0]])
b3 = np.expand_dims(b3, -1)
b_pred = np.stack((b1, b2, b3))
loaded_model = model_from_json(fhl.read())
loaded_model.load_weights(arg.weights)

# Get test data.
# DataFrame.as_matrix() was removed in pandas 1.0; .values returns the same
# ndarray on every pandas version.
df_X_test = pd.read_csv(arg.genotype_data)
df_y_test = pd.read_csv(arg.phenotype_data)
X_test = df_X_test.values
y_test = df_y_test.values


# Ensembling
def ensemble(X, y, function, n_passes=100):
    """Average repeated evaluations of ``function`` on ``X``.

    Args:
        X: Input batch handed to ``function``.
        y: Array whose shape defines the shape of the result.
        function: Backend function called as ``function([X, 1])``; the
            trailing 1 is the value fed for the learning-phase input.
        n_passes: Number of evaluations to average (default 100, the value
            that was previously hard-coded).

    Returns:
        Float array shaped like ``y`` holding the mean output.
    """
    # np.float was only an alias of the builtin float and has been removed
    # from NumPy.
    preds = np.zeros_like(y, dtype=float)
    for _ in range(n_passes):
        preds += np.squeeze(np.array(function([X, 1])), axis=0)
    return preds / n_passes


clf_dom = K.Function(loaded_model.inputs + [K.learning_phase()],
                     loaded_model.outputs)

## NOTE: A PREDICTION OF 0 CORRESPONDS TO A "RESISTANT" PHENOTYPE AND
## A PREDICTION OF 1 CORRESPONDS TO A "SUSCEPTIBLE" PHENOTYPE.
y_pred = ensemble(X_test, y_test, clf_dom)
np.savetxt(
    sys.stdout,
    y_pred,
    delimiter=",",
    header="rif, inh, pza, emb, str, cip, cap, amk, moxi, oflx, kan")
from keras import Model
from keras import initializers
import matplotlib.pyplot as plt
from scipy import signal
from keras import backend as K


# custom loss function
def sparse_loss(y_true, y_pred):
    """Mean squared error over the non-zero entries of ``y_true``.

    Args:
        y_true: Target tensor; entries equal to 0 are ignored.
        y_pred: Prediction tensor of the same shape.

    Returns:
        Scalar tensor: the sum of squared errors at the non-zero target
        positions divided by their count, or 0 when there are none.
    """
    # The mask must come from the targets.  Masking on y_pred selects
    # essentially every position during training, because a network output
    # is almost never exactly 0, which reduces the loss to a plain MSE.
    mask = K.cast(K.not_equal(y_true, 0), dtype='float32')
    # Clamp the count so an all-zero target batch gives 0 instead of NaN.
    n = K.maximum(K.sum(mask), K.epsilon())
    return K.sum(K.square((y_pred - y_true) * mask)) / n


# testing custom loss function (first argument is y_true)
x = Input(shape=(None,))
y = Input(shape=(None,))
loss_func = K.Function([x, y], [sparse_loss(x, y)])
assert np.allclose(loss_func([[[1, 1, 1]], [[0, 2, 0]]]), np.array([1]))
assert np.allclose(loss_func([[[0, 1, 0]], [[0, 2, 0]]]), np.array([1]))

# constants
N = 80  # number of time domain samples in frame
nb_samples = 400000
nb_batch = 32
nb_epochs = 10
width = 256
pairs = 2 * width
fo_min = 50
fo_max = 400
Fs = 8000
def make(self, theano_kwargs=None):
    '''Make the model and compile it.

    Igor's config options control everything.  The method builds the
    embedding named by ``igor.embedding_type``, wires up the parent and leaf
    prediction branches, and stores the model on ``self.model``.  Outside
    training it also compiles the prediction functions ``self.inner_func``,
    ``self.leaf_func`` and ``self.get_ptensor``; in training it compiles the
    model with Adam and categorical cross-entropy.

    Arg:
        theano_kwargs: dict for debugging theano or submitting something
            custom; forwarded to ``Model`` and to ``compile``.

    Raises:
        Exception: for an unknown embedding type, or when neither a
            checkpoint nor training mode is configured.
    '''
    if self.igor.embedding_type == "convolutional":
        make_convolutional_embedding(self.igor)
    elif self.igor.embedding_type == "token":
        make_token_embedding(self.igor)
    elif self.igor.embedding_type == "shallowconv":
        make_shallow_convolutional_embedding(self.igor)
    elif self.igor.embedding_type == "minimaltoken":
        make_minimal_token_embedding(self.igor)
    else:
        raise Exception("Incorrect embedding type")

    B = self.igor.batch_size
    spine_input_shape = (B, self.igor.max_num_supertags)
    child_input_shape = (B, 1)
    parent_input_shape = (B, 1)

    E, V = self.igor.word_embedding_size, self.igor.word_vocab_size  # for word embeddings
    repeat_N = self.igor.max_num_supertags  # for lex
    mlp_size = self.igor.mlp_size

    ## dropout parameters
    p_emb = self.igor.p_emb_dropout
    # p_W and p_U are read from the config but not used in this method.
    p_W = self.igor.p_W_dropout
    p_U = self.igor.p_U_dropout
    w_decay = self.igor.weight_decay
    p_mlp = self.igor.p_mlp_dropout

    def predict_params():
        # A fresh dict (and fresh regularizers) for every prediction layer.
        return {
            'output_dim': 1,
            'W_regularizer': l2(w_decay),
            'activation': 'relu',
            'b_regularizer': l2(w_decay)
        }

    dspineset_in = Input(batch_shape=spine_input_shape,
                         name='daughter_spineset_in', dtype='int32')
    pspineset_in = Input(batch_shape=spine_input_shape,
                         name='parent_spineset_in', dtype='int32')
    dhead_in = Input(batch_shape=child_input_shape,
                     name='daughter_head_input', dtype='int32')
    phead_in = Input(batch_shape=parent_input_shape,
                     name='parent_head_input', dtype='int32')
    dspine_in = Input(batch_shape=child_input_shape,
                      name='daughter_spine_input', dtype='int32')
    inputs = [dspineset_in, pspineset_in, dhead_in, phead_in, dspine_in]

    ### Layer functions
    ############# Convert the word indices to vectors
    F_embedword = Embedding(input_dim=V, output_dim=E, mask_zero=True,
                            W_regularizer=l2(w_decay), dropout=p_emb)
    if self.igor.saved_embeddings is not None:
        self.logger.info("+ Cached embeddings loaded")
        F_embedword.initial_weights = [self.igor.saved_embeddings]

    ###### Prediction Functions
    ## these functions learn a vector which turns a tensor into a matrix of
    ## probabilities
    ### P(Parent supertag | Child, Context)
    F_parent_predict = ProbabilityTensor(
        name='parent_predictions',
        dense_function=Dense(**predict_params()))
    ### P(Leaf supertag)
    F_leaf_predict = ProbabilityTensor(
        name='leaf_predictions',
        dense_function=Dense(**predict_params()))

    ###### Network functions.
    ##### Input word, correct its dimensions (basically squash in a certain way)
    F_singleword = compose(Fix(), F_embedword)
    ##### Input spine, correct dimensions, broadcast across 1st dimension
    F_singlespine = compose(RepeatVector(repeat_N), Fix(),
                            self.igor.F_embedspine)
    ##### Concatenate and map to a single space
    F_alignlex = compose(
        RepeatVector(repeat_N),
        Dropout(p_mlp),
        Dense(mlp_size, activation='relu', name='dense_align_lex'),
        concat)
    F_alignall = compose(
        Distribute(Dropout(p_mlp), name='distribute_align_all_dropout'),
        Distribute(Dense(mlp_size, activation='relu', name='align_all_dense'),
                   name='distribute_align_all_dense'),
        concat)
    ### need a separate one because the 'concat' is different for the two
    ### situations
    F_alignleaf = compose(
        Distribute(Dropout(p_mlp * 0.66), name='distribute_leaf_dropout'),
        Distribute(Dense(mlp_size, activation='relu', name='leaf_dense'),
                   name='distribute_leaf_dense'),
        concat)

    ### embed and form all of the inputs into their components
    ### note: spines == supertags. early word choice, haven't refactored.
    leaf_spines = self.igor.F_embedspine(dspineset_in)
    pspine_context = self.igor.F_embedspine(pspineset_in)
    dspine_single = F_singlespine(dspine_in)
    dhead = F_singleword(dhead_in)
    phead = F_singleword(phead_in)

    ### combine the lexical material
    lexical_context = F_alignlex([dhead, phead])

    #### P(Parent Supertag | Daughter Supertag, Lexical Context)
    ### we know the daughter spine, want to know the parent spine
    ### size is (batch, num_supertags)
    parent_problem = F_alignall(
        [lexical_context, dspine_single, pspine_context])
    ### we don't have the parent, we just have a leaf
    leaf_problem = F_alignleaf([lexical_context, leaf_spines])

    parent_predictions = F_parent_predict(parent_problem)
    leaf_predictions = F_leaf_predict(leaf_problem)
    predictions = [parent_predictions, leaf_predictions]

    # Normalised once; the same dict serves Model(...) and compile(...).
    theano_kwargs = theano_kwargs or {}

    ## make it quick so i can load in the weights.
    self.model = Model(input=inputs, output=predictions,
                       preloaded_data=self.igor.preloaded_data,
                       **theano_kwargs)

    if self.igor.from_checkpoint:
        self.load_checkpoint_weights()
    elif not self.igor.in_training:
        raise Exception("No point in running this without trained weights")

    if not self.igor.in_training:
        expanded_children = RepeatVector(repeat_N, axis=2)(leaf_spines)
        expanded_parent = RepeatVector(repeat_N, axis=1)(pspine_context)
        # axis here is arbitrary; it's repeating on 1 and 2, but already
        # repeated once
        expanded_lex = RepeatVector(repeat_N, axis=1)(lexical_context)
        huge_tensor = concat(
            [expanded_lex, expanded_children, expanded_parent])
        densely_aligned = LastDimDistribute(
            F_alignall.get(1).layer)(huge_tensor)
        output_predictions = Distribute(
            F_parent_predict, force_reshape=True)(densely_aligned)

        primary_inputs = [phead_in, dhead_in, pspineset_in, dspineset_in]
        leaf_inputs = [phead_in, dhead_in, dspineset_in]

        self.logger.info("+ Compiling prediction functions")
        self.inner_func = K.Function(primary_inputs + [K.learning_phase()],
                                     output_predictions)
        self.leaf_func = K.Function(leaf_inputs + [K.learning_phase()],
                                    leaf_predictions)
        try:
            self.get_ptensor = K.function(
                primary_inputs + [K.learning_phase()],
                [output_predictions])
        except Exception:
            # This used to be a bare `except:` that dropped into pdb, which
            # hangs non-interactive runs and otherwise returned with
            # get_ptensor silently unset.  Log the failure and re-raise.
            self.logger.exception("Failed to compile get_ptensor")
            raise
    else:
        optimizer = Adam(self.igor.LR,
                         clipnorm=self.igor.max_grad_norm,
                         clipvalue=self.igor.grad_clip_threshold)
        self.model.compile(loss="categorical_crossentropy",
                           optimizer=optimizer,
                           metrics=['accuracy'],
                           **theano_kwargs)
def build_generate(self, model):
    """Load a trained model and install ``self.generate``.

    The installed closure decodes the current slider coordinates into a
    magnitude spectrogram, then optimizes a waveform with L-BFGS-B so that
    its spectrogram matches the decoded magnitudes while the waveform stays
    consistent with its own STFT round trip.  The waveform is stored on
    ``self.audio`` and plotted.

    Args:
        model: Path or handle accepted by ``keras.models.load_model``.
    """
    #################
    ## Model Setup ##
    #################
    sess = K.get_session()
    model = keras.models.load_model(model,
                                    compile=False,
                                    custom_objects={'NNResize': NNResize})

    latent_tensor = model.get_layer('z_mean').output
    encoder = K.Function([model.input], [latent_tensor])

    decoder_output = model.output
    decoder = K.Function([model.get_layer('dense_1').input], [decoder_output])
    output_shape = decoder_output.get_shape().as_list()

    ##########################
    ## Function Definitions ##
    ##########################
    n_fft, hop = self.N_FFT, self.HOP_LEN
    tf_spectrogram = Spectrogram(n_fft=n_fft,
                                 hop_length=hop,
                                 freq_format='freq_last')

    # Single-item batch placeholder for the target magnitudes.
    mag_placeholder = tf.placeholder(shape=(1, *output_shape[1:]),
                                     dtype=np.float32)
    alpha = 100

    # The waveform being optimized starts from Gaussian noise.
    init_recon = np.random.randn(1, int(self.expected_len)).astype(np.float32)
    signal_len = init_recon.shape[1]
    recon = tf.Variable(init_recon)
    recon_mel_out = tf_spectrogram.call(recon)

    # STFT round trip of the waveform, cropped back to its original length.
    stft_tf = tf.contrib.signal.stft(recon,
                                     frame_length=n_fft,
                                     frame_step=hop,
                                     pad_end=True)
    round_trip = tf.contrib.signal.inverse_stft(stft_tf,
                                                frame_length=n_fft,
                                                frame_step=hop,
                                                fft_length=n_fft)
    x_tf = round_trip[:, :signal_len]

    x_loss = tf.reduce_sum(tf.square(recon - x_tf))
    mag_loss = tf.reduce_sum(tf.square(mag_placeholder - recon_mel_out))
    recon_loss = alpha * x_loss + mag_loss

    sess.run(recon.initializer)
    recon_opt = tf.contrib.opt.ScipyOptimizerInterface(
        recon_loss,
        method='L-BFGS-B',
        options={'maxiter': 500},
        var_list=[recon])

    def generate():
        # Pull the latest slider positions into the coordinate buffer.
        for idx, coord_slider in enumerate(self.coord_sliders):
            self.current_coords[idx] = coord_slider.get()

        latent = np.asarray([self.current_coords], dtype=np.float32)
        decoded = decoder([latent])[0]
        amp_out = self.to_amp(decoded)
        # Zero the frames that fall inside the front padding.
        amp_out[:, :(self.front_padding // self.HOP_LEN)] = 0

        feed_dict = {mag_placeholder: amp_out}
        recon_opt.minimize(sess, feed_dict=feed_dict)
        print('Recon loss:', sess.run(recon_loss, feed_dict=feed_dict))

        self.audio = sess.run(recon)[0]
        self.ax.clear()
        self.ax.plot(self.audio)
        self.canvas.draw()

    self.generate = generate
def train_on_batch(self,X,Y):
    """Seed the stored initializer with features of ``X`` on first use.

    While ``self.initialized`` is false, the tensor ``self.y`` is evaluated
    on ``X`` and the result is assigned to ``self.init.X``.

    NOTE(review): within this fragment ``Y`` is unused, ``self.initialized``
    is never set and no training step runs -- the method probably continues
    beyond what is visible here.
    """
    if not self.initialized:
        # Backend function mapping the input tensor to the feature tensor.
        g = K.Function([self.inputs],[self.y])
        Z = g([X])[0]
        self.init.X = Z
def setup_agent(self):
    """Build the policies, value function and the backend functions used
    for the trust-region policy update.

    Sets ``self.pi``, ``self.oldpi``, ``self.value_function``,
    ``self.loss_names`` and the callables ``self.compute_losses``,
    ``self.compute_loss_grad``, ``self.compute_fisher_vector_product`` and
    ``self.compute_value_function_lossandgrad``.

    NOTE(review): ``self.pi`` is read (for the observation input and the
    action placeholder) before it is assigned below, so this only works if
    ``self.pi`` already exists when the method is called -- confirm.
    """
    advantages = K.placeholder(
        dtype=tf.float32,
        shape=[None])  # Target advantage function (if applicable)
    returns = K.placeholder(dtype=tf.float32, shape=[None])  # Empirical return

    observations = self.pi.observation_input
    actions = self.pi.pdtype.sample_placeholder([None])

    self.pi = self.policy_func(self.env, observations)
    self.oldpi = self.policy_func(self.env, observations)
    self.value_function = self.value_func(self.env,
                                          observations,
                                          lr=self.vf_stepsize)

    kl_old_new = self.oldpi.kl(self.pi)
    entropy = self.pi.entropy()
    mean_kl = K.mean(kl_old_new)
    mean_entropy = K.mean(entropy)
    entropy_bonus = self.entropy_coeff * mean_entropy

    vferr = K.mean(K.square(self.pi.value_pred - returns))

    # advantage * pnew / pold
    ratio = tf.exp(self.pi.pd.logp(actions) - self.oldpi.pd.logp(actions))
    surrogate_gain = K.mean(ratio * advantages)
    optimization_gain = surrogate_gain + entropy_bonus

    losses = [
        optimization_gain, mean_kl, entropy_bonus, surrogate_gain,
        mean_entropy
    ]
    self.loss_names = [
        "Optim Gain", "mean KL", "entropy loss", "surrogate gain", "entropy"
    ]

    # Fisher-vector product: gradient of (grad KL . tangents).
    dist = mean_kl
    kl_gradient = self.pi.flatten.gradient(dist)
    tangents = K.placeholder(shape=(None, ) +
                             K.eval(self.pi.variables.shape))
    gradient_vector_product = K.sum(kl_gradient * tangents)  #pylint: disable=E1111
    fisher_vector_product = self.pi.flatten.flatgrad(
        gradient_vector_product)

    policy_feeds = [observations, actions, advantages]
    self.compute_losses = K.Function(list(policy_feeds), losses)
    self.compute_loss_grad = K.Function(
        list(policy_feeds),
        losses + [self.pi.flatten.flatgrad(optimization_gain)])

    # The Keras TensorFlow backend rejects outputs that are not a list or
    # tuple, so a bare tensor here raised TypeError.  Wrap it unless flatgrad
    # already returned a sequence; the callable then returns a one-element
    # list, like the other functions built here.
    if isinstance(fisher_vector_product, (list, tuple)):
        fvp_outputs = list(fisher_vector_product)
    else:
        fvp_outputs = [fisher_vector_product]
    self.compute_fisher_vector_product = K.Function(
        [tangents] + policy_feeds, fvp_outputs)

    self.compute_value_function_lossandgrad = K.function(
        [observations, returns],
        [self.value_function.flatten.flatgrad(vferr)])

    th_init = self.pi.flatten.get_value()
    print("Init param sum", th_init.sum(), flush=True)
def build(self):
    """Build and compile the sequence model and its inspection functions.

    Symbol inputs are embedded by ``self._build_symbol_layer``.  Without
    attention the embeddings are optionally expanded with their history;
    with attention they are encoded by an RNN and read through an attention
    cell.  Optional feature vectors are repeated along the time axis and
    concatenated.  An RNN (unless attention is used without an output RNN)
    and a time-distributed softmax produce the outputs.

    Sets ``self.model_``, ``self.hidden_state_func_`` and, when attention is
    enabled, ``self._attention_func_``.

    Returns:
        self, to allow call chaining.
    """
    RNN = kl.GRU if self.rnn == "gru" else kl.LSTM

    if self.symbols_has_features:
        symbol_inputs = kl.Input(shape=(None, self.input_symbols_number),
                                 dtype='int32')
    else:
        symbol_inputs = kl.Input(shape=(None, ), dtype='int32')
    symbol_embeddings, symbol_inputs_length = self._build_symbol_layer(
        symbol_inputs)

    if not self.use_attention:
        if self.history > 1:
            # Each position carries the embeddings of its `history`
            # predecessors, flattened into one vector.
            symbol_inputs_length *= self.history
            pad = kb.zeros_like(symbol_embeddings[0, 0])
            symbol_embeddings = kl.Lambda(
                make_history,
                arguments={
                    "h": self.history,
                    "pad": pad,
                    "flatten": True
                },
                output_shape=(None, symbol_inputs_length))(symbol_embeddings)
        to_concatenate = [symbol_embeddings]
    else:
        encodings = RNN(self.memory_embeddings_size,
                        return_sequences=True)(symbol_embeddings)
        if self.dropout > 0.0:
            encodings = kl.Dropout(self.dropout)(encodings)
        if self.symbols_has_features or self.use_embeddings:
            AttentionLayer, attention_inputs = AttentionCell3D, symbol_embeddings
        else:
            AttentionLayer, attention_inputs = AttentionCell, symbol_inputs
        attention_layer = AttentionLayer(self.history,
                                         symbol_inputs_length,
                                         self.memory_embeddings_size,
                                         use_bias=self.use_attention_bias)
        memory, attention_probs = attention_layer(
            [attention_inputs, encodings])
        to_concatenate = [memory]

    if self.labels_ is not None:
        feature_inputs = kl.Input(shape=(self.feature_vector_size, ))
        inputs = [symbol_inputs, feature_inputs]
        feature_inputs_length = self.feature_vector_size
        if self.use_feature_embeddings:
            feature_inputs = kl.Dense(
                self.feature_embeddings_size,
                input_shape=(self.feature_vector_size, ),
                activation="relu",
                use_bias=False)(feature_inputs)
            feature_inputs_length = self.feature_embeddings_size
        # cannot use kb.repeat_elements because it requires an integer
        feature_inputs = kl.Lambda(
            repeat_,
            arguments={"k": kb.shape(symbol_embeddings)[1]},
            output_shape=(None, feature_inputs_length))(feature_inputs)
        to_concatenate.append(feature_inputs)
    else:
        inputs = [symbol_inputs]

    lstm_inputs = (kl.Concatenate()(to_concatenate)
                   if len(to_concatenate) > 1 else to_concatenate[0])

    if not self.use_attention or self.use_output_rnn:
        lstm_outputs = RNN(self.rnn_size,
                           return_sequences=True)(lstm_inputs)
        if self.dropout > 0.0:
            lstm_outputs = kl.Dropout(self.dropout)(lstm_outputs)
    else:
        # no LSTM over memory blocks
        lstm_outputs = lstm_inputs

    outputs = kl.TimeDistributed(kl.Dense(self.output_symbols_number,
                                          activation="softmax",
                                          input_shape=(self.rnn_size, )),
                                 name="output")(lstm_outputs)

    compile_args = {
        "optimizer": ko.nadam(clipnorm=5.0),
        "loss": "categorical_crossentropy"
    }
    self.model_ = keras.models.Model(inputs, outputs)
    self.model_.compile(**compile_args)
    if self.verbose > 0:
        # summary() prints the table itself and returns None; wrapping it in
        # print() only appended a stray "None" line.
        self.model_.summary()

    # Inspection functions; both take the learning phase as an extra input.
    if self.use_attention:
        self._attention_func_ = kb.Function(inputs + [kb.learning_phase()],
                                            [attention_probs])
    self.hidden_state_func_ = kb.Function(inputs + [kb.learning_phase()],
                                          [lstm_outputs])
    return self