def run_neural_net(X_train, Y_train, X_test, Y_test):
    """Train and evaluate a dense feed-forward binary classifier.

    Rows of X_* are samples, columns are features; Y_* holds binary
    labels (sigmoid output, binary cross-entropy loss).  Prints the
    Keras evaluation metrics and the WinDiff score on the test split.

    NOTE(review): `recall`, `precision`, `helper`, and the Keras layer
    classes are defined elsewhere in this module's imports.
    """
    INPUT_NODES = X_train.shape[1]
    OUTPUT_NODES = 1  # single sigmoid unit -> binary decision per sample

    # Build the model: one wide ReLU layer with heavy dropout, sigmoid output.
    model = Sequential()
    model.add(Dense(512, input_dim=INPUT_NODES, init='uniform',
                    activation='relu'))
    model.add(Dropout(0.8))
    model.add(Dense(OUTPUT_NODES, init='uniform', activation='sigmoid'))

    # Compile with custom recall/precision metric callables.
    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=['accuracy', recall, precision])

    # Fit the model.
    model.fit(X_train, Y_train, nb_epoch=200, batch_size=100,
              validation_data=(X_test, Y_test))

    # Evaluate the model.
    print('Evaluating...\n')
    scores = model.evaluate(X_test, Y_test)
    print(Y_test.shape)
    for (name, score) in zip(model.metrics_names, scores):
        print(" %s: %0.3f" % (name, score))

    # Calculate predictions and the WinDiff segmentation metric.
    predictions = model.predict(X_test)
    rounded = np.round(predictions)
    # windiff_metric_NUMPY expects a leading document/batch axis.
    rounded = np.expand_dims(rounded, axis=0)
    print(helper.windiff_metric_NUMPY(np.expand_dims(Y_test, axis=0), rounded))
    pdb.set_trace()  # NOTE(review): leftover debug breakpoint -- remove for production runs
def train_LSTM(X, Y, model, train_split=0.8, epochs=10, batch_size=32):
    """Train `model` on (X, Y) and evaluate on the clinical dataset.

    NOTE(review): this definition is shadowed by a later `train_LSTM`
    in the same module; only the last definition is effective.
    """
    # Clinical data is always loaded as an out-of-domain test set.
    SAMPLE_TYPE_cli, X_cli, Y_cli = get_input(sample_type=4,
                                              shuffle_documents=False,
                                              pad=False)

    which_model = 2
    if which_model == 2:
        custom_fit(X, Y, train_split=train_split, model=model, epochs=epochs)
        print("Clinical Data")
        custom_fit(X_cli, Y_cli, train_split=1, model=model)  # Test clinical
    # BUG FIX: was `which_modle` -- a NameError waiting to happen if this
    # branch were ever reached.
    elif which_model == 1:
        # Works for TYPE2 but check for others.
        X_train, Y_train, X_test, Y_test = split_data(X, Y,
                                                      train_split=train_split)
        model.fit(X_train, Y_train, shuffle=False, nb_epoch=epochs,
                  batch_size=batch_size,
                  validation_data=(X_test, Y_test))  # Wikipedia

        # Clinical: temporarily truncate documents to the training
        # sequence length so the fixed-length model accepts them.
        TRUNCATE_LEN = X_train.shape[1]
        print("NOTE: Truncating the Test dataset(clinical) from %d sentences to %d sentences." % (
            X_cli.shape[1], TRUNCATE_LEN))
        X_cli, Y_cli = X_cli[:, :TRUNCATE_LEN, :], Y_cli[:, :TRUNCATE_LEN, :]
        model.evaluate(X_cli, Y_cli, batch_size=batch_size)
        pred = model.predict(X_cli)
        rounded = np.round(pred)
        result = helper.windiff_metric_NUMPY(Y_cli, rounded,
                                             win_size=10, rounded=True)
        print(result)

    pdb.set_trace()  # NOTE(review): leftover debug breakpoint
def train_LSTM(X, Y, model, embedding_W, train_split=0.8, epochs=10,
               batch_size=32):
    """Train `model` on (X, Y) and evaluate on clinical and biography data.

    `embedding_W` is accepted for interface compatibility with callers;
    the datasets come from module-level globals.
    """
    global X_wiki, Y_wiki, X_cli, Y_cli, X_bio, Y_bio

    which_model = 2
    if which_model == 2:
        custom_fit(X, Y, model=model, batch_size=batch_size,
                   train_split=train_split, epochs=epochs)
        # train_split=0 / epochs=-1 puts custom_fit in test-only mode.
        print("############## Clinical Data ###########")
        custom_fit(X_cli, Y_cli, model=model, batch_size=batch_size,
                   train_split=0, epochs=-1)  # Test clinical
        print("############## Biography Data ###########")
        custom_fit(X_bio, Y_bio, model=model, batch_size=batch_size,
                   train_split=0, epochs=-1)  # Test biography
    # BUG FIX: was `which_modle` -- a NameError waiting to happen if this
    # branch were ever reached.
    elif which_model == 1:
        # Works for TYPE2 but check for others.
        X_train, Y_train, X_test, Y_test = split_data(X, Y,
                                                      train_split=train_split)
        model.fit(X_train, Y_train, shuffle=False, nb_epoch=epochs,
                  batch_size=batch_size,
                  validation_data=(X_test, Y_test))  # Wikipedia

        # Clinical: temporarily truncate documents to the training
        # sequence length so the fixed-length model accepts them.
        TRUNCATE_LEN = X_train.shape[1]
        print("NOTE: Truncating the Test dataset(clinical) from %d sentences to %d sentences." % (
            X_cli.shape[1], TRUNCATE_LEN))
        X_cli, Y_cli = X_cli[:, :TRUNCATE_LEN, :], Y_cli[:, :TRUNCATE_LEN, :]
        model.evaluate(X_cli, Y_cli, batch_size=batch_size)
        pred = model.predict(X_cli)
        rounded = np.round(pred)
        result = helper.windiff_metric_NUMPY(Y_cli, rounded,
                                             win_size=10, rounded=True)
        print(result)

    pdb.set_trace()  # NOTE(review): leftover debug breakpoint
def custom_fit(X, Y, model, train_split=0.8, epochs=10):
    """Per-sentence, batch-size-1 stateful training/evaluation loop.

    Each X[i] is a document (sequence of sentence vectors), Y[i] the
    per-sentence labels.  The model's recurrent state is reset between
    documents.  train_split == 1 means (X, Y) is used for testing only.

    NOTE(review): this definition is shadowed by a later `custom_fit`
    in the same module; only the last definition is effective.
    """
    if train_split == 1:
        # Test-only mode.
        # BUG FIX: X_train/Y_train were previously left undefined here,
        # so the training loop below crashed with a NameError whenever a
        # caller requested test-only evaluation.  Empty lists make the
        # training loop a no-op instead.
        X_train, Y_train = [], []
        X_test, Y_test = X, Y
    else:
        # This is only for training! (If train_split == 1 then only TEST.)
        X_train, Y_train, X_test, Y_test = split_data(X, Y,
                                                      train_split=train_split)

    print("Batch size = 1")
    print('Train...')
    for epoch in range(epochs):
        mean_tr_acc = []
        mean_tr_loss = []
        for i in range(len(X_train)):
            for sequence, truth in zip(X_train[i], Y_train[i]):  # sentences in document
                sequence = sequence.reshape((1, sequence.shape[0]))
                sequence = np.expand_dims(sequence, axis=0)
                # The model takes the same sentence on four input branches.
                tr_loss, tr_acc = model.train_on_batch(
                    [sequence, sequence, sequence, sequence], truth)
                mean_tr_acc.append(tr_acc)
                mean_tr_loss.append(tr_loss)
            model.reset_states()  # stateful model: clear state between documents

        if mean_tr_acc:  # skip the (meaningless) training summary in test-only mode
            print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
            print('loss training = {}'.format(np.mean(mean_tr_loss)))
        print('___________________________________')

        mean_te_acc = []
        mean_te_loss = []
        predictions = []
        for i in range(len(X_test)):
            for sequence, truth in zip(X_test[i], Y_test[i]):
                sequence = sequence.reshape((1, sequence.shape[0]))
                sequence = np.expand_dims(sequence, axis=0)
                te_loss, te_acc = model.test_on_batch(
                    [sequence, sequence, sequence, sequence], truth)
                mean_te_acc.append(te_acc)
                mean_te_loss.append(te_loss)
            model.reset_states()

            # Second pass over the same document to collect raw predictions.
            predictions.append([])
            for sequence, truth in zip(X_test[i], Y_test[i]):
                sequence = sequence.reshape((1, sequence.shape[0]))
                sequence = np.expand_dims(sequence, axis=0)
                y_pred = model.predict_on_batch(
                    [sequence, sequence, sequence, sequence])
                predictions[i].append(y_pred)
            model.reset_states()

        print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
        print('loss testing = {}'.format(np.mean(mean_te_loss)))

        print("Check windiff value")
        # Predictions are raw probabilities; windiff_metric_NUMPY rounds
        # internally when rounded=False.
        result = helper.windiff_metric_NUMPY(Y_test, predictions,
                                             win_size=-1, rounded=False)
        print(result)
        print('___________________________________')
def custom_fit(X, Y, model, train_split=0.8, epochs=10):
    """Per-sentence, batch-size-1 stateful training loop with progress output.

    Each X[i] is a document (sequence of sentence vectors), Y[i] the
    per-sentence labels.  The model's recurrent state is reset between
    documents.  train_split == 0 means (X, Y) is used for testing only
    (callers pass epochs=-1 in that case so no training epoch runs).
    """
    if train_split == 0:
        # Test-only mode.
        # BUG FIX: X_train/Y_train were previously left undefined here,
        # so len(X_train) below raised a NameError before the epoch loop
        # even started.  Empty lists make the training section a no-op.
        X_train, Y_train = [], []
        X_test, Y_test = X, Y
    else:
        # This is only for training! (train_split == 0 means only TEST.)
        X_train, Y_train, X_test, Y_test = split_data(X, Y,
                                                      train_split=train_split)

    print("Batch size = 1")
    print('Train...')
    _total_docs = len(X_train)
    _total_sentences = sum([sequence.shape[0] for sequence in X_train])
    for epoch in range(epochs):
        mean_tr_acc = []
        mean_tr_loss = []
        _sentence_no = 0
        for i in range(len(X_train)):
            for sequence, truth in zip(X_train[i], Y_train[i]):  # sentences in document
                sequence = sequence.reshape((1, sequence.shape[0]))
                tr_loss, tr_acc = model.train_on_batch([sequence], truth)
                mean_tr_acc.append(tr_acc)
                mean_tr_loss.append(tr_loss)
                _sentence_no += 1
                # BUG FIX: progress previously printed _sentence_no + 1
                # after the increment, over-counting by one.
                print(">> Epoch: %d/%d | Doc: %d/%d | Sent: %d/%d" % (
                    epoch + 1, epochs, i + 1, _total_docs,
                    _sentence_no, _total_sentences))
            model.reset_states()  # stateful model: clear state between documents

        print('accuracy training = {}'.format(np.mean(mean_tr_acc)))
        print('loss training = {}'.format(np.mean(mean_tr_loss)))
        print('___________________________________')

    # Testing (runs once, after all epochs -- also reached in test-only mode).
    mean_te_acc = []
    mean_te_loss = []
    predictions = []
    _total_docs = len(X_test)
    _total_sentences = sum([sequence.shape[0] for sequence in X_test])
    _sentence_no = 0
    for i in range(len(X_test)):
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            te_loss, te_acc = model.test_on_batch([sequence], truth)
            mean_te_acc.append(te_acc)
            mean_te_loss.append(te_loss)
            _sentence_no += 1
            # BUG FIX: same off-by-one as in the training progress line.
            print(">> TEST >> Doc: %d/%d | Sent: %d/%d" % (
                i + 1, _total_docs, _sentence_no, _total_sentences))
        model.reset_states()

    print('accuracy testing = {}'.format(np.mean(mean_te_acc)))
    print('loss testing = {}'.format(np.mean(mean_te_loss)))

    print("Predicting...")
    # Second pass over the test documents to collect raw predictions.
    for i in range(len(X_test)):
        predictions.append([])
        for sequence, truth in zip(X_test[i], Y_test[i]):
            sequence = sequence.reshape((1, sequence.shape[0]))
            y_pred = model.predict_on_batch([sequence])
            predictions[i].append(y_pred)
        model.reset_states()

    print("Check windiff value")
    # Predictions are raw probabilities; windiff_metric_NUMPY rounds
    # internally when rounded=False.
    result = helper.windiff_metric_NUMPY(Y_test, predictions,
                                         win_size=-1, rounded=False)
    print(result)
    print('___________________________________')