) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in range(training_epochs): # go through trainng set c = [] for batch_index in range(n_train_batches): c.append(train_da(batch_index)[1]) print('batch %d complete' % batch_index) # print 'Training epoch %d, cost ' % epoch, numpy.mean(c) print('Training epoch %d, sample is ' % epoch, c[-1]) end_time = time.clock() training_time = (end_time - start_time) print('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % (training_time / 60.)) if __name__ == '__main__': test_dA(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'))
raise TypeError('y_real should have the same shape as y_pred', ('y_real ', len(y_real), 'y_pred', len(y_pred))) count = numpy.zeros([label_num, label_num]) for real, pred in zip(y_real, y_pred): count[real][pred] += 1 precison = count[target][target] / numpy.sum(count, axis=0)[target] recall = count[target][target] / numpy.sum(count, axis=1)[target] fscore = 2 * precison * recall / (precison + recall) print('p:{0:f} r:{1:f} f:{2:f}'.format(precison, recall, fscore)) return fscore def corss_validation(): k = 4 avg_score = 0 x, y = read_data(r'..\data\data_balanced\lexical_vec_avg.txt') for p_st in [x / k for x in range(0, k)]: p_en = p_st + 1 / k datas = split_data(x, y, p_st, p_en) score = sgd_optimization(datas, n_epochs=100) avg_score += score avg_score /= k print('Average score is: {0:f}'.format(avg_score)) if __name__ == '__main__': sgd_optimization(load_data(r'..\data\data_balanced\lexical_vec_avg.txt', sp_idx=3701), n_epochs=1000, batch_size=100) # corss_validation()
for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) # compute f-score on validation set y_preds = [validate_model(i) for i in range(n_valid_batches)] y_pred = [pij for pi in y_preds for pij in pi] y_real = valid_set_y.get_value(borrow=True) fscore, precison, recall = f_score(y_real, y_pred) print( 'epoch {0:d}, fscore {1:f} precision {2:f} recall {3:f}'.format( epoch, fscore, precison, recall)) # if we got the best validation score until now if fscore > best_fscore: best_fscore = fscore print('-----Best score: {0:f}-----'.format(best_fscore)) end_time = time.clock() print('Optimization complete with best validation score of {0:.1f} %,'. format(best_fscore * 100.)) print('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
# NOTE(review): the line above is the whitespace-collapsed tail of a
# definition that begins outside this view; it is left untouched here.
# It calls time.clock(), which was removed in Python 3.8
# (time.perf_counter() is the documented replacement).


if __name__ == '__main__':
    # Script entry point: hand the result of load_data() for the hard-coded
    # path to test_DBN() with fixed hyper-parameters.
    test_DBN(
        load_data(r'D:\workspace\sentiment\data_balanced\filtered.txt'),
        pretraining_epochs=10,
        training_epochs=100,
        batch_size=20,
        k=1,
    )
end_time = time.clock() print('Optimization complete with best validation score: fscore {0:f} precision {1:f} recall {2:f},' .format(best_fscore[0], best_fscore[1], best_fscore[2])) print('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if seq_output: print('writing sequence output') seq_output_file(r'..\data\data_seq\seq_train_raw.txt', seq_output_train, datasets_modals[0][0][1].get_value(borrow=True)) seq_output_file(r'..\data\data_seq\seq_test_raw.txt', seq_output_valid, datasets_modals[0][1][1].get_value(borrow=True)) def seq_output_file(file, output_data, real_labels): f = open(file, 'w', encoding='utf-8') for feature, predict, real in zip(output_data[0], output_data[1], real_labels): f.write(' '.join(map(lambda i: '{0:.0f}'.format(i * 10), feature))) f.write(' {0:.0f} {1:.0f}\n'.format(predict, real)) f.close() if __name__ == '__main__': # acoustic_data = load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701) # text_data = load_data(r'..\data\data_balanced\lexical_vec_avg.txt', sp_idx=3701) # test_mmsda([acoustic_data, text_data], pretraining_epochs=1, training_epochs=3, batch_size=50, seq_output=True) acoustic_data = load_data(r'..\data\data_context\acoustic.txt', sp_idx=23993) text_data = load_data(r'..\data\data_context\lexical_vec_avg.txt', sp_idx=23993) test_mmsda([acoustic_data, text_data], pretraining_epochs=50, training_epochs=400, batch_size=50, seq_output=False)
presig_vis, vis_mfs, vis_samples ], updates ) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every ) # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function( [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn' ) for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() # do some output here!!! if __name__ == '__main__': test_rbm(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'), n_hidden=500)
epoch = 0 while epoch < training_epochs: epoch += 1 for minibatch_index in range(n_train_batches): minibatch_avg_cost = train_fn(minibatch_index) # compute f-score on validation set y_preds = [validate_model(i) for i in range(n_valid_batches)] y_pred = [pij for pi in y_preds for pij in pi] y_real = valid_set_y.get_value(borrow=True) fscore, precison, recall = f_score(y_real, y_pred) print('epoch {0:d}, fscore {1:f} precision {2:f} recall {3:f}'.format(epoch, fscore, precison, recall)) # if we got the best validation score until now if fscore > best_fscore: best_fscore = fscore print('-----Best score: {0:f}-----'.format(best_fscore)) end_time = time.clock() print('Optimization complete with best validation score of {0:.1f} %,' .format(best_fscore * 100.)) print('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if __name__ == '__main__': test_DBN(load_data(r'D:\workspace\sentiment\data_balanced\filtered.txt'), pretraining_epochs=10, training_epochs=100, batch_size=20, k=1)
# compute f-score on validation set y_preds = [validate_model(i) for i in range(n_valid_batches)] y_pred = [pij for pi in y_preds for pij in pi] y_real = valid_set_y.get_value(borrow=True) print(y_pred) fscore, precison, recall = f_score(y_real, y_pred) print( 'epoch {0:d}, fscore {1:f} precision {2:f} recall {3:f}'.format( epoch, fscore, precison, recall)) # if we got the best validation score until now if fscore > best_fscore[0]: best_fscore = (fscore, precison, recall) print('-----Best score: {0:f}-----'.format(fscore)) end_time = time.clock() print( 'Optimization complete with best validation score: fscore {0:f} precision {1:f} recall {2:f},' .format(best_fscore[0], best_fscore[1], best_fscore[2])) print('The training code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
# NOTE(review): the line above is the whitespace-collapsed tail of a
# definition that begins outside this view; it is left untouched here.
# It calls time.clock(), which was removed in Python 3.8
# (time.perf_counter() is the documented replacement).


if __name__ == '__main__':
    # Script entry point: hand the result of load_data() for the hard-coded
    # path to test_SdA(). The disabled call below targets a different file.
    # test_SdA(load_data(r'..\data\data_balanced\acous_lex_avg.txt', sp_idx=3701),
    #          pretraining_epochs=50, training_epochs=500, batch_size=50)
    test_SdA(
        load_data(r'..\data\data_all\acous_lex_avg.txt', sp_idx=23993),
        pretraining_epochs=50,
        training_epochs=500,
        batch_size=50,
    )
plot_every = 1000 # define one step of Gibbs sampling (mf = mean-field) define a # function that does `plot_every` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( rbm.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=plot_every) # add to updates the shared variable that takes care of our persistent # chain :. updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn') for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() # do some output here!!!
# NOTE(review): the line above is the whitespace-collapsed tail of a
# definition that begins outside this view; it is left untouched here.


if __name__ == '__main__':
    # Script entry point: hand the result of load_data() for the hard-coded
    # path to test_rbm() with its default arguments.
    test_rbm(
        load_data(r'D:\workspace\sentiment\data_balanced\acoustic.txt')
    )
# if we got the best validation score until now if fscore > best_fscore: best_fscore = fscore print('-----Best score: {0:f}-----'.format(best_fscore)) end_time = time.clock() print('Optimization complete with best validation score of {0:.1f} %,' .format(best_fscore * 100.)) print('The code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
# NOTE(review): the line above is the whitespace-collapsed tail of a
# definition that begins outside this view; it is left untouched here.
# It calls time.clock(), which was removed in Python 3.8
# (time.perf_counter() is the documented replacement).


def f_score(y_real, y_pred, target=1, label_num=2):
    """Return the F1 score of class ``target``.

    A ``label_num`` x ``label_num`` confusion matrix is built with real
    labels as rows and predicted labels as columns. Precision is the
    target diagonal entry divided by its column total, recall is the same
    entry divided by its row total.

    Args:
        y_real: sequence of true integer labels (used as array indices).
        y_pred: sequence of predicted integer labels, same length.
        target: label whose score is computed.
        label_num: number of distinct labels (side of the matrix).

    Returns:
        ``2 * p * r / (p + r)``, or ``0.0`` when the target class has no
        true positives (the original divided 0 by 0 there and produced NaN
        together with a numpy RuntimeWarning).

    Raises:
        TypeError: if ``y_real`` and ``y_pred`` differ in length.
    """
    if len(y_real) != len(y_pred):
        raise TypeError(
            'y_real should have the same shape as y_pred',
            ('y_real ', len(y_real), 'y_pred', len(y_pred))
        )

    count = numpy.zeros([label_num, label_num])
    for real, pred in zip(y_real, y_pred):
        count[real][pred] += 1

    true_positive = count[target][target]
    if true_positive == 0:
        # Precision, recall or their sum would be 0/0 here; with at least one
        # true positive both denominators below are guaranteed non-zero.
        return 0.0

    precison = true_positive / numpy.sum(count, axis=0)[target]
    recall = true_positive / numpy.sum(count, axis=1)[target]
    return 2 * precison * recall / (precison + recall)


if __name__ == '__main__':
    # Script entry point: hand the result of load_data() for the hard-coded
    # path to test_mlp().
    test_mlp(
        load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'),
        n_epochs=100,
    )
# compute f-score on validation set y_preds = [validate_model(i) for i in range(n_valid_batches)] y_pred = [pij for pi in y_preds for pij in pi] y_real = valid_set_y.get_value(borrow=True) print(y_pred) fscore, precison, recall = f_score(y_real, y_pred) print( 'epoch {0:d}, fscore {1:f} precision {2:f} recall {3:f}'.format( epoch, fscore, precison, recall)) # if we got the best validation score until now if fscore > best_fscore[0]: best_fscore = (fscore, precison, recall) print('-----Best score: {0:f}-----'.format(fscore)) end_time = time.clock() print( 'Optimization complete with best validation score: fscore {0:f} precision {1:f} recall {2:f},' .format(best_fscore[0], best_fscore[1], best_fscore[2])) print('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.))
# NOTE(review): the line above is the whitespace-collapsed tail of a
# definition that begins outside this view; it is left untouched here.
# It calls time.clock(), which was removed in Python 3.8
# (time.perf_counter() is the documented replacement).


if __name__ == '__main__':
    # Script entry point: hand the result of load_data() for the hard-coded
    # path to test_DBN() with fixed hyper-parameters.
    test_DBN(
        load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701),
        pretraining_epochs=50,
        training_epochs=300,
        batch_size=50,
        k=1,
    )
batches_idx = list(range(n_train_batches)) while epoch < training_epochs: epoch += 1 random.shuffle(batches_idx) for minibatch_index in batches_idx: minibatch_avg_cost = train_fn(minibatch_index) # compute f-score on validation set y_preds = [validate_model(i) for i in range(n_valid_batches)] y_pred = [pij for pi in y_preds for pij in pi] y_real = valid_set_y.get_value(borrow=True) print(y_pred) fscore, precison, recall = f_score(y_real, y_pred) print('epoch {0:d}, fscore {1:f} precision {2:f} recall {3:f}'.format(epoch, fscore, precison, recall)) # if we got the best validation score until now if fscore > best_fscore[0]: best_fscore = (fscore, precison, recall) print('-----Best score: {0:f}-----'.format(fscore)) end_time = time.clock() print('Optimization complete with best validation score: fscore {0:f} precision {1:f} recall {2:f},' .format(best_fscore[0], best_fscore[1], best_fscore[2])) print('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) if __name__ == '__main__': test_DBN(load_data(r'..\data\data_balanced\acoustic.txt', sp_idx=3701), pretraining_epochs=50, training_epochs=300, batch_size=50, k=1)
updates=updates, givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]}) start_time = time.clock() ############ # TRAINING # ############ # go through training epochs for epoch in range(training_epochs): # go through trainng set c = [] for batch_index in range(n_train_batches): c.append(train_da(batch_index)[1]) print('batch %d complete' % batch_index) # print 'Training epoch %d, cost ' % epoch, numpy.mean(c) print('Training epoch %d, sample is ' % epoch, c[-1]) end_time = time.clock() training_time = (end_time - start_time) print('The no corruption code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % (training_time / 60.)) if __name__ == '__main__': test_dA(load_data(r'D:\workspace\sentiment\data_balanced\lexical.txt'))
raise TypeError( 'y_real should have the same shape as y_pred', ('y_real ', len(y_real), 'y_pred', len(y_pred)) ) count = numpy.zeros([label_num, label_num]) for real, pred in zip(y_real, y_pred): count[real][pred] += 1 precison = count[target][target] / numpy.sum(count, axis=0)[target] recall = count[target][target] / numpy.sum(count, axis=1)[target] fscore = 2 * precison * recall / (precison + recall) print('p:{0:f} r:{1:f} f:{2:f}'.format(precison, recall, fscore)) return fscore def corss_validation(): k = 4 avg_score = 0 x, y = read_data(r'..\data\data_balanced\lexical_vec_avg.txt') for p_st in [x / k for x in range(0, k)]: p_en = p_st + 1 / k datas = split_data(x, y, p_st, p_en) score = sgd_optimization(datas, n_epochs=100) avg_score += score avg_score /= k print('Average score is: {0:f}'.format(avg_score)) if __name__ == '__main__': sgd_optimization(load_data(r'..\data\data_balanced\lexical_vec_avg.txt', sp_idx=3701), n_epochs=1000, batch_size=100) # corss_validation()