def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        batch_size=100, nframes=13, features="fbank",
        init_lr=0.01, max_epochs=500,
        network_type="AB", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configure and train a two-output AB neural net on a ``.joblib`` dataset.

    The dataset is loaded with ``joblib``, shuffled and split: the first 90%
    is the training set.  If ``<path without 'train'>test.joblib`` exists, the
    remaining 10% is the dev set and the test set is read from that file;
    otherwise the 90%-95% slice is the dev set and the last 5% the test set.
    Scaling factors computed by the training iterator are reused for the dev
    and test iterators.

    After every epoch the word/speaker similarity scores are printed, and the
    network is pickled to ``<output name>.pickle`` whenever the combined
    validation loss improves; the final network is always pickled to
    ``<output name>_final.pickle``.

    Parameters:
        dataset_path: path of the training data; must end in ``.joblib``,
            otherwise a message is written to stderr and the process exits.
        dataset_name, prefix_fname, features: only used to build the output
            file name (together with nframes, network_type, trainer_type and
            DIM_EMBEDDING).
        batch_size, nframes: forwarded to the dataset iterators.
        init_lr: learning rate passed to the trainer when ``trainer_type``
            does not contain "delta".
        max_epochs: upper bound on the number of training epochs.
        network_type: a value containing "dropout" selects DropoutABNeuralNet,
            anything else ABNeuralNet2Outputs.
        trainer_type: "adadelta", "adagrad", or anything else for SGD.
        layers_types, layers_sizes: forwarded to the network constructor.
        dropout_rates, recurrent_connections: accepted but not used here.
        debug_print, debug_plot: verbosity levels; any non-zero value requests
            the debug variant of the trainer.
        debug_time: print the wall-clock duration of each epoch.

    Returns:
        None.
    """
    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print("output file name: %s" % (output_file_name,))

    n_ins = None
    n_outs = None
    print("loading dataset from %s" % (dataset_path,))
    # TODO: move the dataset loading into its own function
    if not dataset_path.endswith('.joblib'):
        sys.stderr.write("prepare your dataset with align_words.py or "
                         "lucid.py or buckeye.py\n")
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    # A separate test file means the tail of the training data can be used
    # entirely as the dev set; otherwise the tail is shared by dev and test.
    test_dataset_path = dataset_path[:-7].replace("train", "") + 'test.joblib'
    has_test_file = os.path.exists(test_dataset_path)
    dev_split_at = int(0.9 * len(data_same))
    test_split_at = len(data_same)
    if not has_test_file:
        test_split_at = int(0.95 * test_split_at)

    print(data_same[0])
    print(data_same[0][3].shape)
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True
    min_max_scale = False
    marginf = (nframes - 1) // 2  # TODO (floor division, as under Python 2)

    # keyword arguments shared by the three iterators
    iterator_kwargs = dict(normalize=normalize, min_max_scale=min_max_scale,
                           nframes=nframes, batch_size=batch_size,
                           marginf=marginf)

    ### TRAIN SET
    train_set_iterator = DatasetDTWWrdSpkrIterator(
        data_same[:dev_split_at],
        scale_f1=None, scale_f2=None, **iterator_kwargs)
    # reuse the scaling computed on the training set for dev and test
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
    valid_set_iterator = DatasetDTWWrdSpkrIterator(
        data_same[dev_split_at:test_split_at],
        scale_f1=f1, scale_f2=f2, **iterator_kwargs)

    ### TEST SET
    if has_test_file:
        data_same = joblib.load(test_dataset_path)
        test_data = data_same
    else:
        test_data = data_same[test_split_at:]
    test_set_iterator = DatasetDTWWrdSpkrIterator(
        test_data, scale_f1=f1, scale_f2=f2, **iterator_kwargs)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')

    fast_dropout = False
    if "dropout" in network_type:
        sys.stderr.write(
            "Dropout is not implemented for ABnets with 2 Outputs\n")
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,  # TODO with 2 Outputs
                                  n_ins=n_ins,
                                  layers_types=layers_types,
                                  layers_sizes=layers_sizes,
                                  n_outs=n_outs,
                                  loss='cos_cos2',
                                  rho=0.95,
                                  eps=1.E-6,
                                  max_norm=4.,
                                  fast_drop=fast_dropout,
                                  debugprint=debug_print)
    else:
        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng,
                                   n_ins=n_ins,
                                   layers_types=layers_types,
                                   layers_sizes=layers_sizes,
                                   n_outs=n_outs,
                                   loss='cos_cos2',
                                   rho=0.90,
                                   eps=1.E-6,
                                   max_norm=0.,
                                   debugprint=debug_print)
    print("Created a neural net as: %s" % (str(nnet),))

    # get the training, validation and testing function for the model
    print('... getting the training functions')
    print(trainer_type)
    # The debug flag is only passed when requested so that the trainers'
    # own default stays in effect otherwise.
    trainer_kwargs = {'debug': True} if (debug_plot or debug_print) else {}
    if trainer_type == "adadelta":
        train_fn = nnet.get_adadelta_trainer(**trainer_kwargs)
    elif trainer_type == "adagrad":
        train_fn = nnet.get_adagrad_trainer(**trainer_kwargs)
    else:
        train_fn = nnet.get_SGD_trainer(**trainer_kwargs)

    train_scoref_w = nnet.score_classif_same_diff_word_separated(
        train_set_iterator)
    valid_scoref_w = nnet.score_classif_same_diff_word_separated(
        valid_set_iterator)
    test_scoref_w = nnet.score_classif_same_diff_word_separated(
        test_set_iterator)
    train_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        train_set_iterator)
    valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        valid_set_iterator)
    test_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        test_set_iterator)
    data_iterator = train_set_iterator

    print('... training the model')
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    lr = init_lr
    timer = None
    # trainers whose name contains "delta" do not take a learning rate
    needs_lr = "delta" not in trainer_type  # TODO remove need for this

    if debug_plot:
        print_mean_weights_biases(nnet.params)

    while epoch < max_epochs:
        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            if needs_lr:
                avg_cost = train_fn(x[0], x[1], y[0], y[1], lr)
            else:
                avg_cost = train_fn(x[0], x[1], y[0], y[1])
            if debug_print >= 3:
                print("cost: %s" % (avg_cost[0],))
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray,
                                                       avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                        iteration, avg_params_gradients_updates,
                        map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            # debug trainers return a list whose first element is the cost
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print(' epoch %i took %f seconds' %
                  (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print(' epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref_w())
        print(' epoch %i, training sim same words %f, diff words %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        tmp_train = zip(*train_scoref_s())
        print(' epoch %i, training sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay.
        # NOTE(review): `iteration` is the index of the last mini-batch of
        # the epoch, so this value does not change from epoch to epoch.
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.))  ### TODO

        # we check the validation loss on every epoch
        validation_losses_w = zip(*valid_scoref_w())
        validation_losses_s = zip(*valid_scoref_s())
        valid_same_w = numpy.mean(validation_losses_w[0])
        valid_diff_w = numpy.mean(validation_losses_w[1])
        valid_same_s = numpy.mean(validation_losses_s[0])
        valid_diff_s = numpy.mean(validation_losses_s[1])
        this_validation_loss = 0.25*(1.-valid_same_w) +\
                0.25*valid_diff_w +\
                0.25*(1.-valid_same_s) +\
                0.25*valid_diff_s

        print(' epoch %i, valid sim same words %f, diff words %f' % \
              (epoch, valid_same_w, valid_diff_w))
        print(' epoch %i, valid sim same spkrs %f, diff spkrs %f' % \
              (epoch, valid_same_s, valid_diff_s))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses_w = zip(*test_scoref_w())
            test_losses_s = zip(*test_scoref_s())
            test_same_w = numpy.mean(test_losses_w[0])
            test_diff_w = numpy.mean(test_losses_w[1])
            test_same_s = numpy.mean(test_losses_s[0])
            test_diff_s = numpy.mean(test_losses_s[1])
            # Keep the test counterpart of the validation loss so that the
            # final summary reports the best model instead of the initial 0.
            test_score = 0.25*(1.-test_same_w) +\
                    0.25*test_diff_w +\
                    0.25*(1.-test_same_s) +\
                    0.25*test_diff_s
            print(' epoch %i, test sim same words %f, diff words %f' % \
                  (epoch, test_same_w, test_diff_w))
            print(' epoch %i, test sim same spkrs %f, diff spkrs %f' % \
                  (epoch, test_same_s, test_diff_s))

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
          (best_validation_loss, test_score))
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        iterator_type=DatasetDTWIterator,
        batch_size=100, nframes=13, features="fbank",
        init_lr=0.01, max_epochs=500,
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Configure and train an AB neural net, using separate dev/test files
    when they exist.

    The training data is loaded from ``dataset_path`` (a ``.joblib`` file)
    and shuffled.  Dev and test data come from ``<path without
    'train'>dev.joblib`` and ``...test.joblib``:

    * both files exist: all loaded data is used for training;
    * only the test file exists: the last 10% of the data is the dev set;
    * otherwise: 80% train, next 10% dev, last 10% test.

    Scaling factors computed by the training iterator are reused for the dev
    and test iterators.  The network is pickled to ``<output name>.pickle``
    whenever the combined validation loss improves and to
    ``<output name>_final.pickle`` at the end.

    Parameters:
        dataset_path: path of the training data; must end in ``.joblib``,
            otherwise a message is written to stderr and the process exits.
        dataset_name, prefix_fname, features: only used to build the output
            file name (together with nframes, network_type, trainer_type and
            DIM_EMBEDDING).
        iterator_type: accepted but not used here; the iterators are always
            DatasetDTWWrdSpkrIterator.
        batch_size, nframes: forwarded to the dataset iterators.
        init_lr: learning rate passed to the trainer when ``trainer_type``
            does not contain "delta".
        max_epochs: upper bound on the number of training epochs.
        network_type: a value containing "dropout" selects DropoutABNeuralNet,
            anything else ABNeuralNet2Outputs.
        trainer_type: "adadelta", "adagrad", or anything else for SGD.
        layers_types, layers_sizes: forwarded to the network constructor.
        dropout_rates, recurrent_connections: accepted but not used here.
        debug_on_test_only: not implemented; a true value writes
            "NOT IMPLEMENTED" to stderr and exits once the scoring functions
            have been created.
        debug_print, debug_plot: verbosity levels; any non-zero value requests
            the debug variant of the trainer.
        debug_time: print the wall-clock duration of each epoch.

    Returns:
        None.
    """
    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print("output file name: %s" % (output_file_name,))

    n_ins = None
    n_outs = None
    print("loading dataset from %s" % (dataset_path,))
    # TODO DO A FUNCTION
    if not dataset_path.endswith('.joblib'):
        sys.stderr.write("prepare your dataset with align_words.py or "
                         "lucid.py or buckeye.py\n")
        sys.exit(-1)

    ### LOADING DATA
    data_same = joblib.load(dataset_path)
    shuffle(data_same)

    path_stem = dataset_path[:-7].replace("train", "")
    dev_dataset_path = path_stem + 'dev.joblib'
    test_dataset_path = path_stem + 'test.joblib'
    has_test_file = os.path.exists(test_dataset_path)
    has_dev_and_test_set = os.path.exists(dev_dataset_path) and has_test_file
    has_test_set_only = False
    dev_split_at = len(data_same)
    test_split_at = len(data_same)
    if not has_dev_and_test_set:
        if has_test_file:
            sys.stderr.write("DOESN'T HAVE A SEPARATED DEV SET, "
                             "WE'LL SPLIT OUT OWN\n")
            has_test_set_only = True
            dev_split_at = int(0.9 * dev_split_at)
        else:
            sys.stderr.write("DOESN'T HAVE A SEPARATED DEV AND TEST SET, "
                             "WE'LL SPLIT OUT OWNS\n")
            dev_split_at = int(0.8 * dev_split_at)
            test_split_at = int(0.9 * test_split_at)

    print(data_same[0])
    print(data_same[0][3].shape)
    n_ins = data_same[0][3].shape[1] * nframes
    n_outs = DIM_EMBEDDING

    normalize = True
    min_max_scale = False
    marginf = (nframes - 1) // 2  # TODO (floor division, as under Python 2)

    # keyword arguments shared by the three iterators
    iterator_kwargs = dict(normalize=normalize, min_max_scale=min_max_scale,
                           nframes=nframes, batch_size=batch_size,
                           marginf=marginf)

    ### TRAIN SET
    if has_dev_and_test_set:
        train_data = data_same
    else:
        train_data = data_same[:dev_split_at]
    train_set_iterator = DatasetDTWWrdSpkrIterator(
        train_data, scale_f1=None, scale_f2=None, **iterator_kwargs)
    # reuse the scaling computed on the training set for dev and test
    f1 = train_set_iterator._scale_f1
    f2 = train_set_iterator._scale_f2

    ### DEV SET
    if has_dev_and_test_set:
        data_same = joblib.load(dev_dataset_path)
        dev_data = data_same
    else:
        dev_data = data_same[dev_split_at:test_split_at]
    valid_set_iterator = DatasetDTWWrdSpkrIterator(
        dev_data, scale_f1=f1, scale_f2=f2, **iterator_kwargs)

    ### TEST SET
    if has_dev_and_test_set or has_test_set_only:
        data_same = joblib.load(test_dataset_path)
        test_data = data_same
    else:
        test_data = data_same[test_split_at:]
    test_set_iterator = DatasetDTWWrdSpkrIterator(
        test_data, scale_f1=f1, scale_f2=f2, **iterator_kwargs)

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')

    # TODO the proper network type other than just dropout or not
    fast_dropout = False
    if "dropout" in network_type:
        nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,  # TODO with 2 Outputs
                                  n_ins=n_ins,
                                  layers_types=layers_types,
                                  layers_sizes=layers_sizes,
                                  n_outs=n_outs,
                                  loss='cos_cos2',
                                  rho=0.95,
                                  eps=1.E-6,
                                  max_norm=4.,
                                  fast_drop=fast_dropout,
                                  debugprint=debug_print)
    else:
        nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng,
                                   n_ins=n_ins,
                                   layers_types=layers_types,
                                   layers_sizes=layers_sizes,
                                   n_outs=n_outs,
                                   loss='cos_cos2',
                                   #loss='dot_prod',
                                   rho=0.90,
                                   eps=1.E-6,
                                   max_norm=0.,
                                   debugprint=debug_print)
    print("Created a neural net as: %s" % (str(nnet),))

    # get the training, validation and testing function for the model
    print('... getting the training functions')
    print(trainer_type)
    # The debug flag is only passed when requested so that the trainers'
    # own default stays in effect otherwise.
    trainer_kwargs = {'debug': True} if (debug_plot or debug_print) else {}
    if trainer_type == "adadelta":
        train_fn = nnet.get_adadelta_trainer(**trainer_kwargs)
    elif trainer_type == "adagrad":
        train_fn = nnet.get_adagrad_trainer(**trainer_kwargs)
    else:
        train_fn = nnet.get_SGD_trainer(**trainer_kwargs)

    train_scoref_w = nnet.score_classif_same_diff_word_separated(
        train_set_iterator)
    valid_scoref_w = nnet.score_classif_same_diff_word_separated(
        valid_set_iterator)
    test_scoref_w = nnet.score_classif_same_diff_word_separated(
        test_set_iterator)
    train_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        train_set_iterator)
    valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        valid_set_iterator)
    test_scoref_s = nnet.score_classif_same_diff_spkr_separated(
        test_set_iterator)
    data_iterator = train_set_iterator
    if debug_on_test_only:
        # Test-only debugging is not supported by this variant; the code
        # that used to follow the exit could never run and was dropped.
        sys.stderr.write("NOT IMPLEMENTED\n")
        sys.exit(-1)

    print('... training the model')
    # early-stopping parameters
    patience = 1000  # look as this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    best_validation_loss = numpy.inf
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    # trainers whose name contains "delta" do not take a learning rate
    needs_lr = "delta" not in trainer_type  # TODO remove need for this

    if debug_plot:
        print_mean_weights_biases(nnet.params)

    while (epoch < max_epochs) and (not done_looping):
        if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0:
            print("recomputing DTW:")
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            if needs_lr:
                avg_cost = train_fn(x[0], x[1], y[0], y[1], lr)
            else:
                avg_cost = train_fn(x[0], x[1], y[0], y[1])
            if debug_print >= 3:
                print("cost: %s" % (avg_cost[0],))
            if debug_plot >= 2:
                plot_costs(avg_cost[0])
                if not len(avg_params_gradients_updates):
                    avg_params_gradients_updates = map(numpy.asarray,
                                                       avg_cost[1:])
                else:
                    avg_params_gradients_updates = rolling_avg_pgu(
                        iteration, avg_params_gradients_updates,
                        map(numpy.asarray, avg_cost[1:]))
            if debug_plot >= 3:
                plot_params_gradients_updates(iteration, avg_cost[1:])
            # debug trainers return a list whose first element is the cost
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print(' epoch %i took %f seconds' %
                  (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print(' epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref_w())
        print(' epoch %i, training sim same words %f, diff words %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        tmp_train = zip(*train_scoref_s())
        print(' epoch %i, training sim same spkrs %f, diff spkrs %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay.
        # NOTE(review): `iteration` is the index of the last mini-batch of
        # the epoch, so this value does not change from epoch to epoch.
        lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.))  ### TODO
        #lr = numpy.float32(init_lr / (iteration + 1.)) ### TODO

        # we check the validation loss on every epoch
        validation_losses_w = zip(*valid_scoref_w())
        validation_losses_s = zip(*valid_scoref_s())
        valid_same_w = numpy.mean(validation_losses_w[0])
        valid_diff_w = numpy.mean(validation_losses_w[1])
        valid_same_s = numpy.mean(validation_losses_s[0])
        valid_diff_s = numpy.mean(validation_losses_s[1])
        this_validation_loss = 0.25*(1.-valid_same_w) +\
                0.25*valid_diff_w +\
                0.25*(1.-valid_same_s) +\
                0.25*valid_diff_s

        print(' epoch %i, valid sim same words %f, diff words %f' % \
              (epoch, valid_same_w, valid_diff_w))
        print(' epoch %i, valid sim same spkrs %f, diff spkrs %f' % \
              (epoch, valid_same_s, valid_diff_s))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses_w = zip(*test_scoref_w())
            test_losses_s = zip(*test_scoref_s())
            test_same_w = numpy.mean(test_losses_w[0])
            test_diff_w = numpy.mean(test_losses_w[1])
            test_same_s = numpy.mean(test_losses_s[0])
            test_diff_s = numpy.mean(test_losses_s[1])
            # Keep the test counterpart of the validation loss so that the
            # final summary reports the best model instead of the initial 0.
            test_score = 0.25*(1.-test_same_w) +\
                    0.25*test_diff_w +\
                    0.25*(1.-test_same_s) +\
                    0.25*test_diff_s
            print(' epoch %i, test sim same words %f, diff words %f' % \
                  (epoch, test_same_w, test_diff_w))
            print(' epoch %i, test sim same spkrs %f, diff spkrs %f' % \
                  (epoch, test_same_s, test_diff_s))
        # NOTE(review): `iteration` is the in-epoch batch index, not a
        # running total, so this comparison is not a real patience check.
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
          (best_validation_loss, test_score))
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit',
        iterator_type=DatasetDTWIterator,
        batch_size=100, nframes=13, features="fbank",
        init_lr=0.001, max_epochs=500,
        network_type="dropout_net", trainer_type="adadelta",
        layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression],
        layers_sizes=[2400, 2400, 2400, 2400],
        dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5],
        recurrent_connections=[],
        prefix_fname='',
        debug_on_test_only=False,
        debug_print=0,
        debug_time=False,
        debug_plot=0):
    """
    Load a dataset, build the requested network and train it.

    Three loading paths exist:

    * ``dataset_path`` ends in ``.joblib`` and the module flag REDTW is set:
      the word pairs are shuffled, mean/std are computed over both sides of
      every pair and handed to ``iterator_type`` with the raw pairs;
    * ``dataset_path`` ends in ``.joblib`` and REDTW is not set: "different"
      pairs are sampled at random from words with different labels,
      everything is normalised with the global mean/std and "same"/"different"
      pairs are interleaved;
    * otherwise the data comes from ``load_data`` and the label mapping from
      ``<dataset_name>_to_int_and_to_state_dicts_tuple.pickle``.

    In both ``.joblib`` paths the last 10% of the training material is the
    validation set, the test set is read from the path obtained by replacing
    "train" with "dev", and mean/std are saved to ``mean_std_3`` via
    ``numpy.savez``.

    The network is pickled to ``<output name>.pickle`` whenever the
    validation loss improves and to ``<output name>_final.pickle`` at the
    end.

    Parameters:
        dataset_path: path of the training data.
        dataset_name, prefix_fname: used to build the output file name;
            ``dataset_name`` also names the label-mapping pickle.
        iterator_type: class used to build the train/valid/test iterators.
        batch_size, nframes: forwarded to the iterators.
        features: forwarded to ``load_data`` and used in the output name.
        init_lr: learning rate passed to the trainer when ``trainer_type``
            does not contain "delta".
        max_epochs: upper bound on the number of training epochs.
        network_type: substrings select the model: "ab_net"/"abnet" for the
            AB nets, "dropout" for the dropout variants, "fast_dropout" to
            set ``fast_drop``.
        trainer_type: "adadelta", "adagrad", or anything else for SGD.
        layers_types, layers_sizes: forwarded to the network constructor.
        dropout_rates: forwarded to DropoutNet only.
        recurrent_connections: accepted but not used here.
        debug_on_test_only: train and score on the test iterator and skip
            validation.
        debug_print, debug_plot: verbosity levels; any non-zero value requests
            the debug variant of the trainer.
        debug_time: print the wall-clock duration of each epoch.

    Returns:
        None.
    """
    output_file_name = dataset_name
    if prefix_fname != "":
        output_file_name = prefix_fname + "_" + dataset_name
    output_file_name += "_" + features + str(nframes)
    output_file_name += "_" + network_type + "_" + trainer_type
    output_file_name += "_emb_" + str(DIM_EMBEDDING)
    print("output file name: %s" % (output_file_name,))

    n_ins = None
    n_outs = None
    print("loading dataset from %s" % (dataset_path,))
    # TODO DO A FUNCTION
    if dataset_path.endswith('.joblib'):
        if REDTW:
            data_same = joblib.load(dataset_path)
            shuffle(data_same)
            ten_percent = int(0.1 * len(data_same))
            # An explicit split index keeps the split correct when
            # ten_percent is 0 (seq[:-0] is empty, seq[-0:] is everything).
            split_at = len(data_same) - ten_percent

            x_arr_same = numpy.r_[
                numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            mean = numpy.mean(x_arr_same, 0)
            std = numpy.std(x_arr_same, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)

            print(x_arr_same.shape)
            print("mean: %s" % (mean,))
            print("std: %s" % (std,))
            marginf = 0  #(nframes-1)/2  # TODO

            train_set_iterator = iterator_type(
                data_same[:split_at], mean, std,
                nframes=nframes, batch_size=batch_size, marginf=marginf)
            valid_set_iterator = iterator_type(
                data_same[split_at:], mean, std,
                nframes=nframes, batch_size=batch_size, marginf=marginf)

            #test_dataset_path = dataset_path[:-7].replace("train", "test") + '.joblib'
            test_dataset_path = (dataset_path[:-7].replace("train", "dev")
                                 + '.joblib')
            data_same = joblib.load(test_dataset_path)
            test_set_iterator = iterator_type(
                data_same, mean, std,
                nframes=nframes, batch_size=batch_size, marginf=marginf,
                only_same=True)
            n_ins = mean.shape[0] * nframes
            n_outs = DIM_EMBEDDING
        else:
            data_same = joblib.load(dataset_path)
            #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)]
            print("number of word paired: %s" % (len(data_same),))

            if debug_print:
                # some stats on the DTW
                columns = zip(*data_same)
                dtw_costs = columns[5]
                words_frames = numpy.asarray(
                    [fb.shape[0] for fb in columns[3]])
                cost_per_frame = dtw_costs / words_frames
                print("mean DTW cost %s std dev %s" %
                      (numpy.mean(dtw_costs), numpy.std(dtw_costs)))
                print("mean word length in frames %s std dev %s" %
                      (numpy.mean(words_frames), numpy.std(words_frames)))
                print("mean DTW cost per frame %s std dev %s" %
                      (numpy.mean(cost_per_frame),
                       numpy.std(cost_per_frame)))

            # generate data_diff:
            same_spkr = sum(1 for tup in data_same if tup[1] == tup[2])
            ratio = same_spkr * 1. / len(data_same)
            print("ratio same spkr / all for same: %s" % (ratio,))

            data_diff = []
            last_index = len(data_same) - 1
            same_spkr_diff = 0
            # The order of the random draws is kept as it was so that a
            # seeded run produces the same pairs.
            for _ in xrange(len(data_same)):
                word_1 = random.randint(0, last_index)
                word_1_type = data_same[word_1][0]
                word_2 = random.randint(0, last_index)
                # redraw until the second word has a different label
                while data_same[word_2][0] == word_1_type:
                    word_2 = random.randint(0, last_index)

                # pick one of the two sides of each pair
                wt1 = random.randint(0, 1)
                wt2 = random.randint(0, 1)
                if data_same[word_1][1 + wt1] == data_same[word_2][1 + wt2]:
                    same_spkr_diff += 1
                p1 = data_same[word_1][3 + wt1]
                p2 = data_same[word_2][3 + wt2]
                # truncate both sides to the shorter one
                common_len = min(len(p1), len(p2))
                data_diff.append((p1[:common_len], p2[:common_len]))

            ratio = same_spkr_diff * 1. / len(data_diff)
            print("ratio same spkr / all for diff: %s" % (ratio,))

            x_arr_same = numpy.r_[
                numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            print(x_arr_same.shape)
            x_arr_diff = numpy.r_[
                numpy.concatenate([e[0] for e in data_diff]),
                numpy.concatenate([e[1] for e in data_diff])]
            print(x_arr_diff.shape)

            x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff])
            mean = numpy.mean(x_arr_all, 0)
            std = numpy.std(x_arr_all, 0)
            numpy.savez("mean_std_3", mean=mean, std=std)

            # the last two fields of each tuple index the two feature arrays
            x_same = [((e[3][e[-2]] - mean) / std,
                       (e[4][e[-1]] - mean) / std)
                      for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1] * len(e[0]) for e in x_same]
            x_diff = [((e[0] - mean) / std, (e[1] - mean) / std)
                      for e in data_diff]
            #shuffle(x_diff)
            y_diff = [[0] * len(e[0]) for e in x_diff]
            # interleave "same" and "different" examples
            y = [item for pair in zip(y_same, y_diff) for item in pair]
            x = [item for pair in zip(x_same, x_diff) for item in pair]

            x1, x2 = zip(*x)
            assert x1[0].shape[0] == x2[0].shape[0]
            assert x1[0].shape[1] == x2[0].shape[1]
            assert len(x1) == len(x2)
            assert len(x1) == len(y)
            ten_percent = int(0.1 * len(x1))
            # An explicit split index keeps the split correct when
            # ten_percent is 0 (seq[:-0] is empty, seq[-0:] is everything).
            split_at = len(x1) - ten_percent

            n_ins = x1[0].shape[1] * nframes
            n_outs = DIM_EMBEDDING

            print("nframes: %s" % (nframes,))
            marginf = (nframes - 1) // 2  # TODO (floor division)
            train_set_iterator = iterator_type(
                x1[:split_at], x2[:split_at], y[:split_at],  # TODO
                nframes=nframes, batch_size=batch_size, marginf=marginf)
            valid_set_iterator = iterator_type(
                x1[split_at:], x2[split_at:], y[split_at:],  # TODO
                nframes=nframes, batch_size=batch_size, marginf=marginf)

            ### TEST SET
            test_dataset_path = (dataset_path[:-7].replace("train", "dev")
                                 + '.joblib')
            data_same = joblib.load(test_dataset_path)
            # DO ONLY SAME
            x_arr_same = numpy.r_[
                numpy.concatenate([e[3] for e in data_same]),
                numpy.concatenate([e[4] for e in data_same])]
            print(x_arr_same.shape)
            x_same = [((e[3][e[-2]] - mean) / std,
                       (e[4][e[-1]] - mean) / std)
                      for e in data_same]
            shuffle(x_same)  # in place
            y_same = [[1] * len(e[0]) for e in x_same]
            x1, x2 = zip(*x_same)
            test_set_iterator = iterator_type(
                x1, x2, y_same,
                nframes=nframes, batch_size=batch_size, marginf=marginf)
    else:
        data = load_data(dataset_path, nframes=1, features=features,
                         scaling='normalize', cv_frac='fixed',
                         speakers=False, numpy_array_only=True)

        train_set_x, train_set_y = data[0]
        valid_set_x, valid_set_y = data[1]
        test_set_x, test_set_y = data[2]
        assert train_set_x.shape[1] == valid_set_x.shape[1]
        assert test_set_x.shape[1] == valid_set_x.shape[1]

        print("dataset loaded!")
        print("train set size %s" % (train_set_x.shape[0],))
        print("validation set size %s" % (valid_set_x.shape[0],))
        print("test set size %s" % (test_set_x.shape[0],))
        print("phones in train %s" % (len(set(train_set_y)),))
        print("phones in valid %s" % (len(set(valid_set_y)),))
        print("phones in test %s" % (len(set(test_set_y)),))
        n_outs = len(set(train_set_y))

        # Pickles must be read in binary mode.  Unpickling executes code
        # from the file, so only load mappings produced by this project.
        with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle',
                  'rb') as f:
            to_int, _ = cPickle.load(f)

        print("nframes: %s" % (nframes,))
        train_set_iterator = iterator_type(train_set_x, train_set_y,
                                           to_int, nframes=nframes,
                                           batch_size=batch_size)
        valid_set_iterator = iterator_type(valid_set_x, valid_set_y,
                                           to_int, nframes=nframes,
                                           batch_size=batch_size)
        test_set_iterator = iterator_type(test_set_x, test_set_y,
                                          to_int, nframes=nframes,
                                          batch_size=batch_size)
        n_ins = test_set_x.shape[1] * nframes

    assert n_ins is not None
    assert n_outs is not None

    # numpy random generator
    numpy_rng = numpy.random.RandomState(123)
    print('... building the model')

    # TODO the proper network type other than just dropout or not
    fast_dropout = "fast_dropout" in network_type
    is_ab_net = "ab_net" in network_type or "abnet" in network_type
    use_dropout = "dropout" in network_type
    if is_ab_net:
        if use_dropout:
            print("dropout ab net")
            nnet = DropoutABNeuralNet(numpy_rng=numpy_rng,
                                      n_ins=n_ins,
                                      layers_types=layers_types,
                                      layers_sizes=layers_sizes,
                                      n_outs=n_outs,
                                      loss='cos_cos2',
                                      rho=0.95,
                                      eps=1.E-6,
                                      max_norm=4.,
                                      fast_drop=fast_dropout,
                                      debugprint=debug_print)
        else:
            print("ab net")
            nnet = ABNeuralNet(numpy_rng=numpy_rng,
                               n_ins=n_ins,
                               layers_types=layers_types,
                               layers_sizes=layers_sizes,
                               n_outs=n_outs,
                               loss='dot_prod',
                               rho=0.95,
                               eps=1.E-6,
                               max_norm=0.,
                               debugprint=debug_print)
    else:
        if use_dropout:
            nnet = DropoutNet(numpy_rng=numpy_rng,
                              n_ins=n_ins,
                              layers_types=layers_types,
                              layers_sizes=layers_sizes,
                              dropout_rates=dropout_rates,
                              n_outs=n_outs,
                              rho=0.95,
                              eps=1.E-6,
                              max_norm=0.,
                              fast_drop=fast_dropout,
                              debugprint=debug_print)
        else:
            nnet = NeuralNet(numpy_rng=numpy_rng,
                             n_ins=n_ins,
                             layers_types=layers_types,
                             layers_sizes=layers_sizes,
                             n_outs=n_outs,
                             rho=0.92,
                             eps=1.E-6,
                             max_norm=0.,
                             debugprint=debug_print)
    print("Created a neural net as: %s" % (str(nnet),))

    # get the training, validation and testing function for the model
    print('... getting the training functions')
    print(trainer_type)
    # The debug flag is only passed when requested so that the trainers'
    # own default stays in effect otherwise.
    trainer_kwargs = {'debug': True} if (debug_plot or debug_print) else {}
    if trainer_type == "adadelta":
        train_fn = nnet.get_adadelta_trainer(**trainer_kwargs)
    elif trainer_type == "adagrad":
        train_fn = nnet.get_adagrad_trainer(**trainer_kwargs)
    else:
        train_fn = nnet.get_SGD_trainer(**trainer_kwargs)

    train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator)
    valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator)
    test_scoref = nnet.score_classif(test_set_iterator)
    data_iterator = train_set_iterator

    if debug_on_test_only:
        data_iterator = test_set_iterator
        train_scoref = test_scoref

    print('... training the model')
    # early-stopping parameters
    patience = 1000  # look as this many examples regardless TODO
    patience_increase = 2.  # wait this much longer when a new best is
                            # found
    improvement_threshold = 0.995  # a relative improvement of this much is
                                   # considered significant
    best_validation_loss = numpy.inf
    # NOTE(review): test_score is never updated below, so the final summary
    # always reports 0; which scalar to derive from test_scoref() is unclear.
    test_score = 0.
    start_time = time.clock()

    done_looping = False
    epoch = 0
    lr = init_lr
    timer = None
    # trainers whose name contains "delta" do not take a learning rate
    needs_lr = "delta" not in trainer_type  # TODO remove need for this

    if debug_plot:
        print_mean_weights_biases(nnet.params)

    while (epoch < max_epochs) and (not done_looping):
        # only the "ab_net" spelling triggers the DTW recomputation
        if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0:
            print("recomputing DTW:")
            data_iterator.recompute_DTW(nnet.transform_x1())

        epoch = epoch + 1
        avg_costs = []
        avg_params_gradients_updates = []
        if debug_time:
            timer = time.time()
        for iteration, (x, y) in enumerate(data_iterator):
            if is_ab_net:  # TODO remove need for this if
                if needs_lr:
                    avg_cost = train_fn(x[0], x[1], y, lr)
                else:
                    avg_cost = train_fn(x[0], x[1], y)
                if debug_print >= 3:
                    print("cost: %s" % (avg_cost[0],))
                if debug_plot >= 2:
                    plot_costs(avg_cost[0])
                    if not len(avg_params_gradients_updates):
                        avg_params_gradients_updates = map(numpy.asarray,
                                                           avg_cost[1:])
                    else:
                        avg_params_gradients_updates = rolling_avg_pgu(
                            iteration, avg_params_gradients_updates,
                            map(numpy.asarray, avg_cost[1:]))
                if debug_plot >= 3:
                    plot_params_gradients_updates(iteration, avg_cost[1:])
            else:
                if needs_lr:
                    avg_cost = train_fn(x, y, lr)
                else:
                    avg_cost = train_fn(x, y)
            # debug trainers return a list whose first element is the cost
            if isinstance(avg_cost, list):
                avg_costs.append(avg_cost[0])
            else:
                avg_costs.append(avg_cost)
        if debug_print >= 2:
            print_mean_weights_biases(nnet.params)
        if debug_plot >= 2:
            plot_params_gradients_updates(epoch, avg_params_gradients_updates)
        if debug_time:
            print(' epoch %i took %f seconds' %
                  (epoch, time.time() - timer))
        avg_cost = numpy.mean(avg_costs)
        if numpy.isnan(avg_cost):
            print("avg costs is NaN so we're stopping here!")
            break
        print(' epoch %i, avg costs %f' % \
              (epoch, avg_cost))
        tmp_train = zip(*train_scoref())
        print(' epoch %i, training error same %f, diff %f' % \
              (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1])))
        # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t)
        # or another scheme for learning rate decay

        if debug_on_test_only:
            continue

        # we check the validation loss on every epoch
        validation_losses = zip(*valid_scoref())
        # TODO this is a mean of means (with different lengths)
        valid_same = numpy.mean(validation_losses[0])
        valid_diff = numpy.mean(validation_losses[1])
        this_validation_loss = 0.5*(1.-valid_same) +\
                0.5*valid_diff
        print(' epoch %i, valid error same %f, diff %f' % \
              (epoch, valid_same, valid_diff))
        # if we got the best validation score until now
        if this_validation_loss < best_validation_loss:
            with open(output_file_name + '.pickle', 'wb') as f:
                cPickle.dump(nnet, f, protocol=-1)
            # improve patience if loss improvement is good enough
            if (this_validation_loss < best_validation_loss *
                    improvement_threshold):
                patience = max(patience, iteration * patience_increase)
            # save best validation score and iteration number
            best_validation_loss = this_validation_loss
            # test it on the test set
            test_losses = test_scoref()
            # TODO these are means of means (with different lengths)
            test_score_same = numpy.mean(test_losses[0])
            test_score_diff = numpy.mean(test_losses[1])
            print((' epoch %i, test error of best model same %f diff %f') %
                  (epoch, test_score_same, test_score_diff))
        # NOTE(review): `iteration` is the in-epoch batch index, not a
        # running total, so this comparison is not a real patience check.
        if patience <= iteration:  # TODO correct that
            done_looping = True
            break

    end_time = time.clock()
    print(('Optimization complete with best validation score of %f, '
           'with test performance %f') %
          (best_validation_loss, test_score))
    sys.stderr.write('The fine tuning code for file ' +
                     os.path.split(__file__)[1] +
                     ' ran for %.2fm' % ((end_time - start_time) / 60.) +
                     '\n')
    with open(output_file_name + '_final.pickle', 'wb') as f:
        cPickle.dump(nnet, f, protocol=-1)
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit', iterator_type=DatasetDTWIterator, batch_size=100, nframes=13, features="fbank", init_lr=0.001, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path # TODO DO A FUNCTION if dataset_path[-7:] == '.joblib': if REDTW: data_same = joblib.load(dataset_path) shuffle(data_same) ten_percent = int(0.1 * len(data_same)) x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] mean = numpy.mean(x_arr_same, 0) std = numpy.std(x_arr_same, 0) numpy.savez("mean_std_3", mean=mean, std=std) print x_arr_same.shape print "mean:", mean print "std:", std marginf = 0 #(nframes-1)/2 # TODO train_set_iterator = iterator_type(data_same[:-ten_percent], mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf) valid_set_iterator = iterator_type(data_same[-ten_percent:], mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf) #test_dataset_path = dataset_path[:-7].replace("train", "test") + '.joblib' test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib' data_same = joblib.load(test_dataset_path) test_set_iterator = iterator_type(data_same, mean, std, nframes=nframes, batch_size=batch_size, marginf=marginf, only_same=True) n_ins = mean.shape[0] * nframes n_outs = DIM_EMBEDDING 
else: data_same = joblib.load(dataset_path) #data_same = [(word_label, talker1, talker2, fbanks1, fbanks2, DTW_cost, DTW_1to2, DTW_2to1)] print "number of word paired:", len(data_same) if debug_print: # some stats on the DTW dtw_costs = zip(*data_same)[5] words_frames = numpy.asarray( [fb.shape[0] for fb in zip(*data_same)[3]]) print "mean DTW cost", numpy.mean( dtw_costs), "std dev", numpy.std(dtw_costs) print "mean word length in frames", numpy.mean( words_frames), "std dev", numpy.std(words_frames) print "mean DTW cost per frame", numpy.mean( dtw_costs / words_frames), "std dev", numpy.std( dtw_costs / words_frames) # generate data_diff: # spkr_words = {} same_spkr = 0 for i, tup in enumerate(data_same): # spkr_words[tup[1]].append((i, 0)) # spkr_words[tup[2]].append((i, 1)) if tup[1] == tup[2]: same_spkr += 1 # to_del = [] # for spkr, words in spkr_words.iteritems(): # if len(words) < 2: # to_del.append(spkr) # print "to del len:", len(to_del) # for td in to_del: # del spkr_words[td] ratio = same_spkr * 1. / len(data_same) print "ratio same spkr / all for same:", ratio data_diff = [] # keys = spkr_words.keys() # lkeys = len(keys) - 1 ldata_same = len(data_same) - 1 same_spkr_diff = 0 for i in xrange(len(data_same)): word_1 = random.randint(0, ldata_same) word_1_type = data_same[word_1][0] word_2 = random.randint(0, ldata_same) while data_same[word_2][0] == word_1_type: word_2 = random.randint(0, ldata_same) wt1 = random.randint(0, 1) wt2 = random.randint(0, 1) if data_same[word_1][1 + wt1] == data_same[word_2][1 + wt2]: same_spkr_diff += 1 p1 = data_same[word_1][3 + wt1] p2 = data_same[word_2][3 + wt2] r1 = p1[:min(len(p1), len(p2))] r2 = p2[:min(len(p1), len(p2))] data_diff.append((r1, r2)) ratio = same_spkr_diff * 1. 
/ len(data_diff) print "ratio same spkr / all for diff:", ratio x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_arr_diff = numpy.r_[numpy.concatenate([e[0] for e in data_diff]), numpy.concatenate([e[1] for e in data_diff])] print x_arr_diff.shape x_arr_all = numpy.concatenate([x_arr_same, x_arr_diff]) mean = numpy.mean(x_arr_all, 0) std = numpy.std(x_arr_all, 0) numpy.savez("mean_std_3", mean=mean, std=std) x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, e in enumerate(x_same)] x_diff = [((e[0] - mean) / std, (e[1] - mean) / std) for e in data_diff] #shuffle(x_diff) y_diff = [[0 for _ in xrange(len(e[0]))] for i, e in enumerate(x_diff)] y = [j for i in zip(y_same, y_diff) for j in i] x = [j for i in zip(x_same, x_diff) for j in i] x1, x2 = zip(*x) assert x1[0].shape[0] == x2[0].shape[0] assert x1[0].shape[1] == x2[0].shape[1] assert len(x1) == len(x2) assert len(x1) == len(y) ten_percent = int(0.1 * len(x1)) n_ins = x1[0].shape[1] * nframes n_outs = DIM_EMBEDDING print "nframes:", nframes marginf = (nframes - 1) / 2 # TODO train_set_iterator = iterator_type( x1[:-ten_percent], x2[:-ten_percent], y[:-ten_percent], # TODO nframes=nframes, batch_size=batch_size, marginf=marginf) valid_set_iterator = iterator_type( x1[-ten_percent:], x2[-ten_percent:], y[-ten_percent:], # TODO nframes=nframes, batch_size=batch_size, marginf=marginf) ### TEST SET test_dataset_path = dataset_path[:-7].replace("train", "dev") + '.joblib' data_same = joblib.load(test_dataset_path) # DO ONLY SAME x_arr_same = numpy.r_[numpy.concatenate([e[3] for e in data_same]), numpy.concatenate([e[4] for e in data_same])] print x_arr_same.shape x_same = [((e[3][e[-2]] - mean) / std, (e[4][e[-1]] - mean) / std) for e in data_same] shuffle(x_same) # in place y_same = [[1 for _ in xrange(len(e[0]))] for i, 
e in enumerate(x_same)] x = x_same y = y_same x1, x2 = zip(*x) test_set_iterator = iterator_type(x1, x2, y, nframes=nframes, batch_size=batch_size, marginf=marginf) else: data = load_data(dataset_path, nframes=1, features=features, scaling='normalize', cv_frac='fixed', speakers=False, numpy_array_only=True) train_set_x, train_set_y = data[0] valid_set_x, valid_set_y = data[1] test_set_x, test_set_y = data[2] assert train_set_x.shape[1] == valid_set_x.shape[1] assert test_set_x.shape[1] == valid_set_x.shape[1] print "dataset loaded!" print "train set size", train_set_x.shape[0] print "validation set size", valid_set_x.shape[0] print "test set size", test_set_x.shape[0] print "phones in train", len(set(train_set_y)) print "phones in valid", len(set(valid_set_y)) print "phones in test", len(set(test_set_y)) n_outs = len(set(train_set_y)) to_int = {} with open(dataset_name + '_to_int_and_to_state_dicts_tuple.pickle') as f: to_int, _ = cPickle.load(f) print "nframes:", nframes train_set_iterator = iterator_type(train_set_x, train_set_y, to_int, nframes=nframes, batch_size=batch_size) valid_set_iterator = iterator_type(valid_set_x, valid_set_y, to_int, nframes=nframes, batch_size=batch_size) test_set_iterator = iterator_type(test_set_x, test_set_y, to_int, nframes=nframes, batch_size=batch_size) n_ins = test_set_x.shape[1] * nframes assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # TODO the proper network type other than just dropout or not nnet = None fast_dropout = False if "fast_dropout" in network_type: fast_dropout = True if "ab_net" in network_type or "abnet" in network_type: if "dropout" in network_type: print "dropout ab net" nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: print "ab net" nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='dot_prod', rho=0.95, eps=1.E-6, max_norm=0., debugprint=debug_print) else: if "dropout" in network_type: nnet = DropoutNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, dropout_rates=dropout_rates, n_outs=n_outs, rho=0.95, eps=1.E-6, max_norm=0., fast_drop=fast_dropout, debugprint=debug_print) else: nnet = NeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, rho=0.92, eps=1.E-6, max_norm=0., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... 
getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator) valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): if REDTW and "ab_net" in network_type and ((epoch + 1) % 20) == 0: print "recomputing DTW:" data_iterator.recompute_DTW(nnet.transform_x1()) epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. 
if "ab_net" in network_type or "abnet" in network_type: # remove need for this if if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map( numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) else: if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) avg_cost = numpy.mean(avg_costs) if numpy.isnan(avg_cost): print("avg costs is NaN so we're stopping here!") break print(' epoch %i, avg costs %f' % \ (epoch, avg_cost)) tmp_train = zip(*train_scoref()) print(' epoch %i, training error same %f, diff %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = zip(*valid_scoref()) #this_validation_loss = -numpy.mean(validation_losses[0]) # TODO this is a mean of means (with different lengths) this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\ 0.5*numpy.mean(validation_losses[1]) print(' epoch %i, valid error same %f, diff %f' % \ (epoch, 
numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score_same = numpy.mean( test_losses[0] ) # TODO this is a mean of means (with different lengths) test_score_diff = numpy.mean( test_losses[1] ) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model same %f diff %f') % (epoch, test_score_same, test_score_diff)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1)
def run(dataset_path=DEFAULT_DATASET, dataset_name='mnist', iterator_type=DatasetABIterator, batch_size=100, init_lr=0.001, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None if dataset_path[-7:] == '.joblib': test_dataset_path = dataset_path.replace('train', 'test') print "loading dataset from", dataset_path, "and", test_dataset_path x1_train, x2_train, y_train = joblib.load(dataset_path) if numpy.max(x1_train) > 1: x1_train = numpy.asarray(x1_train, dtype='float32') / 255 if numpy.max(x2_train) > 1: x2_train = numpy.asarray(x2_train, dtype='float32') / 255 x1_test, x2_test, y_test = joblib.load(test_dataset_path) if numpy.max(x1_test) > 1: x1_test = numpy.asarray(x1_test, dtype='float32') / 255 if numpy.max(x2_test) > 1: x2_test = numpy.asarray(x2_test, dtype='float32') / 255 ten_percent = int(0.1 * x1_train.shape[0]) train_set_iterator = iterator_type(x1_train[:-ten_percent], x2_train[:-ten_percent], y_train[:-ten_percent], batch_size=batch_size) valid_set_iterator = iterator_type(x1_train[-ten_percent:], x2_train[-ten_percent:], y_train[-ten_percent:], batch_size=batch_size) test_set_iterator = iterator_type(x1_test, x2_test, y_test, batch_size=batch_size) n_ins = x1_train.shape[1] n_outs = DIM_EMBEDDING else: SCALE = True N_SAMPLES = 10 from sklearn.datasets import fetch_mldata mnist = fetch_mldata('MNIST original') X = numpy.asarray(mnist.data, dtype='uint8') if SCALE: X = numpy.asarray(X, 
dtype='float32') X /= 255. y = numpy.asarray(mnist.target, dtype='uint8') X_train = X[:60000] y_train = y[:60000] xy = numpy.ndarray((X_train.shape[0], X_train.shape[1] + 1), dtype='float32') xy[:, :-1] = X_train xy[:, -1] = y_train numpy.random.shuffle(xy) ten_percent = int(0.1 * X_train.shape[0]) X_train = xy[:-ten_percent, :-1] y_train = xy[:-ten_percent, -1] X_valid = xy[-ten_percent:, :-1] y_valid = xy[-ten_percent:, -1] X_test = X[60000:] y_test = y[60000:] xy = numpy.ndarray((X_test.shape[0], X_test.shape[1] + 1), dtype='float32') xy[:, :-1] = X_test xy[:, -1] = y_test numpy.random.shuffle(xy) X_test = xy[:, :-1] y_test = xy[:, -1] print X_train.shape print X_valid.shape print X_test.shape train_set_iterator = DatasetABSamplingIteratorFromLabels( X_train, y_train, n_samples=N_SAMPLES, batch_size=batch_size) valid_set_iterator = DatasetABSamplingIteratorFromLabels( X_valid, y_valid, n_samples=N_SAMPLES, batch_size=batch_size) test_set_iterator = DatasetABSamplingIteratorFromLabels( X_test, y_test, n_samples=N_SAMPLES, batch_size=batch_size) n_ins = X_train.shape[1] n_outs = DIM_EMBEDDING assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # TODO the proper network type other than just dropout or not nnet = None fast_dropout = False if "fast_dropout" in network_type: fast_dropout = True if "dropout" in network_type: nnet = DropoutABNeuralNet( numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, #loss='cos_cos2', loss='hellinger', rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.9, eps=1.E-6, max_norm=4., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator) valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. if "ab_net" in network_type: # remove need for this if if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map( numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) else: if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) tmp_train = zip(*train_scoref()) print(' epoch %i, training error same %f, diff %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = 
zip(*valid_scoref()) #this_validation_loss = -numpy.mean(validation_losses[0]) # TODO this is a mean of means (with different lengths) this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\ 0.5*numpy.mean(validation_losses[1]) print(' epoch %i, valid error same %f, diff %f' % \ (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score_same = numpy.mean( test_losses[0] ) # TODO this is a mean of means (with different lengths) test_score_diff = numpy.mean( test_losses[1] ) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model same %f diff %f') % (epoch, test_score_same, test_score_diff)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f)
def run(dataset_path=DEFAULT_DATASET, dataset_name='mnist', iterator_type=DatasetABIterator, batch_size=100, init_lr=0.001, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None if dataset_path[-7:] == '.joblib': test_dataset_path = dataset_path.replace('train', 'test') print "loading dataset from", dataset_path, "and", test_dataset_path x1_train, x2_train, y_train = joblib.load(dataset_path) if numpy.max(x1_train) > 1: x1_train = numpy.asarray(x1_train, dtype='float32') / 255 if numpy.max(x2_train) > 1: x2_train = numpy.asarray(x2_train, dtype='float32') / 255 x1_test, x2_test, y_test = joblib.load(test_dataset_path) if numpy.max(x1_test) > 1: x1_test = numpy.asarray(x1_test, dtype='float32') / 255 if numpy.max(x2_test) > 1: x2_test = numpy.asarray(x2_test, dtype='float32') / 255 ten_percent = int(0.1 * x1_train.shape[0]) train_set_iterator = iterator_type(x1_train[:-ten_percent], x2_train[:-ten_percent], y_train[:-ten_percent], batch_size=batch_size) valid_set_iterator = iterator_type(x1_train[-ten_percent:], x2_train[-ten_percent:], y_train[-ten_percent:], batch_size=batch_size) test_set_iterator = iterator_type(x1_test, x2_test, y_test, batch_size=batch_size) n_ins = x1_train.shape[1] n_outs = DIM_EMBEDDING else: SCALE = True N_SAMPLES = 10 from sklearn.datasets import fetch_mldata mnist = fetch_mldata('MNIST original') X = numpy.asarray(mnist.data, dtype='uint8') if SCALE: X = numpy.asarray(X, 
dtype='float32') X /= 255. y = numpy.asarray(mnist.target, dtype='uint8') X_train = X[:60000] y_train = y[:60000] xy = numpy.ndarray((X_train.shape[0], X_train.shape[1] + 1), dtype='float32') xy[:, :-1] = X_train xy[:, -1] = y_train numpy.random.shuffle(xy) ten_percent = int(0.1 * X_train.shape[0]) X_train = xy[:-ten_percent, :-1] y_train = xy[:-ten_percent, -1] X_valid = xy[-ten_percent:, :-1] y_valid = xy[-ten_percent:, -1] X_test = X[60000:] y_test = y[60000:] xy = numpy.ndarray((X_test.shape[0], X_test.shape[1] + 1), dtype='float32') xy[:, :-1] = X_test xy[:, -1] = y_test numpy.random.shuffle(xy) X_test = xy[:, :-1] y_test = xy[:, -1] print X_train.shape print X_valid.shape print X_test.shape train_set_iterator = DatasetABSamplingIteratorFromLabels(X_train, y_train, n_samples=N_SAMPLES, batch_size=batch_size) valid_set_iterator = DatasetABSamplingIteratorFromLabels(X_valid, y_valid, n_samples=N_SAMPLES, batch_size=batch_size) test_set_iterator = DatasetABSamplingIteratorFromLabels(X_test, y_test, n_samples=N_SAMPLES, batch_size=batch_size) n_ins = X_train.shape[1] n_outs = DIM_EMBEDDING assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # TODO the proper network type other than just dropout or not nnet = None fast_dropout = False if "fast_dropout" in network_type: fast_dropout = True if "dropout" in network_type: nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, #loss='cos_cos2', loss='hellinger', rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.9, eps=1.E-6, max_norm=4., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator) valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: data_iterator = test_set_iterator train_scoref = test_scoref print '... training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f) while (epoch < max_epochs) and (not done_looping): epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): avg_cost = 0. if "ab_net" in network_type: # remove need for this if if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) else: if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x, y) else: avg_cost = train_fn(x, y, lr) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) print(' epoch %i, avg costs %f' % \ (epoch, numpy.mean(avg_costs))) tmp_train = zip(*train_scoref()) print(' epoch %i, training error same %f, diff %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = 
zip(*valid_scoref()) #this_validation_loss = -numpy.mean(validation_losses[0]) # TODO this is a mean of means (with different lengths) this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\ 0.5*numpy.mean(validation_losses[1]) print(' epoch %i, valid error same %f, diff %f' % \ (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score_same = numpy.mean(test_losses[0]) # TODO this is a mean of means (with different lengths) test_score_diff = numpy.mean(test_losses[1]) # TODO this is a mean of means (with different lengths) print((' epoch %i, test error of best model same %f diff %f') % (epoch, test_score_same, test_score_diff)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f)
def run(dataset_path="from_aren.joblib", dataset_name='timit', batch_size=100, nframes=13, features="fbank", init_lr=0.01, max_epochs=500, network_type="AB", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], loss='cos_cos2', recurrent_connections=[], prefix_fname='', debug_print=0, debug_time=False, debug_plot=0): """ Configures and run the neural net on the given dataset. """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path # TODO DO A FUNCTION FOR DATASET LOADING CRAP if dataset_path[-7:] != '.joblib': print >> sys.stderr, "prepare your dataset with align_words.py or lucid.py or buckeye.py" sys.exit(-1) ### LOADING DATA data_same = joblib.load(dataset_path) shuffle(data_same) dev_split_at = int(0.9 * len(data_same)) print data_same[0] print data_same[0][3].shape n_ins = data_same[0][3].shape[1] * nframes n_outs = DIM_EMBEDDING normalize = True min_max_scale = False marginf = (nframes-1)/2 # TODO ### TRAIN SET train_set_iterator = DatasetDTWWrdSpkrIterator( data_same[:dev_split_at], normalize=normalize, min_max_scale=min_max_scale, scale_f1=None, scale_f2=None, nframes=nframes, batch_size=batch_size, marginf=marginf) f1 = train_set_iterator._scale_f1 f2 = train_set_iterator._scale_f2 ### DEV SET valid_set_iterator = DatasetDTWWrdSpkrIterator( data_same[dev_split_at:], normalize=normalize, min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2, nframes=nframes, batch_size=batch_size, marginf=marginf) assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' nnet = None fast_dropout = False if "dropout" in network_type: print >> sys.stderr, "Dropout is not implemented for ABnets with 2 Outputs" nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, # TODO with 2 Outputs n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss=loss, rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss=loss, rho=0.90, eps=1.E-6, max_norm=0., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref_w = nnet.score_classif_same_diff_word_separated(train_set_iterator) valid_scoref_w = nnet.score_classif_same_diff_word_separated(valid_set_iterator) train_scoref_s = nnet.score_classif_same_diff_spkr_separated(train_set_iterator) valid_scoref_s = nnet.score_classif_same_diff_spkr_separated(valid_set_iterator) data_iterator = train_set_iterator print '... training the model' best_validation_loss = numpy.inf test_score = 0. 
start_time = time.clock() epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) while (epoch < max_epochs): epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): #print "x[0][0]", x[0][0] #print "x[1][0]", x[1][0] #print "y[0][0]", y[0][0] #print "y[1][0]", y[1][0] avg_cost = 0. if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y[0], y[1]) else: avg_cost = train_fn(x[0], x[1], y[0], y[1], lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) avg_cost = numpy.mean(avg_costs) if numpy.isnan(avg_cost): print("avg costs is NaN so we're stopping here!") break print(' epoch %i, avg costs %f' % \ (epoch, avg_cost)) tmp_train = zip(*train_scoref_w()) print(' epoch %i, training sim same words %f, diff words %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) tmp_train = zip(*train_scoref_s()) print(' epoch %i, training sim same spkrs %f, diff spkrs %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.)) ### 
TODO # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) # we check the validation loss on every epoch validation_losses_w = zip(*valid_scoref_w()) validation_losses_s = zip(*valid_scoref_s()) this_validation_loss = 0.25*(1.-numpy.mean(validation_losses_w[0])) +\ 0.25*numpy.mean(validation_losses_w[1]) +\ 0.25*(1.-numpy.mean(validation_losses_s[0])) +\ 0.25*numpy.mean(validation_losses_s[1]) print(' epoch %i, valid sim same words %f, diff words %f' % \ (epoch, numpy.mean(validation_losses_w[0]), numpy.mean(validation_losses_w[1]))) print(' epoch %i, valid sim same spkrs %f, diff spkrs %f' % \ (epoch, numpy.mean(validation_losses_s[0]), numpy.mean(validation_losses_s[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1) # save best validation score and iteration number best_validation_loss = this_validation_loss end_time = time.clock() print(('Optimization complete with best validation score of %f, ') % (best_validation_loss)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1)
def run(dataset_path=DEFAULT_DATASET, dataset_name='timit', iterator_type=DatasetDTWIterator, batch_size=100, nframes=13, features="fbank", init_lr=0.01, max_epochs=500, network_type="dropout_net", trainer_type="adadelta", layers_types=[ReLU, ReLU, ReLU, ReLU, LogisticRegression], layers_sizes=[2400, 2400, 2400, 2400], dropout_rates=[0.2, 0.5, 0.5, 0.5, 0.5], recurrent_connections=[], prefix_fname='', debug_on_test_only=False, debug_print=0, debug_time=False, debug_plot=0): """ FIXME TODO """ output_file_name = dataset_name if prefix_fname != "": output_file_name = prefix_fname + "_" + dataset_name output_file_name += "_" + features + str(nframes) output_file_name += "_" + network_type + "_" + trainer_type output_file_name += "_emb_" + str(DIM_EMBEDDING) print "output file name:", output_file_name n_ins = None n_outs = None print "loading dataset from", dataset_path # TODO DO A FUNCTION if dataset_path[-7:] != '.joblib': print >> sys.stderr, "prepare your dataset with align_words.py" sys.exit(-1) ### LOADING DATA data_same = joblib.load(dataset_path) shuffle(data_same) has_dev_and_test_set = True dev_dataset_path = dataset_path[:-7].replace("train", "") + 'dev.joblib' test_dataset_path = dataset_path[:-7].replace("train", "") + 'test.joblib' dev_split_at = len(data_same) test_split_at = len(data_same) if not os.path.exists(dev_dataset_path) or not os.path.exists( test_dataset_path): has_dev_and_test_set = False dev_split_at = int(0.8 * dev_split_at) test_split_at = int(0.9 * test_split_at) n_ins = data_same[0][3].shape[1] * nframes n_outs = DIM_EMBEDDING normalize = True # TODO without min_max_scale = False marginf = (nframes - 1) / 2 # TODO ### TRAIN SET if has_dev_and_test_set: train_set_iterator = DatasetDTWWrdSpkrIterator( data_same, normalize=normalize, min_max_scale=min_max_scale, scale_f1=None, scale_f2=None, nframes=nframes, batch_size=batch_size, marginf=marginf) else: train_set_iterator = DatasetDTWWrdSpkrIterator( data_same[:dev_split_at], 
normalize=normalize, min_max_scale=min_max_scale, scale_f1=None, scale_f2=None, nframes=nframes, batch_size=batch_size, marginf=marginf) f1 = train_set_iterator._scale_f1 f2 = train_set_iterator._scale_f2 ### DEV SET if has_dev_and_test_set: data_same = joblib.load(dev_dataset_path) valid_set_iterator = DatasetDTWWrdSpkrIterator( data_same, normalize=normalize, min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2, nframes=nframes, batch_size=batch_size, marginf=marginf) else: valid_set_iterator = DatasetDTWWrdSpkrIterator( data_same[dev_split_at:test_split_at], normalize=normalize, min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2, nframes=nframes, batch_size=batch_size, marginf=marginf) ### TEST SET if has_dev_and_test_set: data_same = joblib.load(test_dataset_path) test_set_iterator = DatasetDTWWrdSpkrIterator( data_same, normalize=normalize, min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2, nframes=nframes, batch_size=batch_size, marginf=marginf) else: test_set_iterator = DatasetDTWWrdSpkrIterator( data_same[test_split_at:], normalize=normalize, min_max_scale=min_max_scale, scale_f1=f1, scale_f2=f2, nframes=nframes, batch_size=batch_size, marginf=marginf) assert n_ins != None assert n_outs != None # numpy random generator numpy_rng = numpy.random.RandomState(123) print '... 
building the model' # TODO the proper network type other than just dropout or not nnet = None fast_dropout = False if "fast_dropout" in network_type: fast_dropout = True if "dropout" in network_type: nnet = DropoutABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.95, eps=1.E-6, max_norm=4., fast_drop=fast_dropout, debugprint=debug_print) else: # nnet = ABNeuralNet2Outputs(numpy_rng=numpy_rng, # n_ins=n_ins, # layers_types=layers_types, # layers_sizes=layers_sizes, # n_outs=n_outs, # loss='cos_cos2', # #loss='euclidean', # rho=0.90, # eps=1.E-6, # max_norm=0., # debugprint=debug_print) from nnet_archs import ABNeuralNet nnet = ABNeuralNet(numpy_rng=numpy_rng, n_ins=n_ins, layers_types=layers_types, layers_sizes=layers_sizes, n_outs=n_outs, loss='cos_cos2', rho=0.9, eps=1.E-6, max_norm=0., debugprint=debug_print) print "Created a neural net as:", print str(nnet) # get the training, validation and testing function for the model print '... getting the training functions' print trainer_type train_fn = None if debug_plot or debug_print: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer(debug=True) elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer(debug=True) else: train_fn = nnet.get_SGD_trainer(debug=True) else: if trainer_type == "adadelta": train_fn = nnet.get_adadelta_trainer() elif trainer_type == "adagrad": train_fn = nnet.get_adagrad_trainer() else: train_fn = nnet.get_SGD_trainer() train_scoref = nnet.score_classif_same_diff_separated(train_set_iterator) valid_scoref = nnet.score_classif_same_diff_separated(valid_set_iterator) test_scoref = nnet.score_classif(test_set_iterator) data_iterator = train_set_iterator if debug_on_test_only: print >> sys.stderr, "NOT IMPLEMENTED" sys.exit(-1) data_iterator = test_set_iterator print '... 
training the model' # early-stopping parameters patience = 1000 # look as this many examples regardless TODO patience_increase = 2. # wait this much longer when a new best is # found improvement_threshold = 0.995 # a relative improvement of this much is # considered significant best_validation_loss = numpy.inf test_score = 0. start_time = time.clock() done_looping = False epoch = 0 lr = init_lr timer = None if debug_plot: print_mean_weights_biases(nnet.params) #with open(output_file_name + 'epoch_0.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) while (epoch < max_epochs) and (not done_looping): if REDTW and ("ab_net" in network_type or "abnet" in network_type) and ((epoch + 1) % 20) == 0: print "recomputing DTW:" data_iterator.recompute_DTW(nnet.transform_x1()) epoch = epoch + 1 avg_costs = [] avg_params_gradients_updates = [] if debug_time: timer = time.time() for iteration, (x, y) in enumerate(data_iterator): #print "x[0][0]", x[0][0] #print "x[1][0]", x[1][0] #print "y[0][0]", y[0][0] #print "y[1][0]", y[1][0] avg_cost = 0. 
if "delta" in trainer_type: # TODO remove need for this if avg_cost = train_fn(x[0], x[1], y) else: avg_cost = train_fn(x[0], x[1], y, lr) if debug_print >= 3: print "cost:", avg_cost[0] if debug_plot >= 2: plot_costs(avg_cost[0]) if not len(avg_params_gradients_updates): avg_params_gradients_updates = map(numpy.asarray, avg_cost[1:]) else: avg_params_gradients_updates = rolling_avg_pgu( iteration, avg_params_gradients_updates, map(numpy.asarray, avg_cost[1:])) if debug_plot >= 3: plot_params_gradients_updates(iteration, avg_cost[1:]) if type(avg_cost) == list: avg_costs.append(avg_cost[0]) else: avg_costs.append(avg_cost) if debug_print >= 2: print_mean_weights_biases(nnet.params) if debug_plot >= 2: plot_params_gradients_updates(epoch, avg_params_gradients_updates) if debug_time: print(' epoch %i took %f seconds' % (epoch, time.time() - timer)) avg_cost = numpy.mean(avg_costs) if numpy.isnan(avg_cost): print("avg costs is NaN so we're stopping here!") break print(' epoch %i, avg costs %f' % \ (epoch, avg_cost)) tmp_train = zip(*train_scoref()) print(' epoch %i, training sim same %f, diff %f' % \ (epoch, numpy.mean(tmp_train[0]), numpy.mean(tmp_train[1]))) # TODO update lr(t) = lr(0) / (1 + lr(0) * lambda * t) lr = numpy.float32(init_lr / (numpy.sqrt(iteration) + 1.)) ### TODO #lr = numpy.float32(init_lr / (iteration + 1.)) ### TODO # or another scheme for learning rate decay #with open(output_file_name + 'epoch_' +str(epoch) + '.pickle', 'wb') as f: # cPickle.dump(nnet, f, protocol=-1) if debug_on_test_only: continue # we check the validation loss on every epoch validation_losses = zip(*valid_scoref()) this_validation_loss = 0.5*(1.-numpy.mean(validation_losses[0])) +\ 0.5*numpy.mean(validation_losses[1]) print(' epoch %i, valid sim same %f, diff %f' % \ (epoch, numpy.mean(validation_losses[0]), numpy.mean(validation_losses[1]))) # if we got the best validation score until now if this_validation_loss < best_validation_loss: with open(output_file_name + '.pickle', 
'wb') as f: cPickle.dump(nnet, f, protocol=-1) # improve patience if loss improvement is good enough if (this_validation_loss < best_validation_loss * improvement_threshold): patience = max(patience, iteration * patience_increase) # save best validation score and iteration number best_validation_loss = this_validation_loss # test it on the test set test_losses = test_scoref() test_score_same = numpy.mean( test_losses[0] ) # TODO this is a mean of means (with different lengths) test_score_diff = numpy.mean( test_losses[1] ) # TODO this is a mean of means (with different lengths) print((' epoch %i, test sim of best model same %f diff %f') % (epoch, test_score_same, test_score_diff)) if patience <= iteration: # TODO correct that done_looping = True break end_time = time.clock() print(('Optimization complete with best validation score of %f, ' 'with test performance %f') % (best_validation_loss, test_score)) print >> sys.stderr, ('The fine tuning code for file ' + os.path.split(__file__)[1] + ' ran for %.2fm' % ((end_time - start_time) / 60.)) with open(output_file_name + '_final.pickle', 'wb') as f: cPickle.dump(nnet, f, protocol=-1)