# MODEL = "DATA_fbank_LABEL_phoneme48_HIDDEN_LAYERS_1024-1024-1024-1024_L_RATE_0.001_MOMENTUM_0.9_DROPOUT_0_EPOCH_100" TEST_DATA,VAL_DATA = readfile( TEST_ROOT+TEST,1 ) PRED_FILE = open( PREDICTION_ROOT + PREDICTION ,'wb') # Get Dictionaries Phone48 = load_list39to48() PhoneMap48to39 = load_dict_48to39() # For CSV HEADER = ["Id","Prediction"] ######################## # Predict # ######################## IDs,Feats = SepIDnFeat(TEST_DATA) x = np.asarray(Feats,dtype='float32').T y = nn.test(x) maxpositions = np.argmax(y,axis=0) output = [ PhoneMap48to39[Phone48[pos]] for pos in maxpositions ] c = csv.writer(PRED_FILE,delimiter =',') c.writerow(HEADER) c.writerows(zip(IDs,output)) PRED_FILE.close()
# End-of-epoch housekeeping (runs once per training epoch):
# anneal the learning rate every L_RATE_DECAY_STEP epochs, skipping the last.
if (epoch+1 != MAX_EPOCH) and ((epoch+1) % L_RATE_DECAY_STEP == 0):
    print "learning rate annealed at epoch {0}".format(epoch+1)
    LEARNING_RATE /= 10
# Periodically checkpoint the model parameters to disk.
if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
    fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
    saved_params = (nn.layers, nn.W, nn.b)
    pickle.dump(saved_params, fh)
    fh.close()
#pdb.set_trace()
# Calculate Validation Set Error
val_error_count = 0
for val_x,val_ans in zip(VAL_INPUT,VAL_ANSWER):
    val_y = nn.test(val_x.T)
    val_max_idx = np.argmax(val_y,axis=0)
    # Count frames whose predicted class index differs from the answer.
    val_error_count += len([i for i,j in zip(val_max_idx,val_ans) if i != j])
val_error = float(val_error_count)/VAL_NUM
print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*val_error,'%', "| Epoch time:",tEnd-tStart
# Reshuffle Batches
# Shuffle inputs and outputs together so pairs stay aligned.
TOTAL = zip(BATCHED_INPUT,BATCHED_OUTPUT)
random.shuffle(TOTAL)
BATCHED_INPUT = list(zip(*TOTAL))[0]
BATCHED_OUTPUT = list(zip(*TOTAL))[1]
print "Total training time:",totaltime
phone_map_freq = np.zeros(PHONE_LENGTH) for i in xrange(STATE_LENGTH): ph48_idx = PhoneIdx[ PhoneState[i] ] phone_map_freq[ph48_idx] += 1 print "Start test and saving..." for idx in range(0,int(len(IDs)/BATCH_SIZE)): if idx%100==0: print idx begin = idx * BATCH_SIZE end = (idx+1) * BATCH_SIZE if end > len(IDs): end = len(IDs) test_set = mem_data[:,begin:end] #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE] result = nn.test(test_set) # save result into pgram memmap. result = result.T for i in xrange(STATE_LENGTH): ph48_idx = PhoneIdx[ PhoneState[i] ] posteriorgram[begin:end, ph48_idx] += result[:,i] posteriorgram[begin:end,:] /= phone_map_freq # normalize to porb. ph_sum = np.zeros((BATCH_SIZE,1)) ph_sum[:,0] = np.sum(posteriorgram[begin:end,:], axis=1)[:] posteriorgram[begin:end,:] /= ph_sum #posteriorgram[begin:end,:] = result[:,:].T
# End-of-epoch housekeeping: periodically checkpoint model parameters
# (the final epoch is saved elsewhere, so it is skipped here).
if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
    fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
    saved_params = (nn.layers, nn.W, nn.b)
    pickle.dump(saved_params, fh)
    fh.close()
# Calculate Validation Set Error
# Run validation in mini-batches to bound memory use.
val_batch = 500
val_output = []
for i in xrange( (val_x.shape[1]-1)/val_batch +1):   # ceil(n_frames/val_batch) batches
    begin = i*val_batch
    if (i+1)*val_batch > val_x.shape[1]:
        end = val_x.shape[1]
    else:
        end = (i+1)*val_batch
    val_y = nn.test(val_x[:,begin:end])
    val_maxpositions = np.argmax(val_y,axis=0)
    # Map each predicted output index to its state label.
    val_output += [ PhoneState[pos] for pos in val_maxpositions ]
val_error_count = len([ i for i,j in zip(val_output,val_label) if i != j])
valerror = float(val_error_count)/len(val_output)
tStartR = time.time()
print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*valerror,'%', "| Epoch time:",tEnd-tStart
print "Reshuffling..."
# Rebuild batches so each epoch sees a different batch composition.
LABELED_BATCHED_TRAINING_SET = batch( LABELED_TRAINING_SET,BATCH_SIZE )
LABELED_BATCHED_LABEL = MatchLabel2Batches( LABELED_BATCHED_TRAINING_SET,LABEL_DICT )
BATCHED_TRAINING_SET = removeBatchLabel(LABELED_BATCHED_TRAINING_SET)
BATCHED_LABEL = removeBatchLabel(LABELED_BATCHED_LABEL)
BATCHED_VECTORS = BatchedLabelToVector(BATCHED_LABEL)
mem_data = np.memmap(MEM_DATA,dtype='float32',mode='r',shape=MEM_DATA_shape) IDs = readID(PKL_ID) print "Data parsed" ######################## # Save posteriorgram # ######################## mem_shape = (STATE_LENGTH,len(IDs)) posteriorgram = np.memmap(PGRAM,dtype='float32',mode='w+',shape=mem_shape) ######################## # Predict & To48 # ######################## print "Start test and saving..." for idx in range(0,int(len(IDs)/BATCH_SIZE)): begin = idx * BATCH_SIZE end = (idx+1) * BATCH_SIZE #if end > len(IDs): # end = len(IDs) training_set = mem_data[:,begin:end] #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE] result = nn.test(training_set) # save result into pgram memmap. posteriorgram[:,begin:end] = result[:,:] del training_set del result pdb.set_trace()
# Post-training: reshuffle bookkeeping, report training-set accuracy, and
# pickle the final model parameters.
print "Reshuffling..."
pickList = shuffle(pickList)
tEndR = time.time()
print "Reshuffle time {0}".format(tEndR-tStartR)
print totaltime
########################
#  Training set Result #
########################
# Measure classification accuracy over the (already batched) training set.
n_labels = 0
correct = 0
for batched_inputs,batched_outputs in zip(BATCHED_INPUT,BATCHED_OUTPUT):
    result = nn.test(batched_inputs)
    n_labels += result.shape[1]   # one column per frame
    result = np.argmax(result, axis=0)
    answer = np.argmax(batched_outputs, axis=0)
    equal_entries = result == answer
    correct += np.sum( equal_entries )
correctness = 100 * ( correct / float(n_labels) )
print "Training set Result {0}".format(correctness) + "%"
########################
#      Save Model      #
########################
filehandler = open(MODEL_ROOT+MODEL,'wb')
saved_params = (nn.layers, nn.W, nn.b)
pickle.dump(saved_params, filehandler)
# Main training loop: one nn.train pass per batch per epoch, followed by a
# per-sample validation sweep.
p48to39dict = load_dict_48to39()
totaltime = 0
for epoch in range(MAX_EPOCH):
    tStart = time.time()
    cost = 0
    for batched_inputs,batched_outputs in zip(BATCHED_INPUT,BATCHED_OUTPUT):
        cost += nn.train(batched_inputs,batched_outputs)
    tEnd = time.time()
    totaltime += tEnd - tStart
    # Calculate Validation Error
    valsum = 0
    for val in validationNlabel:
        # val[1] is the feature vector, val[2] the ground-truth label.
        val_x = np.transpose( np.asarray([val[1]],dtype='float32') )
        p_feat = nn.test(val_x)
        pos = np.argmax(p_feat)
        p_48 = p48list[pos]
        # NOTE(review): p_39 is computed but unused — the comparison below is
        # against the 48-phone label; confirm that is intended.
        p_39 = p48to39dict[p_48]
        if val[2] == p_48:
            valsum += 1
        print "valdiating:",val[2],p_48
    valcorrect = float(valsum)/len(validationNlabel)
    print "Epoch:",epoch,"Cost:",cost, "Epoch time:",tEnd-tStart,"Val correct,",valcorrect
print totaltime
########################
#      Save Model      #
# Get Dictionaries Phone48 = load_liststateto48() PhoneMap48to39 = load_dict_48to39() # For CSV HEADER = ["Id","Prediction"] ######################## # Predict # ######################## IDs,Feats = SepIDnFeat(TEST_DATA) del TEST_DATA x = np.asarray(Feats,dtype='float32') x = np.transpose(x) y = nn.test(x) maxpositions = np.argmax(y,axis=0) output = [ PhoneMap48to39[Phone48[pos]] for pos in maxpositions ] del x c = csv.writer(PRED_FILE,delimiter =',') c.writerow(HEADER) c.writerows(zip(IDs,output)) PRED_FILE.close()
# End-of-epoch housekeeping: halve the learning rate (annealing step).
LEARNING_RATE*=0.5
# Periodically checkpoint model parameters (final epoch saved elsewhere).
if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
    fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
    saved_params = (nn.layers, nn.W, nn.b)
    pickle.dump(saved_params, fh)
    fh.close()
# Calculate Validation Set Error
# move the declaration outside the loop
# NOTE(review): val_output and val_batch are expected to be (re)initialized
# outside this fragment; if val_output is not reset each epoch it will
# accumulate predictions across epochs — verify at the declaration site.
for i in xrange( (val_x.shape[1]-1)/val_batch +1):   # ceil(n_frames/val_batch) batches
    begin = i*val_batch
    if (i+1)*val_batch > val_x.shape[1]:
        end = val_x.shape[1]
    else:
        end = (i+1)*val_batch
    val_y = nn.test(val_x[:,begin:end])
    val_maxpositions = np.argmax(val_y,axis=0)
    val_output += [ PhoneState[pos] for pos in val_maxpositions ]
val_error_count = len([ i for i,j in zip(val_output,val_label) if i != j])
valerror = float(val_error_count)/len(val_output)
tStartR = time.time()
print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*valerror,'%', "| Epoch time:",tEnd-tStart
print "Reshuffling..."
# Re-read and re-batch the training data from disk every epoch.
LABELED_TRAINING_SET = readfile_inloop()
LABEL_DICT = readLabel()
LABELED_BATCHED_TRAINING_SET = batch( LABELED_TRAINING_SET,BATCH_SIZE )
del LABELED_TRAINING_SET   # free the unbatched copy
LABELED_BATCHED_LABEL = MatchLabel2Batches( LABELED_BATCHED_TRAINING_SET,LABEL_DICT )
# Finish writing the checkpoint started just above this fragment.
pickle.dump(saved_params, fh)
fh.close()
# Calculate Validation Set Error
# move the declaration outside the loop
valerror = None
# VAL_SET_RATIO == 1 means all data is used for training (no held-out set).
if VAL_SET_RATIO != 1:
    val_batch = 512
    val_output = []
    val_error_count = 0
    for i in xrange( (val_data.shape[1]-1)/val_batch +1):   # ceil division
        begin = i*val_batch
        end = (i+1)*val_batch
        if end > val_data.shape[1]:
            end = val_data.shape[1]
        val_result = nn.test(val_data[:,begin:end])
        val_maxpositions = np.argmax(val_result,axis=0)
        #pdb.set_trace()
        #val_output += [ PhoneState[pos] for pos in val_maxpositions ]
        #val_output += val_maxpositions.tolist()
        # Compare predicted indices directly against the label slice.
        val_error_count += len([ i \
            for i,j in zip(val_maxpositions,val_label[begin:end]) if i!=j])
    valerror = float(val_error_count)/len(val_label)
tStartR = time.time()
if VAL_SET_RATIO != 1:
    print "Epoch:",epoch+1,"| Cost:",cost,"| Val Error:", 100*valerror,'%', "| Epoch time:",tEnd-tStart
else:
    print "Epoch:",epoch+1,"| Cost:",cost,"| Epoch time:",tEnd-tStart
print "Reshuffling..."
# Finish writing the checkpoint started just above this fragment.
pickle.dump(saved_params, fh)
fh.close()
# Calculate Validation Set Error
# move the declaration outside the loop
valerror = None
# VAL_SET_RATIO == 1 means all data is used for training (no held-out set).
if VAL_SET_RATIO != 1:
    val_batch = 512
    val_output = []
    val_error_count = 0
    for i in xrange((val_data.shape[1] - 1) / val_batch + 1):   # ceil division
        begin = i * val_batch
        end = (i + 1) * val_batch
        if end > val_data.shape[1]:
            end = val_data.shape[1]
        val_result = nn.test(val_data[:, begin:end])
        val_maxpositions = np.argmax(val_result, axis=0)
        #pdb.set_trace()
        #val_output += [ PhoneState[pos] for pos in val_maxpositions ]
        #val_output += val_maxpositions.tolist()
        # Compare predicted indices directly against the label slice.
        val_error_count += len([ i \
            for i,j in zip(val_maxpositions,val_label[begin:end]) if i!=j])
    valerror = float(val_error_count) / len(val_label)
tStartR = time.time()
if VAL_SET_RATIO != 1:
    print "Epoch:", epoch + 1, "| Cost:", cost, "| Val Error:", 100 * valerror, '%', "| Epoch time:", tEnd - tStart
else:
    print "Epoch:", epoch + 1, "| Cost:", cost, "| Epoch time:", tEnd - tStart
print "Reshuffling..."
# Main training loop: one nn.train pass per batch per epoch, followed by a
# per-sample validation sweep.
p48to39dict = load_dict_48to39()
totaltime = 0
for epoch in range(MAX_EPOCH):
    tStart = time.time()
    cost = 0
    for batched_inputs, batched_outputs in zip(BATCHED_INPUT, BATCHED_OUTPUT):
        cost += nn.train(batched_inputs, batched_outputs)
    tEnd = time.time()
    totaltime += tEnd - tStart
    # Calculate Validation Error
    valsum = 0
    for val in validationNlabel:
        # val[1] is the feature vector, val[2] the ground-truth label.
        val_x = np.transpose(np.asarray([val[1]], dtype='float32'))
        p_feat = nn.test(val_x)
        pos = np.argmax(p_feat)
        p_48 = p48list[pos]
        # NOTE(review): p_39 is computed but unused — the comparison below is
        # against the 48-phone label; confirm that is intended.
        p_39 = p48to39dict[p_48]
        if val[2] == p_48:
            valsum += 1
        print "valdiating:", val[2], p_48
    valcorrect = float(valsum) / len(validationNlabel)
    print "Epoch:", epoch, "Cost:", cost, "Epoch time:", tEnd - tStart, "Val correct,", valcorrect
print totaltime
########################
#      Save Model      #
########################
'''
mem_data = np.memmap(MEM_DATA, dtype='float32', mode='r', shape=MEM_DATA_shape) IDs = readID(PKL_ID) print "Data parsed" ######################## # Save posteriorgram # ######################## mem_shape = (STATE_LENGTH, len(IDs)) posteriorgram = np.memmap(PGRAM, dtype='float32', mode='w+', shape=mem_shape) ######################## # Predict & To48 # ######################## print "Start test and saving..." for idx in range(0, int(len(IDs) / BATCH_SIZE)): begin = idx * BATCH_SIZE end = (idx + 1) * BATCH_SIZE #if end > len(IDs): # end = len(IDs) training_set = mem_data[:, begin:end] #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE] result = nn.test(training_set) # save result into pgram memmap. posteriorgram[:, begin:end] = result[:, :] del training_set del result pdb.set_trace()