Ejemplo n.º 1
0
#
# Identifier of the trained model these predictions come from.
MODEL = "DATA_fbank_LABEL_phoneme48_HIDDEN_LAYERS_1024-1024-1024-1024_L_RATE_0.001_MOMENTUM_0.9_DROPOUT_0_EPOCH_100"
TEST_DATA,VAL_DATA = readfile( TEST_ROOT+TEST,1 )

# Get Dictionaries
Phone48 = load_list39to48()          # class index -> 48-phone symbol
PhoneMap48to39 = load_dict_48to39()  # 48-phone -> 39-phone mapping

# For CSV
HEADER = ["Id","Prediction"]

########################
#       Predict        #
########################


IDs,Feats = SepIDnFeat(TEST_DATA)

# Network expects one column per frame, so transpose the row-per-frame matrix.
x = np.asarray(Feats,dtype='float32').T
y = nn.test(x)

# Most probable class per frame, mapped down to the 39-phone evaluation set.
maxpositions = np.argmax(y,axis=0)
output = [ PhoneMap48to39[Phone48[pos]] for pos in maxpositions ]

# 'wb' is what the Python 2 csv module expects; the context manager
# guarantees the file is closed even if writing raises (the original
# relied on a bare close() after the writes).
with open( PREDICTION_ROOT + PREDICTION ,'wb') as PRED_FILE:
    c = csv.writer(PRED_FILE,delimiter =',')
    c.writerow(HEADER)
    c.writerows(zip(IDs,output))
Ejemplo n.º 2
0
    # Anneal the learning rate every L_RATE_DECAY_STEP epochs; the final
    # epoch is skipped since no further training would use the new rate.
    if (epoch+1 != MAX_EPOCH) and ((epoch+1) % L_RATE_DECAY_STEP == 0):
        print "learning rate annealed at epoch {0}".format(epoch+1)
        LEARNING_RATE /= 10

    # Periodically checkpoint the model parameters mid-training.
    if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
        fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
        saved_params = (nn.layers, nn.W, nn.b)
        pickle.dump(saved_params, fh)
        fh.close()
    #pdb.set_trace()

    # Calculate Validation Set Error
    val_error_count = 0
    for val_x,val_ans in zip(VAL_INPUT,VAL_ANSWER):
        val_y = nn.test(val_x.T)
        # argmax over axis 0 picks the predicted class index per frame.
        val_max_idx = np.argmax(val_y,axis=0)
        val_error_count += len([i for i,j in zip(val_max_idx,val_ans) if i != j])

    val_error = float(val_error_count)/VAL_NUM


    print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*val_error,'%', "| Epoch time:",tEnd-tStart

    # Reshuffle Batches
    # NOTE(review): relies on Python 2 zip() returning a list;
    # random.shuffle would fail on the Python 3 zip iterator.
    TOTAL = zip(BATCHED_INPUT,BATCHED_OUTPUT)
    random.shuffle(TOTAL)
    BATCHED_INPUT = list(zip(*TOTAL))[0]
    BATCHED_OUTPUT = list(zip(*TOTAL))[1]

print "Total training time:",totaltime
Ejemplo n.º 3
0
phone_map_freq = np.zeros(PHONE_LENGTH)
for i in xrange(STATE_LENGTH):
    ph48_idx = PhoneIdx[ PhoneState[i] ]
    phone_map_freq[ph48_idx] += 1

print "Start test and saving..."
for idx in range(0,int(len(IDs)/BATCH_SIZE)):
    if idx%100==0:
        print idx
    begin = idx * BATCH_SIZE
    end   = (idx+1) * BATCH_SIZE
    if end > len(IDs):
        end = len(IDs)
    test_set = mem_data[:,begin:end]
    #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE]
    result = nn.test(test_set)

    # save result into pgram memmap.
    result = result.T
    for i in xrange(STATE_LENGTH):
        ph48_idx = PhoneIdx[ PhoneState[i] ]
        posteriorgram[begin:end, ph48_idx] += result[:,i]

    posteriorgram[begin:end,:] /= phone_map_freq

    # normalize to porb.
    ph_sum = np.zeros((BATCH_SIZE,1))
    ph_sum[:,0] = np.sum(posteriorgram[begin:end,:], axis=1)[:]
    posteriorgram[begin:end,:] /= ph_sum
    
    #posteriorgram[begin:end,:] = result[:,:].T
Ejemplo n.º 4
0
    
    # Periodically checkpoint the model parameters mid-training; the final
    # epoch is skipped (presumably saved elsewhere -- confirm downstream).
    if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
        fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
        saved_params = (nn.layers, nn.W, nn.b)
        pickle.dump(saved_params, fh)
        fh.close()
    # Calculate Validation Set Error
    # Run validation in val_batch-wide column slices to bound memory use.
    val_batch = 500
    val_output = []
    for i in xrange( (val_x.shape[1]-1)/val_batch +1):  # ceil(cols / val_batch)
        begin = i*val_batch
        if (i+1)*val_batch > val_x.shape[1]:
            end = val_x.shape[1]
        else:
            end = (i+1)*val_batch
        val_y = nn.test(val_x[:,begin:end])
        # argmax over axis 0 -> predicted state index per frame.
        val_maxpositions = np.argmax(val_y,axis=0)
        val_output += [ PhoneState[pos] for pos in val_maxpositions ]

    val_error_count = len([ i for i,j in zip(val_output,val_label) if i != j])
    valerror = float(val_error_count)/len(val_output)

    tStartR = time.time()
    print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*valerror,'%', "| Epoch time:",tEnd-tStart

    print "Reshuffling..."
    # Rebuild the batched training set so every epoch sees a fresh shuffle.
    LABELED_BATCHED_TRAINING_SET = batch( LABELED_TRAINING_SET,BATCH_SIZE )
    LABELED_BATCHED_LABEL = MatchLabel2Batches( LABELED_BATCHED_TRAINING_SET,LABEL_DICT )
    BATCHED_TRAINING_SET   = removeBatchLabel(LABELED_BATCHED_TRAINING_SET)
    BATCHED_LABEL = removeBatchLabel(LABELED_BATCHED_LABEL)
    BATCHED_VECTORS = BatchedLabelToVector(BATCHED_LABEL)
Ejemplo n.º 5
0
mem_data = np.memmap(MEM_DATA,dtype='float32',mode='r',shape=MEM_DATA_shape)
IDs = readID(PKL_ID)
print "Data parsed"

########################
#  Save posteriorgram  #
########################

mem_shape = (STATE_LENGTH,len(IDs))
posteriorgram = np.memmap(PGRAM,dtype='float32',mode='w+',shape=mem_shape)

########################
#    Predict & To48    #
########################
print "Start test and saving..."
for idx in range(0,int(len(IDs)/BATCH_SIZE)):
    begin = idx * BATCH_SIZE
    end   = (idx+1) * BATCH_SIZE
    #if end > len(IDs):
    #    end = len(IDs)
    training_set = mem_data[:,begin:end]
    #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE]
    result = nn.test(training_set)
    # save result into pgram memmap.
    
    posteriorgram[:,begin:end] = result[:,:]

    del training_set
    del result
pdb.set_trace()
Ejemplo n.º 6
0
    print "Reshuffling..."
    # Shuffle the pick order so the next epoch visits batches differently.
    pickList = shuffle(pickList)
    tEndR = time.time()
    print "Reshuffle time {0}".format(tEndR-tStartR)


print totaltime

########################
#  Traing set Result   #
########################

n_labels = 0
correct  = 0
for batched_inputs,batched_outputs in zip(BATCHED_INPUT,BATCHED_OUTPUT):
    result = nn.test(batched_inputs)
    n_labels += result.shape[1]
    result = np.argmax(result, axis=0)
    answer = np.argmax(batched_outputs, axis=0)
    equal_entries = result == answer
    correct += np.sum( equal_entries )
correctness = 100 * ( correct / float(n_labels) )
print "Training set Result {0}".format(correctness) + "%"

########################
#      Save Model      #
########################

filehandler = open(MODEL_ROOT+MODEL,'wb')
saved_params = (nn.layers, nn.W, nn.b)
pickle.dump(saved_params, filehandler)
Ejemplo n.º 7
0
p48to39dict = load_dict_48to39()  # 48-phone -> 39-phone mapping

totaltime = 0
for epoch in range(MAX_EPOCH):
    tStart = time.time()
    cost = 0
    # One full pass over the mini-batches; nn.train returns the batch cost.
    for batched_inputs,batched_outputs in zip(BATCHED_INPUT,BATCHED_OUTPUT):
        cost += nn.train(batched_inputs,batched_outputs)
    tEnd = time.time()
    totaltime += tEnd - tStart

    # Calculate Validation Error
    valsum = 0
    for val in validationNlabel:
        # val[1] is one feature vector; transpose to a single column for nn.test.
        val_x = np.transpose( np.asarray([val[1]],dtype='float32') )
        p_feat = nn.test(val_x)
        pos = np.argmax(p_feat)
        p_48 = p48list[pos]
        p_39 = p48to39dict[p_48]
        # NOTE(review): p_39 is computed but never used -- accuracy below is
        # measured against the 48-phone label (val[2]); confirm whether
        # 39-phone scoring was intended.
        if val[2] == p_48:
            valsum += 1
        print "valdiating:",val[2],p_48
    valcorrect = float(valsum)/len(validationNlabel)

    print "Epoch:",epoch,"Cost:",cost, "Epoch time:",tEnd-tStart,"Val correct,",valcorrect
print totaltime



########################
#      Save Model      #
Ejemplo n.º 8
0
# Get Dictionaries
Phone48 = load_liststateto48()       # state index -> 48-phone symbol
PhoneMap48to39 = load_dict_48to39()  # 48-phone -> 39-phone mapping

# For CSV
HEADER = ["Id","Prediction"]

########################
#       Predict        #
########################


IDs,Feats = SepIDnFeat(TEST_DATA)
del TEST_DATA  # free the raw parse before building the float matrix

# Network expects one column per frame; use .T for consistency with the
# sibling prediction script instead of a separate np.transpose call.
x = np.asarray(Feats,dtype='float32').T

y = nn.test(x)

# Most probable class per frame, mapped down to the 39-phone evaluation set.
maxpositions = np.argmax(y,axis=0)
output = [ PhoneMap48to39[Phone48[pos]] for pos in maxpositions ]

del x  # the feature matrix is no longer needed
c = csv.writer(PRED_FILE,delimiter =',')
c.writerow(HEADER)
c.writerows(zip(IDs,output))

PRED_FILE.close()
Ejemplo n.º 9
0
        # Halve the learning rate (the enclosing condition is above this view).
        LEARNING_RATE*=0.5
    
    # Periodically checkpoint the model parameters mid-training.
    if epoch+1 != MAX_EPOCH and (epoch+1) % SAVE_MODEL_EPOCH == 0:
        fh = open(MODEL_ROOT+MODEL+"_at_{0}".format(epoch+1),'wb')
        saved_params = (nn.layers, nn.W, nn.b)
        pickle.dump(saved_params, fh)
        fh.close()
    # Calculate Validation Set Error
    # move the declaration outside the loop
    # NOTE(review): val_output is extended here but (per the comment above)
    # declared outside the epoch loop -- if it is never reset each epoch,
    # predictions accumulate across epochs and valerror is skewed; verify.
    for i in xrange( (val_x.shape[1]-1)/val_batch +1):  # ceil(cols / val_batch)
        begin = i*val_batch
        if (i+1)*val_batch > val_x.shape[1]:
            end = val_x.shape[1]
        else:
            end = (i+1)*val_batch
        val_y = nn.test(val_x[:,begin:end])
        # argmax over axis 0 -> predicted state index per frame.
        val_maxpositions = np.argmax(val_y,axis=0)
        val_output += [ PhoneState[pos] for pos in val_maxpositions ]

    val_error_count = len([ i for i,j in zip(val_output,val_label) if i != j])
    valerror = float(val_error_count)/len(val_output)

    tStartR = time.time()
    print "Epoch:",epoch+1,"| Cost:",cost, "| Val Error", 100*valerror,'%', "| Epoch time:",tEnd-tStart

    print "Reshuffling..."
    # Re-read and re-batch the training data so each epoch gets a new shuffle.
    LABELED_TRAINING_SET = readfile_inloop()
    LABEL_DICT = readLabel()
    LABELED_BATCHED_TRAINING_SET = batch( LABELED_TRAINING_SET,BATCH_SIZE )
    del LABELED_TRAINING_SET
    LABELED_BATCHED_LABEL = MatchLabel2Batches( LABELED_BATCHED_TRAINING_SET,LABEL_DICT )
Ejemplo n.º 10
0
        pickle.dump(saved_params, fh)
        fh.close()
    # Calculate Validation Set Error
    # move the declaration outside the loop
    valerror = None
    # VAL_SET_RATIO of 1 presumably means no validation split was held out.
    if VAL_SET_RATIO != 1:
        val_batch = 512
        val_output = []
        val_error_count = 0
        # Walk the validation matrix in val_batch-wide column slices.
        for i in xrange( (val_data.shape[1]-1)/val_batch +1):
            begin = i*val_batch
            end = (i+1)*val_batch
            if end > val_data.shape[1]:
                end = val_data.shape[1]

            val_result = nn.test(val_data[:,begin:end])
            # argmax over axis 0 -> predicted class index per frame.
            val_maxpositions = np.argmax(val_result,axis=0)
            #pdb.set_trace()
            #val_output += [ PhoneState[pos] for pos in val_maxpositions ]
            #val_output += val_maxpositions.tolist()
            val_error_count += len([ i \
                for i,j in zip(val_maxpositions,val_label[begin:end]) if i!=j])
        valerror = float(val_error_count)/len(val_label)

    tStartR = time.time()
    if VAL_SET_RATIO != 1:
        print "Epoch:",epoch+1,"| Cost:",cost,"| Val Error:", 100*valerror,'%', "| Epoch time:",tEnd-tStart
    else:
        print "Epoch:",epoch+1,"| Cost:",cost,"| Epoch time:",tEnd-tStart

    print "Reshuffling..."
Ejemplo n.º 11
0
        pickle.dump(saved_params, fh)
        fh.close()
    # Calculate Validation Set Error
    # move the declaration outside the loop
    valerror = None
    # VAL_SET_RATIO of 1 presumably means no validation split was held out.
    if VAL_SET_RATIO != 1:
        val_batch = 512
        val_output = []
        val_error_count = 0
        # Walk the validation matrix in val_batch-wide column slices.
        for i in xrange((val_data.shape[1] - 1) / val_batch + 1):
            begin = i * val_batch
            end = (i + 1) * val_batch
            if end > val_data.shape[1]:
                end = val_data.shape[1]

            val_result = nn.test(val_data[:, begin:end])
            # argmax over axis 0 -> predicted class index per frame.
            val_maxpositions = np.argmax(val_result, axis=0)
            #pdb.set_trace()
            #val_output += [ PhoneState[pos] for pos in val_maxpositions ]
            #val_output += val_maxpositions.tolist()
            val_error_count += len([ i \
                for i,j in zip(val_maxpositions,val_label[begin:end]) if i!=j])
        valerror = float(val_error_count) / len(val_label)

    tStartR = time.time()
    if VAL_SET_RATIO != 1:
        print "Epoch:", epoch + 1, "| Cost:", cost, "| Val Error:", 100 * valerror, '%', "| Epoch time:", tEnd - tStart
    else:
        print "Epoch:", epoch + 1, "| Cost:", cost, "| Epoch time:", tEnd - tStart

    print "Reshuffling..."
Ejemplo n.º 12
0
p48to39dict = load_dict_48to39()  # 48-phone -> 39-phone mapping

totaltime = 0
for epoch in range(MAX_EPOCH):
    tStart = time.time()
    cost = 0
    # One full pass over the mini-batches; nn.train returns the batch cost.
    for batched_inputs, batched_outputs in zip(BATCHED_INPUT, BATCHED_OUTPUT):
        cost += nn.train(batched_inputs, batched_outputs)
    tEnd = time.time()
    totaltime += tEnd - tStart

    # Calculate Validation Error
    valsum = 0
    for val in validationNlabel:
        # val[1] is one feature vector; transpose to a single column for nn.test.
        val_x = np.transpose(np.asarray([val[1]], dtype='float32'))
        p_feat = nn.test(val_x)
        pos = np.argmax(p_feat)
        p_48 = p48list[pos]
        p_39 = p48to39dict[p_48]
        # NOTE(review): p_39 is computed but never used -- accuracy below is
        # measured against the 48-phone label (val[2]); confirm whether
        # 39-phone scoring was intended.
        if val[2] == p_48:
            valsum += 1
        print "valdiating:", val[2], p_48
    valcorrect = float(valsum) / len(validationNlabel)

    print "Epoch:", epoch, "Cost:", cost, "Epoch time:", tEnd - tStart, "Val correct,", valcorrect
print totaltime

########################
#      Save Model      #
########################
'''
Ejemplo n.º 13
0
mem_data = np.memmap(MEM_DATA, dtype='float32', mode='r', shape=MEM_DATA_shape)
IDs = readID(PKL_ID)
print "Data parsed"

########################
#  Save posteriorgram  #
########################

mem_shape = (STATE_LENGTH, len(IDs))
posteriorgram = np.memmap(PGRAM, dtype='float32', mode='w+', shape=mem_shape)

########################
#    Predict & To48    #
########################
print "Start test and saving..."
for idx in range(0, int(len(IDs) / BATCH_SIZE)):
    begin = idx * BATCH_SIZE
    end = (idx + 1) * BATCH_SIZE
    #if end > len(IDs):
    #    end = len(IDs)
    training_set = mem_data[:, begin:end]
    #training_set = mem_data[:,idx*BATCH_SIZE:(idx+1)*BATCH_SIZE]
    result = nn.test(training_set)
    # save result into pgram memmap.

    posteriorgram[:, begin:end] = result[:, :]

    del training_set
    del result
pdb.set_trace()