Esempio n. 1
0
 def _training_DNN(self):   
     trX, trY, self.missing_filename_list,  = read_features(self.test_number, self.n_input_f, self.n_output_f)     
     trX = trX[:,1:self.n_input_f]
     trY = trY[:,1:self.n_output_f]
     print trX.shape
     print trY.shape   
     print self.nloop, self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f
     
     X = T.fmatrix()
     Y = T.fmatrix()
     py_x = self._model(X, self.params, self.bias)
     y_x = py_x
     cost = T.mean(T.sqr(py_x - Y))
     updates = self._sgd(cost, self.params, self.bias)
     train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
     self.predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
           
     for i in range(self.nloop, self.nloop + 0 ):
         print i
         #logging.debug('loop' + str(i))
         error_total = 0
         arr_X_Y = zip(range(0, len(trX), 128), range(128, len(trX), 128))
         for start, end in arr_X_Y:
             cost = train(trX[start:end], trY[start:end])
             error_total += cost
             #print cost
         last_element = arr_X_Y[len(arr_X_Y)-1][0] 
         if last_element < len(trX):
             cost = train(trX[last_element: len(trX)], trY[last_element:len(trY)])    
             error_total += cost
         print error_total / len(trX)
         save_weight_info( self.filename, i, self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f, self.params, error_total, self.bias)
         self.id_file = 1 - self.id_file
         self.filename = self.weight_folder + 'id_' + str(self.id_file) + ".txt"            
 def _training_DNN(self):
     trX, trY, self.missing_filename_list, self.test_number = read_features()        
     
     load_params = False
     
     id_file = 0
     weight_folder = '../weight_DNN/SQR/' + self.hidden_layer + self.artic + 'test_' + str(self.test_number) + '/'
     
     if not os.path.exists(weight_folder):
         os.makedirs(weight_folder)
         
     filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
     
     if load_params:
         self.nloop,self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f, params = load_weight_info(filename)             
     else:
         self.nloop = 0
         self.n_hidden_layer = 5
         self.n_input_f = 109
         self.n_hidden_f = 512
         self.n_output_f = 37
         params = load_initial_info(self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f)    
         
     trX = trX[:,1:self.n_input_f]
     trY = trY[:,1:self.n_output_f]
     print trX.shape
     print trY.shape   
     print self.nloop, self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f
     
     X = T.fmatrix()
     Y = T.fmatrix()
     py_x = self._model(X, params)
     y_x = py_x
     #cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
     cost = T.mean(T.sqr(py_x - Y))
     #params = [w_h, w_h1, w_h2, w_h3, w_o]
     updates = self._sgd(cost, params)
     train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
     self.predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
     #LOG_FILENAME = 'DNN.log'
     #logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)
           
     for i in range(self.nloop, self.nloop + 1000):
         print i
         #logging.debug('loop' + str(i))
         error_total = 0
         arr_X_Y = zip(range(0, len(trX), 128), range(128, len(trX), 128))
         for start, end in arr_X_Y:
             cost = train(trX[start:end], trY[start:end])
             error_total += cost
             #print cost
         last_element = arr_X_Y[len(arr_X_Y)-1][0] 
         if last_element < len(trX):
             cost = train(trX[last_element: len(trX)], trY[last_element:len(trY)])    
             error_total += cost
         print error_total / len(trX)
         save_weight_info( filename, i, self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f, params, error_total)
         id_file = 1 - id_file
         filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
    def _training_DNN(self):

        trX, trY, self.missing_filename_list, = read_features(self.test_number, self.n_input_f, self.n_output_f)
        trX = trX[:, 1 : self.n_input_f]
        trY = trY[:, 1 : self.n_output_f]
        print trX.shape
        print trY.shape
        print self.nloop, self.n_hidden_layer, self.n_input_f, self.n_hidden_f, self.n_output_f

        X = T.fmatrix()
        Y = T.fmatrix()
        py_x = self._model(X, self.params)
        y_x = py_x
        # cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
        cost = T.mean(T.sqr(py_x - Y))
        # params = [w_h, w_h1, w_h2, w_h3, w_o]
        updates = self._sgd(cost, self.params)
        train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
        self.predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
        # LOG_FILENAME = 'DNN.log'
        # logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)

        for i in range(self.nloop, self.nloop + 500):
            print i
            # logging.debug('loop' + str(i))
            error_total = 0
            arr_X_Y = zip(range(0, len(trX), 128), range(128, len(trX), 128))
            for start, end in arr_X_Y:
                cost = train(trX[start:end], trY[start:end])
                error_total += cost
                # print cost
            last_element = arr_X_Y[len(arr_X_Y) - 1][0]
            if last_element < len(trX):
                cost = train(trX[last_element : len(trX)], trY[last_element : len(trY)])
                error_total += cost
            print error_total / len(trX)
            save_weight_info(
                self.filename,
                i,
                self.n_hidden_layer,
                self.n_input_f,
                self.n_hidden_f,
                self.n_output_f,
                self.params,
                error_total,
            )
            self.id_file = 1 - self.id_file
            self.filename = self.weight_folder + "Phonemic_DNN_SGD_id_" + str(self.id_file) + ".txt"
def deep_neural_network():
    trX, trY = read_features()
    
    X = T.fmatrix()
    Y = T.fmatrix()    
    load_params = False
    hidden_layer = '6_layers/'       # = n_hidden layer below
    artic = 'artic/'
    measure = 'SQR/'
    id_file = 0
    weight_folder = '../weight_DNN/' + hidden_layer + measure + artic
    
    if not os.path.exists(weight_folder):
        os.makedirs(weight_folder)
        
    filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
    
    if load_params:
        nloop,n_hidden_layer, n_input_f, n_hidden_f, n_output_f, params, bias = load_weight_info(filename)             
    else:
        print "load Initial"
        nloop = 0
        n_hidden_layer = 1
        n_input_f = 20
        n_hidden_f = 100
        n_output_f = 15
        params, bias = load_initial_info(n_hidden_layer, n_input_f, n_hidden_f, n_output_f)    
        
    trX = trX[:,1:n_input_f]
    trY = trY[:,1:n_output_f]
    #trX = trX[1:200,1:2]
    #trY = trY[1:200:,1:2]
    
    print trX.shape
    print trY.shape   
    print nloop,n_hidden_layer, n_input_f, n_hidden_f, n_output_f
    #print params
    print "_-----------"
    #print bias
    py_x = model(X, params, bias)
    
    y_x = py_x
    #cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    cost = T.mean(T.sqr(py_x - Y))
    
    updates = sgd(cost, params, bias)
#     for u in xrange(len(params)):
#         print params[u]
#         c = params[u].get_value()
#         print c.shape
#     for u in xrange(len(bias)):
#         print bias[u]
#         c = bias[u].get_value()
#         print c.shape
    #exit()
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    
    LOG_FILENAME = 'DNN.log'
    logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)
        
    for i in range(nloop, nloop + 10):
        print "i ", i
        #logging.debug('loop' + str(i))
        error_total = 0
        arr_X_Y = zip(range(0, len(trX), 128), range(128, len(trX), 128))
        #arr_X_Y = zip(range(0, len(trX), 1), range(1, len(trX), 1))
        
        for start, end in arr_X_Y:
            cost = train(trX[start:end], trY[start:end])
            error_total += cost
            #print cost
        print error_total
#         last_element = arr_X_Y[len(arr_X_Y)-1][0] 
#         if last_element < len(trX):
#             cost = train(trX[last_element: len(trX)], trY[last_element:len(trY)])    
#             error_total += cost
#         print error_total / len(trX)

        save_weight_info( filename, i, n_hidden_layer, n_input_f, n_hidden_f, n_output_f, params, error_total, bias)
        id_file = 1 - id_file
        filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
        #exit()
    #plt.plot(trX,trY,'.')
    #plt.plot(trX,trX *  w.get_value() + b.get_value(), "red")
    exit()
    feature_out_dir = '/home/danglab/Phong/norm/output_norm/'
    test_dir = '/home/danglab/Phong/TestData/Features_Norm/minus/6dB/'
    dnn_predict_dir = '/home/danglab/DNN_Predict/DNN_Bias/'+ measure + artic + 'minus/6dB/'
    
    if not os.path.exists(dnn_predict_dir):
        os.makedirs(dnn_predict_dir)
        
    listtest = sorted(os.listdir(test_dir))
    cnt = 0
    for afile in listtest:
        #print afile                 #usctimit_ema_f1_001_005_100ms_noise_in.txt
        test_arr, factors = read_file_test(test_dir + afile, n_input_f, "factors")                                #read a missing_feature
        find_ = [m.start() for m in re.finditer('_', afile)]      
        file_mat = (afile.replace(afile[find_[4]:find_[6]],'')).replace('in.','out.')   #usctimit_ema_f1_001_005_out.txt
        #test_res_arr = read_file_test(feature_out_dir + file_mat, n_output_f)              #read an original output feature
        energy = test_arr[:,0]          #ko cho energy vao DNN
        test_arr = test_arr[:,1:n_input_f]
        print factors
        write_predict_2_file(dnn_predict_dir + afile.replace(afile[find_[5]:find_[6]],'').replace("_out",''), energy, predict(test_arr), factors)      # write result to file
def deep_neural_network():
    trX, trY = read_features()        
    trX, mask, max_x = abs_normal_matrix(trX)
    for u in xrange(trX.shape[0]):
        trY[u] = np.concatenate((trX[u][0:13], trX[u][37:37+24]))
    
    #print trX.shape
    #print trY.shape
        
    X = T.fmatrix()
    Y = T.fmatrix()    
    load_params = True
    hidden_layer = '6_layers/'       # = n_hidden layer below
    artic = 'artic/'
    id_file = 0
    weight_folder = '../weight_DNN/' + hidden_layer + artic
    
    if not os.path.exists(weight_folder):
        os.makedirs(weight_folder)
        
    filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
    
    if load_params:
        nloop,n_hidden_layer, n_input_f, n_hidden_f, n_output_f, params = load_weight_info(filename)             
    else:
        nloop = 0
        n_hidden_layer = 6
        n_input_f = 109
        n_hidden_f = 512 
        n_output_f = 37
        params = load_initial_info(n_hidden_layer, n_input_f, n_hidden_f, n_output_f)    
        
    trX = trX[:,1:n_input_f]
    print trX.max(), trX.min()
    trY = trY[:,1:n_output_f]
    print trY.max(), trY.min()
    #print trX.shape
    #print trY.shape
    print "trX"   
    #print trX
    print "trY"
    #print trY
    print nloop,n_hidden_layer, n_input_f, n_hidden_f, n_output_f
    
    py_x = model(X, params)
    y_x = py_x
    #cost = T.mean(T.nnet.categorical_crossentropy(py_x, Y))
    cost = T.mean(T.sqr(py_x - Y))
    #params = [w_h, w_h1, w_h2, w_h3, w_o]
    updates = sgd(cost, params)
    train = theano.function(inputs=[X, Y], outputs=cost, updates=updates, allow_input_downcast=True)
    predict = theano.function(inputs=[X], outputs=y_x, allow_input_downcast=True)
    
    LOG_FILENAME = 'DNN.log'
    logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)
          
    for i in range(nloop, nloop + 1):
        print i
        #logging.debug('loop' + str(i))
        error_total = 0
        arr_X_Y = zip(range(0, len(trX), 128), range(128, len(trX), 128))
        for start, end in arr_X_Y:
            cost = train(trX[start:end], trY[start:end])
            error_total += cost
            #print cost
        last_element = arr_X_Y[len(arr_X_Y)-1][0] 
        #logging.warning(str(params[n_hidden_layer - 1].get_value()))
        
        if last_element < len(trX):
            cost = train(trX[last_element: len(trX)], trY[last_element:len(trY)])    
            error_total += cost
        print error_total #/ len(trX)
        save_weight_info( filename, i, n_hidden_layer, n_input_f, n_hidden_f, n_output_f, params, error_total)
        id_file = 1 - id_file
        filename = weight_folder + 'Phonemic_DNN_SGD_id_' + str(id_file) + ".txt"
       
    #feature_out_dir = '/home/danglab/Phong/norm/output_norm/'
    test_dir = '/home/danglab/Phong/TestData/Features/minus/6dB/'
    dnn_predict_dir = '/home/danglab/DNN_Predict/normal_all/' + artic + 'minus/6dB/'
    
    if not os.path.exists(dnn_predict_dir):
        os.makedirs(dnn_predict_dir)
        
    listtest = sorted(os.listdir(test_dir))
    cnt = 0
    
    for afile in listtest:
        #print afile                 #usctimit_ema_f1_001_005_100ms_noise_in.txt
        test_arr = read_file_test(test_dir + afile, n_input_f, "factors")                                #read a missing_feature
        find_ = [m.start() for m in re.finditer('_', afile)]      
        file_mat = (afile.replace(afile[find_[4]:find_[6]],'')).replace('in.','out.')   #usctimit_ema_f1_001_005_out.txt
        #test_res_arr = read_file_test(feature_out_dir + file_mat, n_output_f)              #read an original output feature
        test_arr, mask, max_arr = abs_normal_matrix(test_arr)
        
        #print test_arr
        energy = test_arr[:,0]          #ko cho energy vao DNN
        test_arr = test_arr[:,1:n_input_f]
        
        #print "max_arr", max_arr
        write_predict_2_file(dnn_predict_dir + afile.replace(afile[find_[5]:find_[6]],'').replace("_out",''), energy, predict(test_arr), mask, max_arr)      # write result to file