# NOTE: the module paths for the project-specific dA (the Theano-tutorial style
# denoising autoencoder) and ModelInterface classes below are assumptions.
import pickle

import numpy
import theano
import theano.tensor as T
from theano.tensor.shared_randomstreams import RandomStreams

from dA import dA                        # assumed path
from interface import ModelInterface    # assumed path


def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    
    # construct train set
    train_set = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)

    # construct a single autoencoder: 39-dim input features -> 100-dim hidden codes
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # each row is one 39-dimensional feature vector
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    # corruption_level=0. means no input corruption, i.e. a plain autoencoder
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train autoencoder
    training_epochs = 100
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print 'Training epoch %d, cost ' % epoch, c1[-1]
    
    # replace each speaker's features with the autoencoder's hidden representation
    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()

    m.train()
    m.dump('model/model_da.out')
    with open('model/da.out', 'wb') as f:
        pickle.dump(da, f, -1)
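
# Illustrative sketch (assumption, not from the original source): reloading the
# autoencoder pickled above and applying it to new 39-dim feature vectors.
# 'encode_with_saved_da' and 'features' are hypothetical names.
def encode_with_saved_da(features, da_path='model/da.out'):
    with open(da_path, 'rb') as f:
        saved_da = pickle.load(f)
    # get_hidden_values() builds a symbolic expression; eval() computes the codes
    return saved_da.get_hidden_values(numpy.asarray(features)).eval()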

    # Layer-wise pre-training (a method of a stacked-autoencoder class whose full
    # definition is not shown here; it relies on self.layers and self.dAs).
    def train(self, train_set, batch_size=100):
        for i in xrange(len(self.layers) - 1):
            train_data = T.dmatrix("train_data")
            x = T.dmatrix("x")
            rng = numpy.random.RandomState(123)
            theano_rng = RandomStreams(rng.randint(2 ** 10))
            da = dA(
                numpy_rng=rng, theano_rng=theano_rng, input=x, n_visible=self.layers[i], n_hidden=self.layers[i + 1]
            )
            cost, updates = da.get_cost_updates(corruption_level=0.0, learning_rate=0.4)
            train_da = theano.function([train_data], cost, updates=updates, givens={x: train_data})

            for epoch in xrange(20):
                train_cost = []
                for index in xrange(len(train_set) // batch_size):
                    train_cost.append(train_da(numpy.asarray(train_set[index * batch_size : (index + 1) * batch_size])))
                print "Training ae layer %d epoch %d, cost " % (i + 1, epoch), numpy.mean(train_cost)
            # feed this layer's hidden codes to the next layer as its training input
            train_set = da.get_hidden_values(train_set).eval()
            self.dAs.append(da)
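
# Illustrative sketch (assumption, not from the original source): a minimal class
# skeleton that the train() method above could belong to. The class name
# 'StackedAutoEncoder' and the transform() helper are hypothetical; only
# self.layers and self.dAs are implied by the method itself.
class StackedAutoEncoder(object):
    def __init__(self, layers):
        # e.g. layers = [39, 30, 20]: visible size followed by each hidden size
        self.layers = layers
        self.dAs = []

    # the train() method shown above would be defined here

    def transform(self, data):
        # push data through every trained layer's encoder
        for da in self.dAs:
            data = da.get_hidden_values(data).eval()
        return data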

# Stacked variant: re-extract features through two autoencoders (39 -> 30 -> 20 dims).
def feature_re_extract():
    #pdb.set_trace()
    test_class = ['FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S',
                  'FJAZ_S', 'FMEL_S', 'FMEV_S', 'FSLJ_S', 'FTEJ_S',
                  'FUAN_S', 'MASM_S', 'MCBR_S', 'MFKC_S', 'MKBP_S',
                  'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S', 'MOEW_S',
                  'MPRA_S', 'MREM_S', 'MTLS_S']
    m = ModelInterface.load('model/model.out')
    
    # construct train set
    train_set = []
    up_bound = []
    lower_bound = []
    for c in test_class:
        for i in m.features[c]:
            train_set.append(i)
    # Optional min-max scaling of every feature dimension into [-1, 1]; it is left
    # commented out here, so up_bound and lower_bound stay empty in the values
    # returned at the end of this function.
    '''
    up_bound = []
    lower_bound = []
    for j in xrange(len(train_set[0])):
        up_bound.append(train_set[0][j])
        lower_bound.append(train_set[0][j])

    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            up_bound[j] = max(up_bound[j], train_set[i][j])
            lower_bound[j] = min(lower_bound[j], train_set[i][j])

    for i in xrange(len(train_set)):
        for j in xrange(len(train_set[0])):
            train_set[i][j] = 2 * ((train_set[i][j] - lower_bound[j]) / (up_bound[j] - lower_bound[j])) - 1
    '''
    # construct the first autoencoder: 39-dim input features -> 30-dim hidden codes
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # each row is one 39-dimensional feature vector
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=30
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    # train first autoencoder
    training_epochs = 20
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da(numpy.asarray(train_set)))
        print 'Training 1st ae epoch %d, cost ' % epoch, c1[-1]
    
    # train the second autoencoder on the hidden codes produced by the first
    train_set2 = da.get_hidden_values(train_set).eval()
    
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')  # each row is one 30-dimensional hidden code
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da2 = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=30,
        n_hidden=20
    )
    cost, updates = da2.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da2 = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={
            x: train_data
        }
    )
    training_epochs = 20
    c1 = []
    for epoch in xrange(training_epochs):
        c1.append(train_da2(numpy.asarray(train_set2)))
        print 'Training 2nd ae epoch %d, cost ' % epoch, c1[-1]
    
    # replace each speaker's features with the second-layer hidden representation
    for c in test_class:
        m.features[c] = da2.get_hidden_values(da.get_hidden_values(m.features[c]).eval()).eval()

    m.train()
    m.dump('model/model_sda.out')
    with open('model/da1.out', 'wb') as f:
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    return up_bound, lower_bound
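
# Illustrative sketch (assumption, not from the original source): how new feature
# vectors could be pushed through the two pickled autoencoders at test time, and
# how the returned bounds would be used if the commented-out min-max scaling were
# re-enabled. 'encode_features' and 'features' are hypothetical names.
def encode_features(features, up_bound=None, lower_bound=None):
    features = numpy.asarray(features, dtype='float64')
    if up_bound and lower_bound:  # empty/None bounds mean no scaling was applied
        # same [-1, 1] scaling as the commented-out block in feature_re_extract
        up = numpy.asarray(up_bound)
        low = numpy.asarray(lower_bound)
        features = 2 * (features - low) / (up - low) - 1
    with open('model/da1.out', 'rb') as f:
        da1 = pickle.load(f)
    with open('model/da2.out', 'rb') as f:
        da2 = pickle.load(f)
    # apply the stacked encoders exactly as feature_re_extract does above
    return da2.get_hidden_values(da1.get_hidden_values(features).eval()).eval()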