def feature_re_extract():
    """Re-encode the stored per-class features with a single autoencoder.

    Steps:
      1. Load the model from ``model/model.out``.
      2. Pool the feature vectors of every class in ``test_class`` and train
         one ``dA`` (39 visible units, 100 hidden units, no corruption) on
         the whole pool for 100 full-batch epochs.
      3. Replace each class's features with the autoencoder's hidden values,
         retrain the model and dump it to ``model/model_da.out``.
      4. Pickle the trained autoencoder to ``model/da.out``.

    Returns nothing; all results are written to disk.

    NOTE(review): a second ``feature_re_extract`` appears later in this
    file; if both live in the same module, the later definition shadows
    this one.
    """
    test_class = [
        'FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
        'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
        'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
        'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S',
    ]
    m = ModelInterface.load('model/model.out')

    # Pool every class's feature vectors into one training matrix. The
    # conversion to an array is done once here instead of once per epoch.
    train_set = numpy.asarray(
        [vec for c in test_class for vec in m.features[c]])

    # Construct the autoencoder and its compiled training step.
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=39,
        n_hidden=100
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_da = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={x: train_data}
    )

    # Train the autoencoder: one full-batch update per epoch.
    training_epochs = 100
    for epoch in range(training_epochs):
        epoch_cost = train_da(train_set)
        # Single-argument print() emits the same text as the old print
        # statement and parses on both Python 2 and Python 3.
        print('Training epoch %d, cost  %s' % (epoch, epoch_cost))

    # Swap the raw features for their hidden-layer encoding and retrain.
    for c in test_class:
        m.features[c] = da.get_hidden_values(m.features[c]).eval()
    m.train()
    m.dump('model/model_da.out')

    # Protocol -1 selects a binary pickle format, so the file has to be
    # opened in binary mode ('w' corrupts it on Windows / fails on Python 3).
    with open('model/da.out', 'wb') as f:
        pickle.dump(da, f, -1)
def train(self, train_set, batch_size=100):
    """Train one autoencoder per pair of adjacent sizes in ``self.layers``.

    For each index ``i`` a ``dA`` with ``self.layers[i]`` visible units and
    ``self.layers[i + 1]`` hidden units is trained for 20 epochs of
    mini-batch updates (no corruption, learning rate 0.4). The data is then
    replaced by that autoencoder's hidden values, which become the input of
    the next autoencoder. Every trained autoencoder is appended to
    ``self.dAs``.

    Args:
        train_set: sequence of input rows; presumably each row holds
            ``self.layers[0]`` values -- TODO confirm against callers.
        batch_size: number of rows fed to each update step.

    Returns:
        None. When ``self.layers`` has fewer than two entries nothing is
        trained and ``self.dAs`` is left untouched.
    """
    for i in range(len(self.layers) - 1):
        train_data = T.dmatrix("train_data")
        x = T.dmatrix("x")
        rng = numpy.random.RandomState(123)
        theano_rng = RandomStreams(rng.randint(2 ** 10))
        da = dA(
            numpy_rng=rng,
            theano_rng=theano_rng,
            input=x,
            n_visible=self.layers[i],
            n_hidden=self.layers[i + 1]
        )
        cost, updates = da.get_cost_updates(
            corruption_level=0.0, learning_rate=0.4)
        train_da = theano.function(
            [train_data], cost, updates=updates, givens={x: train_data})

        # Stepping by batch_size keeps the trailing partial batch. The
        # previous len // batch_size count silently dropped it, and trained
        # on nothing at all when there were fewer than batch_size rows.
        batch_starts = range(0, len(train_set), batch_size)
        for epoch in range(20):
            train_cost = []
            for start in batch_starts:
                batch = numpy.asarray(train_set[start:start + batch_size])
                train_cost.append(train_da(batch))
            # Report the layer actually being trained (1-based); the old
            # message said "1st ae" for every layer.
            print("Training ae %d epoch %d, cost  %s"
                  % (i + 1, epoch, numpy.mean(train_cost)))

        # The hidden representation is the training data of the next layer.
        train_set = da.get_hidden_values(train_set).eval()
        self.dAs.append(da)
def feature_re_extract():
    """Re-encode the stored per-class features with two stacked autoencoders.

    Steps:
      1. Load the model from ``model/model.out`` and pool the feature
         vectors of every class in ``test_class``.
      2. Train a first ``dA`` (39 -> 30 units) on the pooled features, then
         a second ``dA`` (30 -> 20 units) on the first one's hidden values;
         20 full-batch epochs each.
      3. Replace each class's features with the output of both encoders,
         retrain the model and dump it to ``model/model_sda.out``.
      4. Pickle the autoencoders to ``model/da1.out`` and ``model/da2.out``.

    Returns:
        The tuple ``(up_bound, lower_bound)``. Both are empty lists: the
        per-feature scaling step that would fill them is disabled.

    NOTE(review): another ``feature_re_extract`` appears earlier in this
    file; if both live in the same module, this later definition shadows it.
    """
    test_class = [
        'FAML_S', 'FDHH_S', 'FEAB_S', 'FHRO_S', 'FJAZ_S', 'FMEL_S',
        'FMEV_S', 'FSLJ_S', 'FTEJ_S', 'FUAN_S', 'MASM_S', 'MCBR_S',
        'MFKC_S', 'MKBP_S', 'MLKH_S', 'MMLP_S', 'MMNA_S', 'MNHP_S',
        'MOEW_S', 'MPRA_S', 'MREM_S', 'MTLS_S',
    ]
    m = ModelInterface.load('model/model.out')

    # NOTE: rescaling every feature to [-1, 1] via
    # 2 * (v - lower) / (upper - lower) - 1 used to happen here and filled
    # these two lists. It is switched off, so they are returned empty.
    up_bound = []
    lower_bound = []

    # Pool every class's feature vectors into one training matrix.
    train_set = numpy.asarray(
        [vec for c in test_class for vec in m.features[c]])

    # The second autoencoder is trained on the first one's encoding.
    da = _train_autoencoder(train_set, 39, 30, 20, '1st')
    train_set2 = da.get_hidden_values(train_set).eval()
    da2 = _train_autoencoder(train_set2, 30, 20, 20, '2nd')

    # Push every class's features through both encoders and retrain.
    for c in test_class:
        first_code = da.get_hidden_values(m.features[c]).eval()
        m.features[c] = da2.get_hidden_values(first_code).eval()
    m.train()
    m.dump('model/model_sda.out')

    # Protocol -1 selects a binary pickle format, so the files have to be
    # opened in binary mode ('w' corrupts them on Windows / fails on
    # Python 3).
    with open('model/da1.out', 'wb') as f:
        pickle.dump(da, f, -1)
    with open('model/da2.out', 'wb') as f:
        pickle.dump(da2, f, -1)
    return up_bound, lower_bound


def _train_autoencoder(samples, n_visible, n_hidden, epochs, label):
    """Build a ``dA`` and fit it to ``samples`` with full-batch updates.

    Args:
        samples: training rows, converted to an array once before training.
        n_visible: number of visible units of the autoencoder.
        n_hidden: number of hidden units of the autoencoder.
        epochs: number of full-batch update steps to run.
        label: text identifying the autoencoder in the progress output
            (e.g. ``'1st'``).

    Returns:
        The trained ``dA`` instance.
    """
    train_data = T.dmatrix('train_data')
    x = T.dmatrix('x')
    # Fixed seed: every autoencoder starts from the same random stream.
    rng = numpy.random.RandomState(123)
    theano_rng = RandomStreams(rng.randint(2 ** 10))
    da = dA(
        numpy_rng=rng,
        theano_rng=theano_rng,
        input=x,
        n_visible=n_visible,
        n_hidden=n_hidden
    )
    cost, updates = da.get_cost_updates(
        corruption_level=0.,
        learning_rate=0.4
    )
    train_step = theano.function(
        [train_data],
        cost,
        updates=updates,
        givens={x: train_data}
    )

    samples = numpy.asarray(samples)
    for epoch in range(epochs):
        epoch_cost = train_step(samples)
        # Single-argument print() emits the same text as the old print
        # statement and parses on both Python 2 and Python 3.
        print('Training %s ae epoch %d, cost  %s' % (label, epoch, epoch_cost))
    return da