def assessWithoutDist(self):
    print "reading data..."
    pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)

    # Map every image id to its ground-truth type.
    dao = ImageDao()
    imgs = dao.getAll()
    typeDict = {}
    for img in imgs:
        typeDict[img.imgId] = img.imgType

    print "training..."
    trainSet = pictures[:self.trainNum]
    self.knn.train(trainSet)
    testSet = pictures[self.trainNum:self.totalNum]

    accuracyList = []
    heads = ['distance', 'accuracy', 'averageK']
    print "predicting..."
    # Sweep the distance threshold and record the accuracy plus the average
    # number of neighbours that fall inside it.
    for d in range(2000, 4000, 20):
        accuracy, avgK = self.knn.predictForManyWithDist(
            testSet, self.trainNum, d, typeDict)
        item = [d, accuracy, avgK]
        accuracyList.append(item)
        print "distance:%d accuracy:%f%% averageK:%f" % (
            d, accuracy * 100, avgK)

    saveCsv(self.resultBasePath + 'assessDist_Radius10_5000-1000.csv',
            heads, accuracyList)
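
# A minimal sketch of the distance-threshold vote that predictForManyWithDist
# (defined in the knn module, not shown here) is assumed to perform for one
# sample; "radius_vote" and its signature are illustrative placeholders, not
# the project's API. Assumes integer class labels.
def radius_vote(train, train_labels, sample, d):
    import numpy as np
    # Euclidean distance from the sample to every training row.
    dists = np.sqrt(((train - sample) ** 2).sum(axis=1))
    inside = train_labels[dists <= d]   # neighbours within distance d
    if inside.size == 0:
        return None, 0                  # no neighbour close enough to vote
    votes = np.bincount(inside)
    return votes.argmax(), inside.size  # majority label and the k it used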
def train(self):
    # Prediction
    self.y_pred = self.decoder_op
    # Targets (labels) are the input data (autoencoder reconstruction).
    y_true = self.X

    # Define loss and optimizer: minimise the root mean squared error.
    # NOTE: the exponent must be a float; in Python 2, 1 / 2 == 0 and the
    # cost would collapse to a constant.
    epsilon = 10e-6  # small constant kept away from zero for sqrt/log
    cost = tf.reduce_mean(tf.pow(y_true - self.y_pred + epsilon, 2)) ** 0.5
    """
    # Alternative cross-entropy loss, kept for reference:
    predictions = self.y_pred
    float_labels = tf.cast(y_true, tf.float32)
    cross_entropy_loss = float_labels * tf.log(predictions + epsilon) + (
        1 - float_labels) * tf.log(1 - predictions + epsilon)
    cross_entropy_loss = tf.negative(cross_entropy_loss)
    cost = tf.reduce_mean(tf.reduce_sum(cross_entropy_loss, 1))
    """
    optimizer = tf.train.RMSPropOptimizer(
        self.learning_rate).minimize(cost)

    # Initializing the variables
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    self.sess.run(init)

    costs = []
    total_batch = int(self.dataSet.shape[0] / self.batch_size)
    # Training cycle
    for epoch in range(self.training_epochs):
        # Loop over all batches
        for i in range(total_batch):
            batch_xs = self.dataSet[i * self.batch_size:(i + 1) *
                                    self.batch_size]
            # Run optimization op (backprop) and cost op (to get loss value)
            _, c = self.sess.run([optimizer, cost],
                                 feed_dict={self.X: batch_xs})
            # Display and record logs every display_step batches
            if i % self.display_step == 0:
                print "Epoch:%04d" % (epoch + 1), "batch:%04d" % (
                    i + 1), "cost=", "{:.9f}".format(c)
                val = [(epoch + 1), i, c]
                costs.append(val)

    saver.save(self.sess, 'network/model/model.ckpt')
    heads = ['epoch', 'batch', 'cost']
    saveCsv(fileName="network/netCost.csv", heads=heads, datas=costs)
    print("Optimization Finished!")
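
# Usage sketch for the checkpoint written by train() above. Only sess, X and
# decoder_op are taken from the method body; the "ae" argument stands for an
# instance of the (unnamed here) autoencoder class and is an assumption.
def restore_and_reconstruct(ae, batch_xs):
    # Reload the weights saved under network/model/ and run one forward pass.
    saver = tf.train.Saver()
    saver.restore(ae.sess, 'network/model/model.ckpt')
    return ae.sess.run(ae.decoder_op, feed_dict={ae.X: batch_xs})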
def assessWithoutRadius(self, k=None):
    if not k:
        k = self.k
    accuracyList = []
    heads = ['radius', 'accuracy', 'averageCriticalDist']
    # Sweep the Gaussian blur radius with k held fixed.
    for radius in range(5, 15):
        print radius, ':'
        print "blurring ..."
        data = MyData()
        data.saveCsvWithGaussianBlur(radius=radius)

        dao = ImageDao()
        imgs = dao.getAll()
        typeDict = {}
        for img in imgs:
            typeDict[img.imgId] = img.imgType

        # Build a (testNum, k) matrix that repeats each test sample's true
        # label k times, for element-wise comparison with the k neighbours.
        zerNp = np.zeros([k, self.testNum])
        testLabel = np.arange(self.trainNum, self.totalNum)
        for i in range(len(testLabel)):
            testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
        testLabel = (zerNp + testLabel).astype('int').T

        pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)
        print "training..."
        trainSet = pictures[:self.trainNum]
        self.knn.train(trainSet)
        testSet = pictures[self.trainNum:self.totalNum]

        print "predicting..."
        accuracy, avgCriDist = self.knn.predictForManyWithK(
            testSet, testLabel, k, typeDict)
        item = [radius, accuracy, avgCriDist]
        accuracyList.append(item)
        print "k:%d radius:%f accuracy:%f%% averageCriticalDist:%f" % (
            k, radius, accuracy * 100, avgCriDist)

    saveCsv(self.resultBasePath + 'assessRadiusK' + str(k) + '.csv',
            heads, accuracyList)
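
# Tiny runnable demo of the label-matrix broadcast used above (k=3,
# testNum=2): adding a (testNum,) label vector to a (k, testNum) zero matrix
# and transposing yields one row of k repeated true labels per test sample.
def _label_broadcast_demo():
    import numpy as np
    labels = np.array([7, 9])  # true labels of two test samples
    matrix = (np.zeros([3, 2]) + labels).astype('int').T
    print matrix               # [[7 7 7] [9 9 9]]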
def assessFeaWithBlurWithoutK(self):
    print "reading netFeaData..."
    # Rescale the learned features so they are comparable with raw pixels.
    netFea = readCsv(self.dataBaseUrl + "netFea", self.totalNum) * 256 * 40
    print "reading data..."
    pictures = readCsv(self.dataBaseUrl + "data", self.totalNum)
    pictures = np.append(pictures, netFea, axis=1)
    print pictures.shape

    dao = ImageDao()
    imgs = dao.getAll()
    typeDict = {}
    for img in imgs:
        typeDict[img.imgId] = img.imgType

    print "training..."
    trainSet = pictures[:self.trainNum]
    self.knn.train(trainSet)
    testSet = pictures[self.trainNum:self.totalNum]

    accuracyList = []
    heads = ['k', 'accuracy', 'averageCriticalDist']
    print "predicting..."
    # Sweep k over 1..100 on the pixel + network-feature representation.
    for k in range(1, 101):
        zerNp = np.zeros([k, self.testNum])
        testLabel = np.arange(self.trainNum, self.totalNum)
        for i in range(len(testLabel)):
            testLabel[i] = typeDict[str(testLabel[i]).zfill(5)]
        testLabel = (zerNp + testLabel).astype('int').T

        accuracy, avgCriDist = self.knn.predictForManyWithK(
            testSet, testLabel, k, typeDict)
        item = [k, accuracy, avgCriDist]
        accuracyList.append(item)
        print "k:%d accuracy:%f%% averageCriticalDist:%f" % (
            k, accuracy * 100, avgCriDist)

    saveCsv(self.resultBasePath + 'assessFeaWithBlurWithoutK.csv',
            heads, accuracyList)
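
# The "* 256 * 40" above is a fixed weighting that lets the network's
# activations compete with raw 8-bit pixel values inside the Euclidean
# distance; a generic version of that step might look like this (the weight
# value is the author's empirical choice, not derived here).
def combine_features(pixels, net_fea, weight=256 * 40):
    import numpy as np
    # Column-wise concatenation: every row keeps its raw pixels plus the
    # re-weighted learned features.
    return np.append(pixels, net_fea * weight, axis=1)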
def test_cA(train_set_x, n_hidden=None, learning_rate=0.01,
            training_epochs=20, batch_size=10, output_folder='network',
            contraction_level=.1):
    """
    learning_rate: learning rate for gradient descent
    training_epochs: maximum number of training epochs
    contraction_level: weight of the contraction (regularisation) term
    """
    # Number of minibatches. train_set_x is a theano shared variable (the
    # givens clause below slices it symbolically), so fetch its value before
    # taking the shape.
    n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

    # allocate symbolic variables for the data
    index = T.lscalar()  # index of the current minibatch
    x = T.matrix('x')    # one minibatch of training data

    if not os.path.isdir(output_folder):
        os.makedirs(output_folder)
    os.chdir(output_folder)

    rng = numpy.random.RandomState(123)

    ca = AutoEnc_Theano(numpy_rng=rng, input=x, n_visible=50 * 50 * 3,
                        n_hidden=n_hidden, n_batchsize=batch_size)
    cost, updates = ca.get_cost_updates(contraction_level=contraction_level,
                                        learning_rate=learning_rate)

    # Per-minibatch training/update function, parameterised by the index.
    train_ca = theano.function(
        [index], [T.mean(ca.L_rec), ca.L_jacob],
        updates=updates,
        givens={x: train_set_x[index * batch_size:(index + 1) * batch_size]})

    start_time = timeit.default_timer()

    ############
    # TRAINING #
    ############
    costs = []
    # go through training epochs
    for epoch in xrange(training_epochs):
        # go through the training set
        c = []
        for batch_index in xrange(n_train_batches):
            print 'epoch:%d batch_index:%d' % (epoch, batch_index)
            c.append(train_ca(batch_index))
        # Each row of c_array holds [reconstruction cost, jacobian] for one
        # batch, so the per-epoch means are taken over the columns.
        c_array = numpy.vstack(c)
        cost = [
            epoch,
            numpy.mean(c_array[:, 0]),
            numpy.mean(numpy.sqrt(c_array[:, 1]))
        ]
        costs.append(cost)
        print 'Training epoch %d, reconstruction cost ' % cost[0], cost[
            1], ' jacobian norm ', cost[2]

    heads = ['epoch', 'cost', 'jacobian']
    saveCsv(fileName="layer_1Cost.csv", heads=heads, datas=costs)

    end_time = timeit.default_timer()
    training_time = (end_time - start_time)
    print >> sys.stderr, ('The code for file ' + os.path.split(__file__)[1] +
                          ' ran for %.2fm' % (training_time / 60.))

    # Visualise the learned filters and dump the first-layer weights.
    image = Image.fromarray(
        tile_raster_images(X=ca.W.get_value(borrow=True).T,
                           img_shape=(50, 50 * 3),
                           tile_shape=(10, 10),
                           tile_spacing=(1, 1)))
    image.save('cae_filters.png')
    numpy.savetxt('layer_1W.csv', ca.W.get_value(borrow=True), delimiter=',')
    os.chdir('../')
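
# Usage sketch for test_cA (the file name and sizes below are illustrative
# assumptions). train_set_x must be a theano.shared matrix of flattened
# 50x50 RGB images, since the givens clause slices it with a symbolic index:
# data = numpy.loadtxt('data.csv', delimiter=',')  # shape (N, 50 * 50 * 3)
# train_set_x = theano.shared(
#     numpy.asarray(data, dtype=theano.config.floatX), borrow=True)
# test_cA(train_set_x, n_hidden=500, training_epochs=20, batch_size=10)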