def main(_):
    mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=True)
    '''
    The data is split into three parts:
        55K data points for training (mnist.train)
        10K for testing (mnist.test)
        5K for validation (mnist.validation)

    Each DATA POINT has two parts:
        the image ("X") -> mnist.train.images
        the corresponding label ("Y") -> mnist.train.labels

    Each image is 28x28 pixels (a matrix); we can view it as a vector of 784
    numbers once we flatten it. Softmax regression does not need to exploit
    the 2D structure of the image.

    mnist.train.images is a tensor (an n-dimensional array) with shape
    [55K, 784], i.e. 55K images, each a vector of 784 entries/pixels.
        1st index: position in the list of images.
        2nd index: position of each pixel within an image.
    Each entry in the tensor is the intensity, between 0 and 1, of a
    particular pixel in a particular image.

    The labels range from 0 to 9 according to the written digit. Here we use
    one-hot vectors (all dimensions 0 except one) for the labels, so
    mnist.train.labels has shape [55K, 10].

    Softmax regression is a simple model. It gives us a list of values
    between 0 and 1 that sum to 1; these values are the probabilities that
    the image belongs to each class, computed from the scores. Applying
    softmax takes two steps:
        We sum up the evidence that our input belongs to each class, using
        weights (the larger the weight, the more that evidence favors the
        class). We also add a bias that carries information independent of
        the input we feed in.
        We convert that evidence into probabilities.

    y = our probabilities after applying softmax(evidence). Softmax acts as
    an activation/link function that shapes the output so the probability
    mass is distributed over the classes:
        y = softmax(Wx + b)
    '''
    # BUILD THE MODEL in TF. Once the model is defined, TF can train it
    # easily because it knows the entire graph of operations you are going
    # to run, so it automatically knows how to apply backpropagation to
    # determine efficiently how the variables (W and b) affect the loss we
    # want to minimize. We can then apply whichever optimizer we like.

    # x is a placeholder: a value we feed in whenever we ask TF to run a
    # computation. We want to accept any number of images, each represented
    # as a 784-dimensional vector, so we define the placeholder as
    # [None, 784], where None means that dimension can have any length.
    x = tf.placeholder(tf.float32, [None, 784])
    # For the model weights we create a Variable: a modifiable tensor that
    # lives in TF's graph of interacting operations. Its shape is
    # [784 pixels, 10 classes], and it is initialized to zeros.
    W = tf.Variable(tf.zeros([784, 10]))
    # Bias for the 10 classes, also a tensor of zeros.
    b = tf.Variable(tf.zeros([10]))
    # Define the model, i.e. the operations that make it up; softmax is
    # applied later. y is the predicted (unnormalized) distribution.
    y = tf.matmul(x, W) + b

    # Define the loss and the optimizer. The loss tells us how far our model
    # is from the desired one. y_ is the true distribution (one-hot).
    y_ = tf.placeholder(tf.float32, [None, 10])
    '''
    The raw formulation of cross-entropy,

        tf.reduce_mean(-tf.reduce_sum(y_ * tf.log(tf.nn.softmax(y)),
                                      reduction_indices=[1]))

    can be numerically unstable.

    So here we use tf.nn.softmax_cross_entropy_with_logits on the raw
    outputs of 'y', and then average across the batch.
    '''
    # Compute the cross-entropy at the logit level, between our prediction
    # and the true distribution.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
    # Choose the optimizer used to reduce the loss: here, gradient descent
    # minimizing the cross-entropy with a learning rate of 0.5.
    train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

    sess = tf.InteractiveSession()
    # The model is configured for training. Before launching it we create an
    # operation to initialize all the variables we have defined, and run it.

    # TRAINING
    tf.global_variables_initializer().run()
    # Run the training step 1K times.
    for _ in range(1000):
        # Each loop step uses a batch of 100 data points from our training
        # set. Using small random batches is known as stochastic training;
        # in this case, stochastic gradient descent. Ideally we would use
        # all the data at every training step, but that is expensive.
        batch_xs, batch_ys = mnist.train.next_batch(100)
        # Run train_step, feeding x with a slice of the data (batch_xs) and
        # the labels with batch_ys; the batches replace the placeholders.
        sess.run(train_step, feed_dict={x: batch_xs, y_: batch_ys})

    # TEST THE (already trained) MODEL
    # tf.argmax(y, 1) gives the index of the largest value in tensor y along
    # an axis, i.e. the label the model believes is correct for each image.
    # Applying the same function to y_ returns the true label. tf.equal then
    # yields a list of booleans telling us whether the prediction matches.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # To get the accuracy, first cast the booleans, e.g. from
    # [True, False, True, True] to [1, 0, 1, 1], and then take the mean.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    print("accuracy")
    print(
        sess.run(accuracy,
                 feed_dict={
                     x: mnist.test.images,
                     y_: mnist.test.labels
                 }))
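To make the stability note in the docstring above concrete, here is a small standalone check (not part of the original script): with extreme logits the naive formulation overflows to inf, while the fused op stays finite.

import tensorflow as tf

logits = tf.constant([[1000., 0., -1000.]])
labels = tf.constant([[0., 1., 0.]])

# Naive cross-entropy: softmax underflows to 0, and log(0) = -inf.
naive = -tf.reduce_sum(labels * tf.log(tf.nn.softmax(logits)), axis=1)
# Fused op: computed in a numerically stable way from the raw logits.
fused = tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits)

with tf.Session() as s:
    print(s.run(naive))  # [inf]
    print(s.run(fused))  # [1000.]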
def train(params, summary_every=100, print_every=250, save_every=1000,
          verbose=True):
    # Unpack params
    wavelength = params.get('wavelength', 532e-9)
    isNonNeg = params.get('isNonNeg', False)
    numIters = params.get('numIters', 1000)
    activation = params.get('activation', tf.nn.relu)
    opt_type = params.get('opt_type', 'ADAM')

    # switches
    doMultichannelConv = params.get('doMultichannelConv', False)
    doMean = params.get('doMean', False)
    doOpticalConv = params.get('doOpticalConv', True)
    doAmplitudeMask = params.get('doAmplitudeMask', False)
    doZernike = params.get('doZernike', False)
    doFC = params.get('doFC', False)
    doConv1 = params.get('doConv1', True)
    doConv2 = params.get('doConv2', True)
    doConv3 = params.get('doConv3', False)
    doNonnegReg = params.get('doNonnegReg', False)
    doOptNeg = params.get('doOptNeg', False)
    doTiledConv = params.get('doTiledConv', False)

    z_modes = params.get('z_modes', 1024)
    convdim1 = params.get('convdim1', 100)
    convdim2 = params.get('convdim2', 100)
    convdim3 = params.get('convdim3', 100)
    depth1 = params.get('depth1', 3)
    depth2 = params.get('depth2', 3)
    depth3 = params.get('depth3', 3)

    padamt = params.get('padamt', 0)
    dim = params.get('dim', 60)
    buff = params.get('buff', 4)
    rows = params.get('rows', 4)
    cols = params.get('cols', 4)

    # constraint helpers
    def nonneg(input_tensor):
        return tf.abs(input_tensor) if isNonNeg else input_tensor

    def vis_weights(W_conv, depth, buff, rows, cols, name):
        kernel_list = tf.split(tf.transpose(W_conv, [2, 0, 1, 3]), depth, axis=3)
        kernels_pad = [
            tf.pad(kernel, [[0, 0], [buff, buff], [buff + 4, buff + 4], [0, 0]])
            for kernel in kernel_list
        ]
        W_conv_tiled = tf.concat([
            tf.concat(kernels_pad[i * cols:(i + 1) * cols], axis=2)
            for i in range(rows)
        ], axis=1)
        tf.summary.image(name, W_conv_tiled, 3)

    def vis_h(h_conv, depth, rows, cols, name):
        # this was for viewing multichannel convolution
        h_conv_split = tf.split(h_conv, depth, axis=3)
        h_conv_tiled = tf.concat([
            tf.concat(h_conv_split[i * cols:(i + 1) * cols], axis=2)
            for i in range(rows)
        ], axis=1)
        tf.summary.image(name, h_conv_tiled, 3)

    sess = tf.InteractiveSession(config=tf.ConfigProto(
        allow_soft_placement=True))

    # input placeholders
    classes = 10
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, shape=[None, 32, 32])
        y_ = tf.placeholder(tf.int64, shape=[None])
        keep_prob = tf.placeholder(tf.float32)

    x_image = tf.reshape(x, [-1, 32, 32, 1])
    paddings = tf.constant([[0, 0], [padamt, padamt], [padamt, padamt], [0, 0]])
    x_image = tf.pad(x_image, paddings)
    # x_image = tf.image.resize_nearest_neighbor(x_image, size=(dim, dim))
    tf.summary.image('input', x_image, 3)

    # if not isNonNeg and not doNonnegReg:
    #     x_image -= tf.reduce_mean(x_image)

    # regularizers
    global_step = tf.Variable(0, trainable=False)
    if doNonnegReg:
        reg_scale = tf.train.polynomial_decay(0., global_step,
                                              decay_steps=6000,
                                              end_learning_rate=6000.)
        psf_reg = optics_alt.nonneg_regularizer(reg_scale)
    else:
        psf_reg = None

    l2_reg = tf.contrib.layers.l2_regularizer(1e-1, scope=None)

    # build model
    h_conv_out = x_image
    fcdepth = 1
    doVis = True

    if doConv1:
        with tf.name_scope('conv1'):
            if doTiledConv:
                tiled_dim = (32) * rows
                init_vals_pos = tf.truncated_normal(
                    [tiled_dim, tiled_dim, 1, 1], stddev=0.1) + .1
                W_conv1_tiled = tf.Variable(init_vals_pos, name='W_conv1_tiled')
                W_conv1_tiled = nonneg(W_conv1_tiled)
                tf.summary.image(
                    "W_conv1_tiled",
                    tf.expand_dims(tf.squeeze(W_conv1_tiled, -1), 0))

                tile_pad = tiled_dim // 2 - 16
                tile_paddings = tf.constant([[0, 0], [tile_pad, tile_pad],
                                             [tile_pad, tile_pad], [0, 0]])
                x_padded = tf.pad(x_image, tile_paddings)
                tf.summary.image('input', x_padded, 3)

                fftpadamt = int(tiled_dim / 2)
                h_conv_tiled = tf.abs(
                    optics.fft_conv2d(fftpad(x_padded, fftpadamt),
                                      fftpad_psf(W_conv1_tiled, fftpadamt)))
                h_conv_tiled = fftunpad(
                    tf.cast(h_conv_tiled, dtype=tf.float32), fftpadamt)

                h_conv_split2d = split2d_layer(h_conv_tiled, rows, cols)
                b_conv1 = bias_variable([depth1], 'b_conv1')
                h_conv1 = h_conv_split2d + b_conv1

            elif doOpticalConv:
                tiled_dim = (32) * cols
                tile_pad = tiled_dim // 2 - 16
                tile_paddings = tf.constant([[0, 0], [tile_pad, tile_pad],
                                             [tile_pad, tile_pad], [0, 0]])
                x_padded = tf.pad(x_image, tile_paddings)
                tf.summary.image('input', x_padded, 3)

                r_NA = tiled_dim / 2
                hm_reg_scale = 1e-2
                # initialize with optimized phase mask
                # mask = np.load('maskopt/quickdraw9_zernike1024.npy')
                # initializer = tf.constant_initializer(mask)
                initializer = None

                h_conv1_opt = optical_conv_layer(
                    x_padded, hm_reg_scale, r_NA, n=1.48,
                    wavelength=wavelength, activation=None,
                    amplitude_mask=doAmplitudeMask, zernike=doZernike,
                    n_modes=z_modes, initializer=initializer,
                    name='opt_conv1_pos')
                # h_conv1_opt_neg = optical_conv_layer(x_padded, hm_reg_scale, r_NA, n=1.48, wavelength=wavelength,
                #                                      activation=None, amplitude_mask=doAmplitudeMask, zernike=doZernike,
                #                                      n_modes=z_modes, initializer=initializer, name='opt_conv1_neg')

                h_conv1_opt = tf.cast(h_conv1_opt, dtype=tf.float32)
                h_conv_split2d = split2d_layer(h_conv1_opt, 2 * rows, cols)
                b_conv1 = bias_variable([depth1], 'b_conv1')
                h_conv1 = h_conv_split2d + b_conv1

            else:
                if doOptNeg:
                    # positive weights
                    init_vals_pos = tf.truncated_normal(
                        [convdim1, convdim1, 1, depth1], stddev=0.1) + .1
                    W_conv1_pos = tf.Variable(init_vals_pos, name='W_conv1_pos')
                    # W_conv1 = weight_variable([convdim1, convdim1, 1, depth1], name='W_conv1')
                    W_conv1_pos = nonneg(W_conv1_pos)
                    # W_conv1_nonneg /= tf.reduce_sum(tf.abs(W_conv1_nonneg))  # conservation of energy
                    tf.contrib.layers.apply_regularization(
                        l2_reg,
                        weights_list=[tf.transpose(W_conv1_pos, [3, 0, 1, 2])])

                    # negative weights
                    init_vals_neg = tf.truncated_normal(
                        [convdim1, convdim1, 1, depth1], stddev=0.1) + .1
                    W_conv1_neg = tf.Variable(init_vals_neg, name='W_conv1_neg')
                    # W_conv1 = weight_variable([convdim1, convdim1, 1, depth1], name='W_conv1')
                    W_conv1_neg = nonneg(W_conv1_neg)
                    # W_conv1_nonneg /= tf.reduce_sum(tf.abs(W_conv1_nonneg))  # conservation of energy
                    tf.contrib.layers.apply_regularization(
                        l2_reg,
                        weights_list=[tf.transpose(W_conv1_neg, [3, 0, 1, 2])])

                    W_conv1 = tf.subtract(W_conv1_pos, W_conv1_neg)

                    if doVis:
                        vis_weights(W_conv1_pos, depth1, buff, rows, cols, 'W_conv1_pos')
                        vis_weights(W_conv1_neg, depth1, buff, rows, cols, 'W_conv1_neg')

                elif isNonNeg:
                    init_vals = tf.truncated_normal(
                        [convdim1, convdim1, 1, depth1], stddev=0.1)
                    W_conv1 = tf.Variable(init_vals, name='W_conv1_nn') + .1
                    # W_conv1 = weight_variable([convdim1, convdim1, 1, depth1], name='W_conv1')
                    W_conv1 = nonneg(W_conv1)
                    # W_conv1_nonneg /= tf.reduce_sum(tf.abs(W_conv1_nonneg))  # conservation of energy
                else:
                    W_conv1 = weight_variable([convdim1, convdim1, 1, depth1],
                                              name='W_conv1')
                    if psf_reg is not None:
                        tf.contrib.layers.apply_regularization(
                            psf_reg,
                            weights_list=[tf.transpose(W_conv1, [3, 0, 1, 2])])

                    vis_weights(W_conv1, depth1, buff, rows, cols, 'W_conv1')

                W_conv1_flip = tf.reverse(W_conv1, axis=[0, 1])  # flip if using tfconv
                h_conv1 = conv2d(x_image, W_conv1_flip)
                h_conv1 /= tf.reduce_max(h_conv1, axis=[1, 2, 3], keep_dims=True)
                b_conv1 = bias_variable([depth1], 'b_conv1')
                h_conv1 = h_conv1 + b_conv1

            vis_h(h_conv1, depth1, rows, cols, 'h_conv1')
            variable_summaries("h_conv1", h_conv1)
            h_conv1_drop = tf.nn.dropout(h_conv1, keep_prob)
            # h_pool1 = max_pool_2x2(h_conv1)
            h_pool1 = h_conv1_drop
            if doNonnegReg:
                h_pool1 = optics_alt.shifted_relu(h_pool1)
            else:
                h_pool1 = activation(h_pool1)
            variable_summaries("h_conv1_post", h_pool1)

            h_conv_out = h_pool1
            # dim = 16
            fcdepth = depth1

    if doConv2:
        with tf.name_scope('conv2'):
            W_conv2 = weight_variable([convdim2, convdim2, depth1, depth2],
                                      name='W_conv2')
            # vis_weights(W_conv2, depth2, buff, rows, cols, 'W_conv2')
            b_conv2 = bias_variable([depth2], name='b_conv2')

            h_conv2 = conv2d(h_pool1, W_conv2) + b_conv2
            # h_pool2 = max_pool_2x2(h_conv2)
            h_pool2 = h_conv2
            variable_summaries("h_conv2", h_pool2)

            h_conv2_drop = tf.nn.dropout(h_pool2, keep_prob)
            h_conv2_drop = activation(h_conv2_drop)
            variable_summaries("h_conv2_post", h_conv2_drop)
            h_conv_out = h_conv2_drop
            # dim = 16
            fcdepth = depth2

    if doConv3:
        with tf.name_scope('conv3'):
            W_conv3 = weight_variable([convdim3, convdim3, depth2, depth3],
                                      name='W_conv3')
            # vis_weights(W_conv3, depth3, buff, rows, cols, 'W_conv3')
            b_conv3 = bias_variable([depth3], name='b_conv3')

            h_conv3 = conv2d(h_pool2, W_conv3) + b_conv3
            h_pool3 = max_pool_2x2(h_conv3)
            variable_summaries("h_conv3", h_pool3)

            h_conv3_drop = tf.nn.dropout(h_pool3, keep_prob)
            h_conv3_drop = activation(h_conv3_drop)
            variable_summaries("h_conv3_post", h_conv3_drop)
            h_conv_out = h_conv3_drop
            fcdepth = depth3
            dim = 16

    # choose output layer here
    with tf.name_scope('fc'):
        h_conv_out = tf.cast(h_conv_out, dtype=tf.float32)
        fcsize = dim * dim * fcdepth
        hidden_dim = classes
        W_fc1 = weight_variable([fcsize, hidden_dim], name='W_fc1')
        b_fc1 = bias_variable([hidden_dim], name='b_fc1')
        h_conv_flat = tf.reshape(h_conv_out, [-1, fcsize])
        y_out = tf.matmul(h_conv_flat, W_fc1) + b_fc1

        # h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
        # W_fc2 = weight_variable([hidden_dim, classes])
        # b_fc2 = bias_variable([classes])
        # y_out = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    tf.summary.image('output', tf.reshape(y_out, [-1, 2, 5, 1]), 3)

    # loss, train, acc
    with tf.name_scope('cross_entropy'):
        total_data_loss = tf.nn.softmax_cross_entropy_with_logits(
            labels=tf.one_hot(y_, classes), logits=y_out)
        data_loss = tf.reduce_mean(total_data_loss)
        reg_loss = tf.reduce_sum(
            tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
        total_loss = tf.add(data_loss, reg_loss)
        tf.summary.scalar('data_loss', data_loss)
        tf.summary.scalar('reg_loss', reg_loss)
        tf.summary.scalar('total_loss', total_loss)

    if opt_type == 'ADAM':
        train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(
            total_loss, global_step)
    elif opt_type == 'adadelta':
        train_step = tf.train.AdadeltaOptimizer(
            FLAGS.learning_rate_ad, rho=.9).minimize(total_loss, global_step)
    else:
        train_step = tf.train.MomentumOptimizer(
            FLAGS.learning_rate, momentum=0.5,
            use_nesterov=True).minimize(total_loss, global_step)

    with tf.name_scope('accuracy'):
        correct_prediction = tf.equal(tf.argmax(y_out, 1), y_)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
        tf.summary.scalar('accuracy', accuracy)

    losses = []

    # tensorboard setup
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test')
    tf.global_variables_initializer().run()

    # add ops to save and restore all the variables
    saver = tf.train.Saver(max_to_keep=2)
    save_path = os.path.join(FLAGS.log_dir, 'model.ckpt')

    x_train_all, y_train_all, x_test, y_test, _, _ = get_CIFAR10_grayscale(
        num_training=49000, num_validation=1000, num_test=0)
    num_training = x_train_all.shape[0]

    def get_feed(train, batch_size=50, augmentation=False):
        idcs = np.random.randint(0, num_training, batch_size)
        x = x_train_all[idcs, :, :]
        y = y_train_all[idcs]
        if augmentation:
            angle = np.random.uniform(low=0.0, high=20.0)
            x = rotate(x, angle, axes=(2, 1), reshape=True)
            x = resize(x, (32, 32))
        return x, y

    for i in range(FLAGS.num_iters):
        x_train, y_train = get_feed(train=True, augmentation=False)
        _, loss, reg_loss_graph, train_accuracy, train_summary = sess.run(
            [train_step, total_loss, reg_loss, accuracy, merged],
            feed_dict={
                x: x_train,
                y_: y_train,
                keep_prob: FLAGS.dropout
            })
        losses.append(loss)

        if i % summary_every == 0:
            train_writer.add_summary(train_summary, i)
            test_summary, test_accuracy = sess.run(
                [merged, accuracy],
                feed_dict={
                    x: x_test,
                    y_: y_test,
                    keep_prob: 1.0
                })
            test_writer.add_summary(test_summary, i)
            if verbose:
                print('step %d: test acc %g' % (i, test_accuracy))

        if i > 0 and i % save_every == 0:
            # print("Saving model...")
            saver.save(sess, save_path, global_step=i)

        if i % print_every == 0:
            if verbose:
                print('step %d:\t loss %g,\t reg_loss %g,\t train acc %g' %
                      (i, loss, reg_loss_graph, train_accuracy))

    # test_batches = []
    # for i in range(4):
    #     idx = i * 500
    #     batch_acc = accuracy.eval(feed_dict={x: x_test[idx:idx+500, :], y_: y_test[idx:idx+500], keep_prob: 1.0})
    #     test_batches.append(batch_acc)
    # test_acc = np.mean(test_batches)
    test_acc = accuracy.eval(feed_dict={x: x_test, y_: y_test, keep_prob: 1.0})
    print('final step %d, train accuracy %g, test accuracy %g' %
          (i, train_accuracy, test_acc))
    # sess.close()
    train_writer.close()
    test_writer.close()
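This function leans on helpers (weight_variable, bias_variable, conv2d, max_pool_2x2) defined elsewhere in the project; they presumably follow the standard TF-1.x tutorial pattern, roughly:

import tensorflow as tf

def weight_variable(shape, name=None):
    # Truncated-normal initialization is the usual TF-1.x tutorial default.
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1), name=name)

def bias_variable(shape, name=None):
    # Small positive bias to avoid dead ReLU units.
    return tf.Variable(tf.constant(0.1, shape=shape), name=name)

def conv2d(x, W):
    # Stride-1 convolution; 'SAME' padding keeps the spatial size.
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

def max_pool_2x2(x):
    # 2x2 max pooling with stride 2 halves each spatial dimension.
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1],
                          padding='SAME')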
def train(sub_dir, logging, model_save_dir, result_save_dir):
    if not os.path.exists(options['word_fts_path']):
        meta_data, train_data, test_data = get_video_data_jukin(
            options['video_data_path_train'], options['video_data_path_test'])
        captions = meta_data['Description'].values
        for c in string.punctuation:
            captions = map(lambda x: x.replace(c, ''), captions)
        wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(
            logging, captions, word_count_threshold=1)
        np.save(options['ixtoword_path'], ixtoword)
        np.save(options['wordtoix_path'], wordtoix)
        get_word_embedding(options['word_embedding_path'],
                           options['wordtoix_path'],
                           options['ixtoword_path'],
                           options['word_fts_path'])
        word_emb_init = np.array(
            np.load(open(options['word_fts_path'])).tolist(), np.float32)
    else:
        wordtoix = (np.load(options['wordtoix_path'])).tolist()
        ixtoword = (np.load(options['ixtoword_path'])).tolist()
        word_emb_init = np.array(
            np.load(open(options['word_fts_path'])).tolist(), np.float32)

    train_data = get_video_data_HL(options['video_data_path_train'])  # get h5 file list

    if finetune:
        start_epoch = 150
        MODEL = model_save_dir + '/model-' + str(start_epoch - 1)

    model = SSAD_SCM(options, word_emb_init)
    inputs, outputs = model.build_train()
    t_loss = outputs['loss_all']
    t_loss_ssad = outputs['loss_ssad']
    t_loss_regular = outputs['reg_loss']
    t_positive_loss_all = outputs['positive_loss_all']
    t_hard_negative_loss_all = outputs['hard_negative_loss_all']
    t_easy_negative_loss_all = outputs['easy_negative_loss_all']
    t_smooth_center_loss_all = outputs['smooth_center_loss_all']
    t_smooth_width_loss_all = outputs['smooth_width_loss_all']
    t_feature_segment = inputs['feature_segment']
    t_sentence_index_placeholder = inputs['sentence_index_placeholder']
    t_sentence_w_len = inputs['sentence_w_len']
    t_gt_overlap = inputs['gt_overlap']

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.InteractiveSession(config=config)

    optimizer = optimizer_factory[options['optimizer']](
        **options['opt_arg'][options['optimizer']])
    if options['clip']:
        gvs = optimizer.compute_gradients(t_loss)
        capped_gvs = [(tf.clip_by_value(grad, -1., 1.), var)
                      for grad, var in gvs]
        train_op = optimizer.apply_gradients(capped_gvs)
    else:
        train_op = optimizer.minimize(t_loss)

    with tf.device("/cpu:0"):
        saver = tf.train.Saver(max_to_keep=200)
    tf.global_variables_initializer().run()
    with tf.device("/cpu:0"):
        if finetune:
            saver.restore(sess, MODEL)

    ############################# start training #############################
    tStart_total = time.time()
    for epoch in range(options['max_epochs']):
        index = np.arange(len(train_data))
        np.random.shuffle(index)
        train_data = train_data[index]

        tStart_epoch = time.time()
        # each item in loss_list records the loss of one h5 file
        loss_list = np.zeros(len(train_data))
        loss_ssad_list = np.zeros(len(train_data))
        loss_positive_loss_all_list = np.zeros(len(train_data))
        loss_hard_negative_loss_all_list = np.zeros(len(train_data))
        loss_easy_negative_loss_all_list = np.zeros(len(train_data))
        loss_smooth_center_loss_all_list = np.zeros(len(train_data))
        loss_smooth_width_loss_all_list = np.zeros(len(train_data))

        for current_batch_file_idx in xrange(len(train_data)):
            logging.info("current_batch_file_idx = {:d}".format(current_batch_file_idx))
            logging.info(train_data[current_batch_file_idx])
            tStart = time.time()
            current_batch = h5py.File(train_data[current_batch_file_idx], 'r')

            # processing sentence
            current_captions_tmp = current_batch['sentence']
            current_captions = []
            for ind in range(options['batch_size']):
                current_captions.append(current_captions_tmp[ind])
            current_captions = np.array(current_captions)
            for ind in range(options['batch_size']):
                for c in string.punctuation:
                    current_captions[ind] = current_captions[ind].replace(c, '')
            for i in range(options['batch_size']):
                current_captions[i] = current_captions[i].strip()
                if current_captions[i] == '':
                    current_captions[i] = '.'
            current_caption_ind = map(
                lambda cap: [wordtoix[word] for word in cap.lower().split(' ')
                             if word in wordtoix], current_captions)
            current_caption_matrix = sequence.pad_sequences(
                current_caption_ind, padding='post',
                maxlen=options['max_sen_len'] - 1)
            current_caption_matrix = np.hstack(
                [current_caption_matrix,
                 np.zeros([len(current_caption_matrix), 1])]).astype(int)
            # save the sentence length of this batch
            current_caption_length = np.array(
                map(lambda x: (x != 0).sum(), current_caption_matrix))

            # processing video
            current_video_feats = np.array(current_batch['video_source_fts'])
            current_anchor_input = np.array(current_batch['anchor_input'])
            current_ground_interval = np.array(current_batch['ground_interval'])
            current_video_name = current_batch['video_name']
            current_video_duration = np.array(current_batch['video_duration'])

            _, loss, loss_ssad, positive_loss_all, hard_negative_loss_all, \
                easy_negative_loss_all, smooth_center_loss_all, \
                smooth_width_loss_all, loss_regular = sess.run(
                    [train_op, t_loss, t_loss_ssad, t_positive_loss_all,
                     t_hard_negative_loss_all, t_easy_negative_loss_all,
                     t_smooth_center_loss_all, t_smooth_width_loss_all,
                     t_loss_regular],
                    feed_dict={
                        t_feature_segment: current_video_feats,
                        t_sentence_index_placeholder: current_caption_matrix,
                        t_sentence_w_len: current_caption_length,
                        t_gt_overlap: current_anchor_input
                    })

            loss_list[current_batch_file_idx] = loss
            loss_ssad_list[current_batch_file_idx] = loss_ssad
            loss_positive_loss_all_list[current_batch_file_idx] = positive_loss_all
            loss_hard_negative_loss_all_list[current_batch_file_idx] = hard_negative_loss_all
            loss_easy_negative_loss_all_list[current_batch_file_idx] = easy_negative_loss_all
            loss_smooth_center_loss_all_list[current_batch_file_idx] = smooth_center_loss_all
            loss_smooth_width_loss_all_list[current_batch_file_idx] = smooth_width_loss_all

            logging.info(
                "loss = {:f} loss_ssad = {:f} loss_regular = {:f} "
                "positive_loss_all = {:f} hard_negative_loss_all = {:f} "
                "easy_negative_loss_all = {:f} smooth_center_loss_all = {:f} "
                "smooth_width_loss_all = {:f}".format(
                    loss, loss_ssad, loss_regular, positive_loss_all,
                    hard_negative_loss_all, easy_negative_loss_all,
                    smooth_center_loss_all, smooth_width_loss_all))

        if finetune:
            logging.info("Epoch: {:d} done.".format(epoch + start_epoch))
        else:
            logging.info("Epoch: {:d} done.".format(epoch))
        tStop_epoch = time.time()
        logging.info('Epoch Time Cost: {:f} s'.format(round(tStop_epoch - tStart_epoch, 2)))
        logging.info('Current Epoch Mean loss {:f}'.format(np.mean(loss_list)))
        logging.info('Current Epoch Mean loss_ssad {:f}'.format(np.mean(loss_ssad_list)))
        logging.info('Current Epoch Mean positive_loss_all {:f}'.format(np.mean(loss_positive_loss_all_list)))
        logging.info('Current Epoch Mean hard_negative_loss_all {:f}'.format(np.mean(loss_hard_negative_loss_all_list)))
        logging.info('Current Epoch Mean easy_negative_loss_all {:f}'.format(np.mean(loss_easy_negative_loss_all_list)))
        logging.info('Current Epoch Mean smooth_center_loss_all {:f}'.format(np.mean(loss_smooth_center_loss_all_list)))
        logging.info('Current Epoch Mean smooth_width_loss_all {:f}'.format(np.mean(loss_smooth_width_loss_all_list)))

        ############################# save model #############################
        if np.mod(epoch, 1) == 0 and epoch >= 50:
            if finetune:
                logging.info('Epoch {:d} is done. Saving the model ...'.format(epoch + start_epoch))
            else:
                logging.info('Epoch {:d} is done. Saving the model ...'.format(epoch))
            with tf.device("/cpu:0"):
                if finetune:
                    saver.save(sess, os.path.join(model_save_dir, 'model'),
                               global_step=epoch + start_epoch)
                else:
                    saver.save(sess, os.path.join(model_save_dir, 'model'),
                               global_step=epoch)

    logging.info("Finally, saving the model ...")
    with tf.device("/cpu:0"):
        if finetune:
            saver.save(sess, os.path.join(model_save_dir, 'model'),
                       global_step=epoch + start_epoch)
        else:
            saver.save(sess, os.path.join(model_save_dir, 'model'),
                       global_step=epoch)
    tStop_total = time.time()
    logging.info("Total Time Cost: {:f} s".format(round(tStop_total - tStart_total, 2)))
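optimizer_factory and its opt_arg entries come from the surrounding project; presumably it is a simple name-to-constructor map along these lines (an assumption, shown for context):

optimizer_factory = {
    'adam': tf.train.AdamOptimizer,              # opt_arg e.g. {'learning_rate': 1e-3}
    'sgd': tf.train.GradientDescentOptimizer,    # opt_arg e.g. {'learning_rate': 1e-2}
    'momentum': tf.train.MomentumOptimizer,      # needs learning_rate and momentum
    'adadelta': tf.train.AdadeltaOptimizer,
}
# usage, as in the training code above:
# optimizer = optimizer_factory[options['optimizer']](**options['opt_arg'][options['optimizer']])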
def train(model, hparams, ckpt_dir, hparams_file_path, data_and_labels):
    # Data and labels.
    train_batch_data, train_batch_label, test_batch_data, test_batch_label = data_and_labels

    # Create session and save graph.
    sess = tf.InteractiveSession()
    summary_op = tf.summary.merge_all()
    summary_writer = tf.summary.FileWriter(ckpt_dir, sess.graph)

    # Restore variables and continue training, or initialize all of them.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        trained_steps = int(ckpt.model_checkpoint_path.split('-')[-1])
        # The following is needed when tf.train.string_input_producer() is
        # used with its num_epochs parameter.
        # sess.run(tf.local_variables_initializer())
    else:
        sess.run(tf.global_variables_initializer())
        trained_steps = 0
        # The following is needed when tf.train.string_input_producer() is
        # used with its num_epochs parameter.
        # sess.run(tf.local_variables_initializer())

    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        # Here, k defaults to 10.
        top_k_acc = []
        for epoch in range(hparams.epoches):

            ##### Part I: Model train and test. --start-- #####
            def train_one_step():
                # Feed dict, train phase.
                feed_data, feed_label = sess.run([train_batch_data, train_batch_label])
                feed_dict = {model.ph_data: feed_data,
                             model.ph_label: feed_label,
                             model.ph_is_training: True}
                # Every summary_step, print the current train accuracy;
                # otherwise just train on one batch of data.
                if step % FLAGS.summary_step == 0 and step != 0:
                    op_list = [model.train_op, model.argmax_output, summary_op]
                    _, train_out_label, summaries = sess.run(op_list, feed_dict=feed_dict)
                    # Summary flush.
                    summary_writer.add_summary(summaries, global_step=global_step)
                    summary_writer.flush()
                    # predict_correct_num = tf.nn.in_top_k(np.argmax(probs, axis=1), feed_label)
                    train_std_label = np.argmax(feed_label, axis=1)
                    predict_correct_num = np.sum(train_out_label == train_std_label)
                    batch_accuracy = predict_correct_num / hparams.batch_size
                    train_acc.append(batch_accuracy)
                    # Output labels and train_acc.
                    avg_train_accuracy = np.mean(train_acc[-FLAGS.summary_step:-1])
                    print('{}\n{} | train_acc: {} {} {}'.format(
                        train_std_label, train_out_label, batch_accuracy,
                        avg_train_accuracy, global_step))
                else:
                    _, train_out_label = sess.run(
                        [model.train_op, model.argmax_output], feed_dict=feed_dict)
                    train_std_label = np.argmax(feed_label, axis=1)
                    predict_correct_num = np.sum(train_out_label == train_std_label)
                    batch_accuracy = predict_correct_num / hparams.batch_size
                    train_acc.append(batch_accuracy)

            def test_ckpt(current_epoch, global_step):
                start_time_test = time.time()
                test_steps = int(hparams.test_data_num / hparams.batch_size)
                correct_sum = 0
                for test_step in range(test_steps):
                    test_feed_data, test_feed_label = sess.run(
                        [test_batch_data, test_batch_label])
                    output_label = sess.run(
                        model.argmax_output,
                        feed_dict={model.ph_data: test_feed_data,
                                   model.ph_is_training: False})
                    standard_label = np.argmax(test_feed_label, axis=1)
                    correct_sum += np.sum(output_label == standard_label)
                    # Output labels and batch_accuracy.
                    if test_step % FLAGS.summary_step == 0 and test_step != 0:
                        batch_accuracy = np.sum(output_label == standard_label) / hparams.batch_size
                        print('{}\n{} | batch_acc: {}'.format(
                            standard_label, output_label, batch_accuracy))
                test_accuracy = correct_sum / (test_steps * hparams.batch_size)
                print('test acc:{} {}'.format(test_accuracy, global_step))
                duration = time.time() - start_time_test
                print('Test in {} epoch cost {}'.format(current_epoch, duration))
                logging.error('test acc:{} {}'.format(test_accuracy, global_step))
                return test_accuracy

            start_time_epoch = time.time()
            steps = int(hparams.train_data_num / hparams.batch_size)
            train_acc = []
            for step in range(steps):
                # Global step across all epochs.
                global_step = trained_steps + epoch * steps + step
                # current_epoch = global_step * hparams.batch_size // hparams.train_data_num
                # Train one step.
                train_one_step()
                # Every checkpoint_step, calculate and print acc on the test data.
                if step % FLAGS.checkpoint_step == 0 and step != 0:
                    # Test.
                    test_accuracy = test_ckpt(epoch, global_step)
                    # Write the test_accuracy to file.
                    write_test_acc(hparams_file_path, epoch, global_step, test_accuracy)
                    # Save ckpt.
                    save_ckpt_file(sess, saver, ckpt_dir, global_step,
                                   top_k_acc, test_accuracy)
            duration = time.time() - start_time_epoch
            print('The {} epoch duration: {}'.format(epoch, duration))
            ##### Part I: Model train and test. --end-- #####

            ##### Part II: Video smoke_detection_win. --start-- #####
            # smoke_detection_win(sess, model)
            ##### Part II: Video smoke_detection_win. --end-- #####
    except Exception as e:
        logging.exception(e)
    finally:
        coord.request_stop()
        coord.join(threads)
        sess.close()
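write_test_acc and save_ckpt_file are project helpers that are not shown. Given the top_k_acc list threaded through above (with k defaulting to 10 per the comment), a plausible sketch of the saver is the following; its name, signature, and behavior are assumptions:

import os

def save_ckpt_file(sess, saver, ckpt_dir, global_step, top_k_acc,
                   test_accuracy, k=10):
    # Keep a running list of the k best test accuracies; only save a
    # checkpoint when the new accuracy makes the cut.
    if len(top_k_acc) < k or test_accuracy > min(top_k_acc):
        top_k_acc.append(test_accuracy)
        top_k_acc.sort(reverse=True)
        del top_k_acc[k:]
        saver.save(sess, os.path.join(ckpt_dir, 'model.ckpt'),
                   global_step=global_step)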
        if step % 100 == 0:
            acc = sess.run(accuracy, feed_dict={_inputs: x_batch,
                                                _labels: y_batch,
                                                _seqlens: seqlen_batch})
            print("Accuracy at %d: %.5f" % (step, acc))

    for test_batch in range(5):
        x_test, y_test, seqlen_test = get_sentence_batch(
            batch_size, test_x, test_y, test_seqlens)
        batch_pred, batch_acc = sess.run(
            [tf.argmax(final_output, 1), accuracy],
            feed_dict={_inputs: x_test,
                       _labels: y_test,
                       _seqlens: seqlen_test})
        print("Test batch accuracy %d: %.5f" % (test_batch, batch_acc))

    output_example = sess.run([outputs], feed_dict={_inputs: x_test,
                                                    _labels: y_test,
                                                    _seqlens: seqlen_test})
    states_example = sess.run([states[1]], feed_dict={_inputs: x_test,
                                                      _labels: y_test,
                                                      _seqlens: seqlen_test})


# In[11]:

with tf.InteractiveSession() as sess:
    print(embed.eval())
def _init_session(self):
    self.sess = tf.InteractiveSession()
    self.sess.run(tf.global_variables_initializer())
    if self.mode == 'train':
        if self.train_initializer is not None:
            self.sess.run(self.train_initializer)
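The train_initializer guarded above suggests an initializable tf.data iterator; a minimal sketch of where such an initializer typically comes from (assumed, not part of the original class):

import numpy as np
import tensorflow as tf

features = np.random.rand(100, 8).astype(np.float32)
labels = np.random.randint(0, 2, size=100)

# Build a shuffled, batched dataset and an initializable iterator over it.
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).shuffle(100).batch(32)
iterator = dataset.make_initializable_iterator()
next_batch = iterator.get_next()
# This is the kind of op _init_session would run in 'train' mode.
train_initializer = iterator.initializer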
def omniglot(load_model=False):
    sess = tf.InteractiveSession()
    saver = tf.train.Saver()

    if (load_model):
        ckpt = tf.train.get_checkpoint_state('./saved/')
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
        else:
            print("No Checkpoint found, setting load to false")
            load_model = False

    ## Global variables for the Omniglot problem
    nb_reads = 4
    controller_size = 200
    memory_shape = (128, 40)
    nb_class = 5
    input_size = 20 * 20
    batch_size = 16
    nb_samples_per_class = 10

    input_ph = tf.placeholder(
        dtype=tf.float32,
        shape=(batch_size, nb_class * nb_samples_per_class, input_size))  # (batch_size, time, input_dim)
    target_ph = tf.placeholder(
        dtype=tf.int32,
        shape=(batch_size, nb_class * nb_samples_per_class))  # (batch_size, time) (label indices)

    # Load data
    generator = OmniglotGenerator(data_folder='./data/omniglot',
                                  batch_size=batch_size,
                                  nb_samples=nb_class,
                                  nb_samples_per_class=nb_samples_per_class,
                                  max_rotation=0.,
                                  max_shift=0.,
                                  max_iter=None)
    output_var, output_var_flatten, params = memory_augmented_neural_network(
        input_ph, target_ph,
        batch_size=batch_size,
        nb_class=nb_class,
        memory_shape=memory_shape,
        controller_size=controller_size,
        input_size=input_size,
        nb_reads=nb_reads)

    print 'Compiling the Model'

    with tf.variable_scope("Weights", reuse=True):
        W_key = tf.get_variable('W_key', shape=(nb_reads, controller_size, memory_shape[1]))
        b_key = tf.get_variable('b_key', shape=(nb_reads, memory_shape[1]))
        W_add = tf.get_variable('W_add', shape=(nb_reads, controller_size, memory_shape[1]))
        b_add = tf.get_variable('b_add', shape=(nb_reads, memory_shape[1]))
        W_sigma = tf.get_variable('W_sigma', shape=(nb_reads, controller_size, 1))
        b_sigma = tf.get_variable('b_sigma', shape=(nb_reads, 1))
        W_xh = tf.get_variable('W_xh', shape=(input_size + nb_class, 4 * controller_size))
        b_h = tf.get_variable('b_xh', shape=(4 * controller_size))
        W_o = tf.get_variable('W_o', shape=(controller_size + nb_reads * memory_shape[1], nb_class))
        b_o = tf.get_variable('b_o', shape=(nb_class))
        W_rh = tf.get_variable('W_rh', shape=(nb_reads * memory_shape[1], 4 * controller_size))
        W_hh = tf.get_variable('W_hh', shape=(controller_size, 4 * controller_size))
        gamma = tf.get_variable('gamma', shape=[1],
                                initializer=tf.constant_initializer(0.95))
    params = [W_key, b_key, W_add, b_add, W_sigma, b_sigma, W_xh, W_rh, W_hh,
              b_h, W_o, b_o]

    # output_var = tf.cast(output_var, tf.int32)
    target_ph_oh = tf.one_hot(target_ph, depth=generator.nb_samples)
    print 'Output, Target shapes: ', output_var.get_shape().as_list(), target_ph_oh.get_shape().as_list()

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=output_var,
                                                labels=target_ph_oh),
        name="cost")
    opt = tf.train.AdamOptimizer(learning_rate=1e-3)
    train_step = opt.minimize(cost, var_list=params)
    # train_step = tf.train.AdamOptimizer(1e-3).minimize(cost)

    accuracies = accuracy_instance(tf.argmax(output_var, axis=2), target_ph,
                                   batch_size=generator.batch_size)
    sum_out = tf.reduce_sum(
        tf.reshape(tf.one_hot(tf.argmax(output_var, axis=2),
                              depth=generator.nb_samples),
                   (-1, generator.nb_samples)),
        axis=0)

    print 'Done'

    tf.summary.scalar('cost', cost)
    for i in range(generator.nb_samples_per_class):
        tf.summary.scalar('accuracy-' + str(i), accuracies[i])
    merged = tf.summary.merge_all()
    # writer = tf.summary.FileWriter('/tmp/tensorflow', graph=tf.get_default_graph())
    train_writer = tf.summary.FileWriter('/tmp/tensorflow/', sess.graph)

    t0 = time.time()
    all_scores, scores, accs = [], [], np.zeros(generator.nb_samples_per_class)

    if not load_model:
        sess.run(tf.global_variables_initializer())

    print 'Training the model'

    try:
        for i, (batch_input, batch_output) in generator:
            feed_dict = {input_ph: batch_input, target_ph: batch_output}
            # print batch_input.shape, batch_output.shape
            train_step.run(feed_dict)
            score = cost.eval(feed_dict)
            acc = accuracies.eval(feed_dict)
            temp = sum_out.eval(feed_dict)
            summary = merged.eval(feed_dict)
            train_writer.add_summary(summary, i)
            print i, ' ', temp
            all_scores.append(score)
            scores.append(score)
            accs += acc
            if i > 0 and not (i % 100):
                print(accs / 100.0)
                print('Episode %05d: %.6f' % (i, np.mean(score)))
                scores, accs = [], np.zeros(generator.nb_samples_per_class)
                saver.save(sess, './saved/model.ckpt', global_step=i + 1)
    except KeyboardInterrupt:
        print time.time() - t0
        pass
def test(batch_size, num_test, epoch_id, lstm_mod, html_type):
    # shared placeholders
    keep_prob = tf.placeholder(tf.float32, name='keep_prob_placeholder')
    y = tf.placeholder(tf.float32, [None, ], name='label_placeholder')

    # image placeholders
    x = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 3],
                       name='image_placeholder')
    vr_type = tf.placeholder(tf.float32, [None, type_num], name='type_placeholder')

    # text placeholders
    title = tf.placeholder(tf.int32, (None, None))
    title_len = tf.placeholder(tf.int32, (None))
    snippet = tf.placeholder(tf.int32, (None, None))
    snippet_len = tf.placeholder(tf.int32, (None))
    session_title = tf.placeholder(tf.int32, (None, None))
    sess_len_title = tf.placeholder(tf.int32, (None))
    session_snippet = tf.placeholder(tf.int32, (None, None))
    sess_len_snippet = tf.placeholder(tf.int32, (None))
    sessions_weight_snippet = tf.placeholder(
        tf.float32, [None, sess_sen_len_snippet, feature_dim])
    attention_title = tf.placeholder(tf.float32,
                                     [None, max_title_len_top, feature_dim])
    attention_snippet = tf.placeholder(
        tf.float32, [None, max_snippet_len_top, feature_dim])

    # html placeholders
    html_tag = tf.placeholder(tf.int32, [None, html_dim], name='tag_placeholder')
    html_class = tf.placeholder(tf.int32, [None, html_dim], name='class_placeholder')

    # with tf.name_scope('image'):
    image_placeholders = [x, vr_type, keep_prob]
    pred_image = image(image_placeholders)

    # with tf.name_scope('title'):
    title_placeholders = [title, title_len, attention_title, session_title,
                          sess_len_title]
    pred_title = text(title_placeholders, 'title')

    # with tf.name_scope('snippet'):
    snippet_placeholders = [snippet, snippet_len, attention_snippet,
                            session_snippet, sess_len_snippet,
                            sessions_weight_snippet]
    pred_snippet = text(snippet_placeholders, 'snippet')

    # with tf.name_scope('html'):
    html_placeholders = [html_tag, html_class]
    pred_html = html(html_placeholders)

    pred_combine = tf.squeeze(
        tf.concat([pred_image, pred_title, pred_snippet, pred_html], 1))
    balance_raw = tf.Variable(tf.ones([4]), name='balance', trainable=True)
    balance_sum = tf.reduce_sum(balance_raw)
    balance = tf.div(balance_raw, balance_sum)
    pred_final = tf.reduce_sum(tf.multiply(pred_combine, balance), 1)

    with tf.name_scope("loss"):
        sigmoid_cross_entropy = cross_entropy(labels=tf.squeeze(y),
                                              logits=pred_final)
        loss_cross_entropy = tf.reduce_mean(sigmoid_cross_entropy,
                                            name='loss_cross_entropy')
        loss_mse = tf.reduce_mean(tf.square(pred_final - tf.squeeze(y)))
        loss = loss_mse

    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(
        sess,
        model_base + 'checkpoint/JRE/model_JRE_epoch_' + epoch_id + '.ckpt')

    test_dataset = '201709'
    tvt_file = data_base + '201709/info_top_10_id_201709'
    images_test, rels_test, num_data_test = set_data_image(tvt_file, test_dataset)
    type_test = set_data_type(tvt_file)
    titles_test, snippets_test, rels_test, queries_test, num_data_test = set_data_text(
        'text', tvt_file)
    sess_title_test, sessions_weight_title_test = set_data_sess(
        'title', tvt_file, test_dataset)
    sess_snippet_test, sessions_weight_snippet_test = set_data_sess(
        'snippet', tvt_file, test_dataset)
    DFS_tag_test, DFS_class_test, BFS_tag_test, BFS_class_test, rels_test, num_data_test = set_data_html(
        tvt_file, test_dataset)
    print('test data num:{}'.format(num_data_test))
    if num_test == 'all':
        num_test = num_data_test
    else:
        num_test = int(num_test)

    dropout_rate_test = 1
    print("{} Start testing...".format(datetime.now()))
    loss_total = 0.
    pred_all, pred_combine_all, label_all = [], [], []
    iters = num_test / batch_size
    print('Start......')
    start = time.time()
    for iter in xrange(iters):
        ind = set_random_ind(num_data_test, batch_size, random=False, iter_=iter)
        pic_input, label_input = data_batch_image(images_test, rels_test,
                                                  num_data_test, batch_size, ind)
        type_input = data_batch_type(type_test, batch_size, ind)
        title_input, title_len_input, label_input, attention_title_input = data_batch_text(
            titles_test, queries_test, window_weight, rels_test, num_data_test,
            batch_size, max_title_len_top, ind)
        snippet_input, snippet_len_input, label_input, attention_snippet_input = data_batch_text(
            snippets_test, queries_test, window_weight, rels_test, num_data_test,
            batch_size, max_snippet_len_top, ind)
        sess_title_input, sess_title_len_input, label_input, attention_sess_title_input = data_batch_text(
            sess_title_test, queries_test, window_weight, rels_test, num_data_test,
            batch_size, sess_sen_len_title, ind)
        sess_snippet_input, sess_snippet_len_input, label_input, attention_sess_snippet_input = data_batch_text(
            sess_snippet_test, queries_test, window_weight, rels_test, num_data_test,
            batch_size, sess_sen_len_snippet, ind)
        sessions_weight_snippet_input = sess_weight_batch(
            'snippet', batch_size, sessions_weight_snippet_test, ind)
        if html_type == 'DFS':
            tag_input, label_input = data_batch_html(DFS_tag_test, rels_test, ind)
            class_input, label_input = data_batch_html(DFS_class_test, rels_test, ind)
        elif html_type == 'BFS':
            tag_input, label_input = data_batch_html(BFS_tag_test, rels_test, ind)
            class_input, label_input = data_batch_html(BFS_class_test, rels_test, ind)

        pred_final_, pred_combine_, loss_, loss_cross_entropy_, loss_mse_, balance_ = sess.run(
            [pred_final, pred_combine, loss, loss_cross_entropy, loss_mse, balance],
            feed_dict={
                y: label_input,
                keep_prob: dropout_rate_test,  # keep probability 1 at test time
                x: pic_input,
                vr_type: type_input,
                title: title_input,
                title_len: title_len_input,
                session_title: sess_title_input,
                sess_len_title: sess_title_len_input,
                attention_title: attention_title_input,
                snippet: snippet_input,
                snippet_len: snippet_len_input,
                session_snippet: sess_snippet_input,
                sess_len_snippet: sess_snippet_len_input,
                sessions_weight_snippet: sessions_weight_snippet_input,
                attention_snippet: attention_snippet_input,
                html_tag: tag_input,
                html_class: class_input
            })
        loss_total += loss_ * batch_size
        pred_all.append(pred_final_)
        pred_combine_all.append(pred_combine_)
        label_all.append(label_input)

    end = time.time()
    print('Total Time:{}'.format(end - start))
    print('average loss: {}'.format(loss_total * 1.0 / iters / batch_size))
    pred_all = np.squeeze(np.concatenate((np.array(pred_all)), axis=0))
    label_all = np.squeeze(np.concatenate((np.array(label_all)), axis=0))
    fusion_file = open(
        result_base + 'JRE_' + test_dataset + '_' + epoch_id + '.txt', 'w')
    for i in range(iters * batch_size):
        fusion_file.write(images_test[i].split('/')[-1] + '\t' +
                          str(label_all[i]) + '\t' + str(pred_all[i]) + '\n')
X_train = data_train[:, 1:]
y_train = data_train[:, 0]
X_test = data_test[:, 1:]
y_test = data_test[:, 0]

# Number of stocks in training data
n_stocks = X_train.shape[1]

# Neurons
n_neurons_1 = 1024
n_neurons_2 = 512
n_neurons_3 = 256
n_neurons_4 = 128

# Session
net = tf.InteractiveSession()

# Placeholder
X = tf.placeholder(dtype=tf.float32, shape=[None, n_stocks])
Y = tf.placeholder(dtype=tf.float32, shape=[None])

# Initializers
sigma = 1
weight_initializer = tf.variance_scaling_initializer(mode="fan_avg",
                                                     distribution="uniform",
                                                     scale=sigma)
bias_initializer = tf.zeros_initializer()

# Hidden weights
W_hidden_1 = tf.Variable(weight_initializer([n_stocks, n_neurons_1]))
bias_hidden_1 = tf.Variable(bias_initializer([n_neurons_1]))
W_hidden_2 = tf.Variable(weight_initializer([n_neurons_1, n_neurons_2]))
bias_hidden_2 = tf.Variable(bias_initializer([n_neurons_2]))
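The snippet breaks off after the second hidden layer's parameters. The remaining layers presumably repeat the same pattern; a sketch of the usual continuation for this kind of feed-forward regression network (the layer sizes follow n_neurons_3/n_neurons_4 above, while the output shape, loss, and optimizer are assumptions):

# Remaining parameters, same pattern as above.
W_hidden_3 = tf.Variable(weight_initializer([n_neurons_2, n_neurons_3]))
bias_hidden_3 = tf.Variable(bias_initializer([n_neurons_3]))
W_hidden_4 = tf.Variable(weight_initializer([n_neurons_3, n_neurons_4]))
bias_hidden_4 = tf.Variable(bias_initializer([n_neurons_4]))
W_out = tf.Variable(weight_initializer([n_neurons_4, 1]))
bias_out = tf.Variable(bias_initializer([1]))

# Forward pass: four ReLU hidden layers and a linear output.
hidden_1 = tf.nn.relu(tf.add(tf.matmul(X, W_hidden_1), bias_hidden_1))
hidden_2 = tf.nn.relu(tf.add(tf.matmul(hidden_1, W_hidden_2), bias_hidden_2))
hidden_3 = tf.nn.relu(tf.add(tf.matmul(hidden_2, W_hidden_3), bias_hidden_3))
hidden_4 = tf.nn.relu(tf.add(tf.matmul(hidden_3, W_hidden_4), bias_hidden_4))
out = tf.transpose(tf.add(tf.matmul(hidden_4, W_out), bias_out))

# MSE loss against the targets and an Adam training step.
mse = tf.reduce_mean(tf.squared_difference(out, Y))
opt = tf.train.AdamOptimizer().minimize(mse)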
def main(_):
    with tf.device('/gpu:0'):
        # Input
        x = tf.placeholder(tf.float32, [None, model.time_step, model.num_input])
        y_ = tf.placeholder(tf.float32, [None, model.num_class])

        # Create lstm model
        y_lstm, keep_prob = model.lstm(x)

        # Define loss
        with tf.name_scope('loss'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                labels=y_, logits=y_lstm)
            cross_entropy = tf.reduce_mean(cross_entropy)

        # Define optimizer
        with tf.name_scope('adam_optimizer'):
            train_step = tf.train.AdamOptimizer(learn_rate).minimize(cross_entropy)

        # Create the node to calculate accuracy
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(y_lstm, 1), tf.argmax(y_, 1))
            correct_prediction = tf.cast(correct_prediction, tf.float32)
            accuracy = tf.reduce_mean(correct_prediction)

        # Create the node to calculate auc
        with tf.name_scope('auc'):
            labels = tf.reshape(
                tf.slice(tf.cast(y_, dtype=tf.bool), [0, 0], [-1, 1]), [-1])
            predictions = tf.reshape(
                tf.subtract(tf.slice(y_lstm, [0, 0], [-1, 1]),
                            tf.slice(y_lstm, [0, 1], [-1, 1])), [-1])
            # Min-max normalization
            Y_pred = (predictions - tf.reduce_min(predictions)) / (
                tf.reduce_max(predictions) - tf.reduce_min(predictions))
            roc_auc, roc_auc_update_op = tf.metrics.auc(labels, Y_pred,
                                                        curve='ROC', name='roc')

        # Create the nodes to calculate acc, recall, and error counts
        with tf.name_scope('metrics'):
            acc, acc_op = tf.metrics.accuracy(tf.argmax(y_, 1), tf.argmax(y_lstm, 1))
            rec, rec_op = tf.metrics.recall(tf.argmax(y_, 1), tf.argmax(y_lstm, 1))
            all_pos = tf.reduce_sum(tf.argmin(y_lstm, 1))
            all_neg = tf.reduce_sum(tf.argmax(y_lstm, 1))
            fn, fn_op = tf.metrics.false_negatives(tf.argmax(y_, 1), tf.argmax(y_lstm, 1))
            fp, fp_op = tf.metrics.false_positives(tf.argmax(y_, 1), tf.argmax(y_lstm, 1))

        # Add ops to save and restore all the variables
        saver = tf.train.Saver()

    sess = tf.InteractiveSession()
    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        for seed in range(1, seeds_num + 1):
            print('*' * 30, 'seed=', seed, '*' * 30)
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            summary_writer = tf.summary.FileWriter(LOGS_DIRECTORY,
                                                   graph=tf.get_default_graph())
            sum_acc = 0
            sum_auc = 0
            sum_spec = 0
            sum_recall = 0
            record_fn = 0
            record_fp = 0
            training_accuracy_list = []
            all_piRNA = input_data.read_all(TRAIN_IMAGES, TRAIN_LABELS,
                                            test_size=test_size, seed=seed,
                                            is_display=is_display)
            test_accuracy_list = []
            for fold in range(10):
                print('fold %d:' % fold)
                piRNA = input_data.read_CV_datasets(
                    fold, int(DATA_NUM * (1 - test_size)), all_piRNA)
                for i in range(TOTAL_BATCH):
                    batch_x, batch_y = piRNA.train.next_batch(batch_size)
                    batch_x = batch_x.reshape(batch_size, model.time_step,
                                              model.num_input)
                    step, training_accuracy = sess.run(
                        [train_step, accuracy],
                        feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
                    # print out results
                    if i % 50 == 0:
                        print('step %d, training accuracy %g' % (i, training_accuracy))
                        training_accuracy_list.append(training_accuracy)
                    if i % 1000 == 0:
                        print('test accuracy %g' % accuracy.eval(
                            feed_dict={
                                x: piRNA.test.images.reshape(-1, model.time_step, model.num_input),
                                y_: piRNA.test.labels,
                                keep_prob: 1.0
                            }))

                auc, acc, recall, pred_neg, false_nega, false_posi, pred_pos = sess.run(
                    [roc_auc_update_op, acc_op, rec_op, all_neg, fn_op, fp_op, all_pos],
                    feed_dict={
                        x: piRNA.validation.images.reshape(-1, model.time_step, model.num_input),
                        y_: piRNA.validation.labels,
                        keep_prob: 1.0
                    })
                # update specificity
                current_fn = false_nega - record_fn
                current_fp = false_posi - record_fp
                true_nega = pred_neg - current_fn  # fn_op/fp_op accumulate every loop
                spec = true_nega / (true_nega + current_fp)
                record_fn = false_nega
                record_fp = false_posi

                test_accuracy = accuracy.eval(feed_dict={
                    x: piRNA.test.images.reshape(-1, model.time_step, model.num_input),
                    y_: piRNA.test.labels,
                    keep_prob: 1.0
                })
                test_accuracy_list.append(test_accuracy)
                # Test set
                print('Test set accuracy %g' % test_accuracy)

                # 10-CV metrics (acc, auc)
                sum_acc = cv.acc(sum_acc, acc, fold, is_display=is_display)
                sum_auc = cv.auc(sum_auc, auc, fold, is_display=is_display)
                sum_spec = cv.spec(sum_spec, spec, fold, is_display=is_display)
                sum_recall = cv.recall(sum_recall, recall, fold, is_display=is_display)

            test_accuracy_average = cv.average(test_accuracy_list)
            auc_average = cv.average(cv.auc_list)
            acc_average = cv.average(cv.acc_list)
            spec_average = cv.average(cv.spec_list)
            recall_average = cv.average(cv.recall_list)
            acc_list.append(acc_average)
            auc_list.append(auc_average)
            spec_list.append(spec_average)
            recall_list.append(recall_average)
            test_acc_list.append(test_accuracy_average)
            if is_display:
                print('*** Test accuracy is:', test_accuracy_list)
                print('*** The average test accuracy is:%.3f' % test_accuracy_average)
                print('acc', acc_average)
                print('auc', auc_average)
                print('spec', spec_average)
                print('recall', recall_average)

    data_frame = pd.DataFrame({
        'AUC': auc_list,
        'ACC': acc_list,
        'SP': spec_list,
        'SN': recall_list,
        'Test ACC': test_acc_list
    })
    data_frame.to_csv('drosophila1vs1.csv', index=True,
                      columns=['AUC', 'ACC', 'SP', 'SN', 'Test ACC'])
# slim = tf.contrib.slim
sys.path.append('../')

from pathlib import Path
from preprocessing import ssd_vgg_preprocessing
from utils import visualization
from ssd.ssdmodel import SSDModel
import utils.np_methods as np_methods

# TensorFlow session: grow memory when needed. TF, DO NOT USE ALL MY GPU MEMORY!!!
gpu_options = tf.GPUOptions(allow_growth=True)
config = tf.ConfigProto(log_device_placement=False, gpu_options=gpu_options)
isess = tf.InteractiveSession(config=config)

# Input placeholder.
net_shape = (512, 512)
data_format = 'NHWC'
img_input = tf.placeholder(tf.uint8, shape=(None, None, 3))
# Evaluation pre-processing: resize to SSD net shape.
image_pre, labels_pre, bboxes_pre, bbox_img = ssd_vgg_preprocessing.preprocess_for_eval(
    img_input, None, None, net_shape, data_format,
    resize=ssd_vgg_preprocessing.Resize.WARP_RESIZE)
image_4d = tf.expand_dims(image_pre, 0)

# Define the SSD model.
g_ssd_model = SSDModel('inception_v3', 'ssd512', weight_decay=0.0005)
predictions, localisations, _, _ = g_ssd_model.get_model(image_4d)

# Restore SSD model.
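The snippet stops at the restore step. A hedged sketch of what usually comes next in this kind of SSD demo; the checkpoint path, the anchors accessor on SSDModel, and the np_methods signatures (taken from the common SSD-TensorFlow demo flow) are all assumptions:

isess.run(tf.global_variables_initializer())
saver = tf.train.Saver()
saver.restore(isess, './checkpoints/ssd512_inception_v3.ckpt')  # assumed path

ssd_anchors = g_ssd_model.get_all_anchors()  # accessor name assumed

def process_image(img, select_threshold=0.5, nms_threshold=0.45):
    # Run the SSD network on a single image.
    rimg, rpredictions, rlocalisations, rbbox_img = isess.run(
        [image_4d, predictions, localisations, bbox_img],
        feed_dict={img_input: img})
    # Threshold on class scores, decode boxes w.r.t. anchors, clip, sort, NMS.
    rclasses, rscores, rbboxes = np_methods.ssd_bboxes_select(
        rpredictions, rlocalisations, ssd_anchors,
        select_threshold=select_threshold, img_shape=net_shape,
        num_classes=21, decode=True)
    rbboxes = np_methods.bboxes_clip(rbbox_img, rbboxes)
    rclasses, rscores, rbboxes = np_methods.bboxes_sort(
        rclasses, rscores, rbboxes, top_k=400)
    rclasses, rscores, rbboxes = np_methods.bboxes_nms(
        rclasses, rscores, rbboxes, nms_threshold=nms_threshold)
    rbboxes = np_methods.bboxes_resize(rbbox_img, rbboxes)
    return rclasses, rscores, rbboxes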
def main(*args):
    # Train model
    print('Training model...')
    mnist = input_data.read_data_sets(FLAGS.data_url, one_hot=True)
    sess = tf.InteractiveSession()
    serialized_tf_example = tf.placeholder(tf.string, name='tf_example')
    # Handwritten-digit images are 28*28, i.e. 784 pixels in total; every
    # pixel is a feature value, represented as a float.
    feature_configs = {'x': tf.FixedLenFeature(shape=[784], dtype=tf.float32)}
    tf_example = tf.parse_example(serialized_tf_example, feature_configs)

    # Build the training model.
    # Define the input features: the tensor x with 784 columns.
    x = tf.identity(tf_example['x'], name='x')
    # The labels are float one-hot vectors of length 10: n rows by 10
    # columns, where n depends on the number of training samples.
    y_ = tf.placeholder('float', shape=[None, 10])
    # Define the weights. Since we compute the matrix product of x and w,
    # the 784 columns of x must match the number of rows of w, so w is a
    # 784-row, 10-column tensor; 10 is the number of classes.
    w = tf.Variable(tf.zeros([784, 10]))
    # Define the bias.
    b = tf.Variable(tf.zeros([10]))
    # Compute the prediction: multiply the input x by the weights w, then
    # add the bias b.
    prediction = tf.matmul(x, w) + b
    # Apply softmax to turn the prediction into the output probabilities y.
    y = tf.nn.softmax(prediction)

    # Replace the original cost function with the cross-entropy cost.
    # cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    # Note: this op expects pre-softmax logits, so we pass `prediction`
    # rather than the already-softmaxed `y`.
    cross_entropy = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits_v2(labels=y_, logits=prediction))
    tf.summary.scalar('cross_entropy', cross_entropy)

    # Define the learning rate.
    learning_rate = 0.01
    # Minimize the cost with an optimizer.
    # train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(cross_entropy)
    train_step = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)

    # Initialize the global variables.
    sess.run(tf.global_variables_initializer())

    # Store the comparison results in a list of booleans. argmax returns the
    # position of the largest value in a 1-D tensor; when the positions
    # match, the prediction is correct.
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # Compute the accuracy: tf.cast converts the boolean array to floats
    # (True -> 1.0, False -> 0.0) and reduce_mean takes the mean, i.e. the
    # ratio of correct predictions. The larger, the more accurate. For
    # example: [True, True, True, True, False] => [1, 1, 1, 1, 0] => 4/5 => 80%.
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, 'float'))
    tf.summary.scalar('accuracy', accuracy)
    merged = tf.summary.merge_all()
    test_writer = tf.summary.FileWriter(FLAGS.train_url, flush_secs=1)

    # Start training; run for FLAGS.max_steps iterations (e.g. 1000).
    for step in range(FLAGS.max_steps):
        batch = mnist.train.next_batch(50)  # randomly read 50 training samples
        train_step.run(feed_dict={x: batch[0], y_: batch[1]})  # feed x and y_ and go
        if step % 10 == 0:
            # Evaluate accuracy on the test set.
            summary, acc = sess.run([merged, accuracy],
                                    feed_dict={x: mnist.test.images,
                                               y_: mnist.test.labels})
            test_writer.add_summary(summary, step)
            # print('training accuracy is:', acc)
            print("step: " + str(step) + ", accuracy: " + str(acc))
    print('Done training!')

    # Save the model.
    builder = tf.saved_model.builder.SavedModelBuilder(
        os.path.join(FLAGS.train_url, 'model'))
    tensor_info_x = tf.saved_model.utils.build_tensor_info(x)
    tensor_info_y = tf.saved_model.utils.build_tensor_info(y)
    prediction_signature = (
        tf.saved_model.signature_def_utils.build_signature_def(
            inputs={'images': tensor_info_x},
            outputs={'scores': tensor_info_y},
            method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME))
    builder.add_meta_graph_and_variables(
        sess, [tf.saved_model.tag_constants.SERVING],
        signature_def_map={
            'predict_images': prediction_signature,
        },
        main_op=tf.tables_initializer(),
        strip_default_attrs=True)
    builder.save()
    print('Done exporting!')
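For completeness, a sketch of how the exported model could be loaded back for inference. The signature and tensor-info names ('predict_images', 'images', 'scores') match the export above; the load call is the standard TF-1.x SavedModel loader, and the input batch here is a made-up example:

import os
import numpy as np
import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    meta_graph = tf.saved_model.loader.load(
        sess, [tf.saved_model.tag_constants.SERVING],
        os.path.join(FLAGS.train_url, 'model'))
    signature = meta_graph.signature_def['predict_images']
    x_name = signature.inputs['images'].name
    y_name = signature.outputs['scores'].name
    # Feed a batch of flattened 28x28 images (random here, for illustration).
    batch = np.random.rand(1, 784).astype(np.float32)
    scores = sess.run(y_name, feed_dict={x_name: batch})
    print(scores)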
def test(model_save_dir, result_save_dir):
    all_anchor_list = generate_all_anchor()
    # meta_data, train_data, test_data, val_data = get_video_data_jukin(options['video_data_path_train'], options['video_data_path_test'], options['video_data_path_val'])
    # wordtoix = (np.load(options['wordtoix_path'])).tolist()
    # word_emb_init = np.array(np.load(open(options['word_fts_path'])).tolist(), np.float32)
    wordtoix = (np.load(options['wordtoix_path'])).tolist()
    ixtoword = (np.load(options['ixtoword_path'])).tolist()
    word_emb_init = np.array(
        np.load(open(options['word_fts_path'])).tolist(), np.float32)

    train_data = get_video_data_HL(options['video_data_path_train'])  # get h5 file list
    test_data = get_video_data_HL(options['video_data_path_test'])

    model = SSAD_SCM(options, word_emb_init)
    inputs, t_predict_overlap, t_predict_reg = model.build_proposal_inference()
    t_feature_segment = inputs['feature_segment']
    t_sentence_index_placeholder = inputs['sentence_index_placeholder']
    t_sentence_w_len = inputs['sentence_w_len']

    config = tf.ConfigProto(allow_soft_placement=True)
    config.gpu_options.per_process_gpu_memory_fraction = 0.3
    sess = tf.InteractiveSession(config=config)
    with tf.device("/cpu:0"):
        saver = tf.train.Saver(max_to_keep=200)

    latest_checkpoint = tf.train.latest_checkpoint(model_save_dir)
    tmp = latest_checkpoint.split('model-')
    all_epoch = int(tmp[1])
    for epoch in range(all_epoch + 1):
        epoch_add = 50
        epoch_exact = epoch + epoch_add
        if os.path.exists(result_save_dir + '/' + str(epoch_exact) + '.pkl'):
            continue
        with tf.device("/cpu:0"):
            saver.restore(sess, tmp[0] + 'model-' + str(epoch_exact))
        result = []
        for current_batch_file_idx in xrange(len(test_data)):
            print current_batch_file_idx
            current_batch = h5py.File(test_data[current_batch_file_idx], 'r')

            # processing sentence
            current_captions_tmp = current_batch['sentence']
            current_captions = []
            for ind in range(options['batch_size']):
                current_captions.append(current_captions_tmp[ind])
            current_captions = np.array(current_captions)
            for ind in range(options['batch_size']):
                for c in string.punctuation:
                    current_captions[ind] = current_captions[ind].replace(c, '')
            for i in range(options['batch_size']):
                current_captions[i] = current_captions[i].strip()
                if current_captions[i] == '':
                    current_captions[i] = '.'
            current_caption_ind = map(
                lambda cap: [wordtoix[word] for word in cap.lower().split(' ')
                             if word in wordtoix], current_captions)
            current_caption_matrix = sequence.pad_sequences(
                current_caption_ind, padding='post',
                maxlen=options['max_sen_len'] - 1)
            current_caption_matrix = np.hstack(
                [current_caption_matrix,
                 np.zeros([len(current_caption_matrix), 1])]).astype(int)
            # save the sentence length of this batch
            current_caption_length = np.array(
                map(lambda x: (x != 0).sum(), current_caption_matrix))

            # processing video
            current_video_feats = np.array(current_batch['video_source_fts'])
            current_anchor_input = np.array(current_batch['anchor_input'])
            current_ground_interval = np.array(current_batch['ground_interval'])
            current_video_name = current_batch['video_name']
            current_video_duration = np.array(current_batch['video_duration'])

            predict_overlap, predict_reg = sess.run(
                [t_predict_overlap, t_predict_reg],
                feed_dict={
                    t_feature_segment: current_video_feats,
                    t_sentence_index_placeholder: current_caption_matrix,
                    t_sentence_w_len: current_caption_length
                })

            for batch_id in range(options['batch_size']):
                predict_overlap_list = []
                predict_center_list = []
                predict_width_list = []
                expand_anchor_list = []
                for anchor_group_id in range(len(options['feature_map_len'])):
                    for anchor_id in range(options['feature_map_len'][anchor_group_id]):
                        for kk in range(4):
                            predict_overlap_list.append(
                                predict_overlap[anchor_group_id][batch_id, 0, anchor_id, kk])
                            predict_center_list.append(
                                predict_reg[anchor_group_id][batch_id, 0, anchor_id, kk * 2])
                            predict_width_list.append(
                                predict_reg[anchor_group_id][batch_id, 0, anchor_id, kk * 2 + 1])
                            expand_anchor_list.append(
                                all_anchor_list[anchor_group_id][anchor_id][kk])

                a_left = []
                a_right = []
                a_score = []
                for index in range(len(predict_overlap_list)):
                    anchor = expand_anchor_list[index]
                    anchor_center = (anchor[1] - anchor[0]) * 0.5 + anchor[0]
                    anchor_width = anchor[1] - anchor[0]
                    center_offset = predict_center_list[index]
                    width_offset = predict_width_list[index]
                    p_center = anchor_center + 0.1 * anchor_width * center_offset
                    p_width = anchor_width * np.exp(0.1 * width_offset)
                    p_left = max(0, p_center - p_width * 0.5)
                    p_right = min(options['sample_len'], p_center + p_width * 0.5)
                    if p_right - p_left < 1.0:
                        continue
                    if p_right - p_left > current_batch['video_duration'][batch_id]:
                        continue
                    a_left.append(p_left)
                    a_right.append(p_right)
                    a_score.append(predict_overlap_list[index])

                picks = nms_temporal(a_left, a_right, a_score, 0.7)
                process_segment = []
                process_score = []
                for pick in picks:
                    process_segment.append([a_left[pick], a_right[pick]])
                    process_score.append(a_score[pick])

                result.append([current_batch['video_name'][batch_id],
                               current_batch['ground_interval'][batch_id],
                               current_batch['sentence'][batch_id],
                               process_segment,
                               current_batch['video_duration'][batch_id],
                               process_score,
                               predict_overlap_list,
                               predict_center_list,
                               predict_width_list])

        pkl.dump(result, open(result_save_dir + '/' + str(epoch_exact) + '.pkl', 'wb'))
        logging.info('***************************************************************')
        analysis_iou(result, epoch_exact, logging)
        logging.info('***************************************************************')
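nms_temporal is defined elsewhere; for reference, a standard greedy temporal non-maximum suppression over [left, right] segments that is consistent with how it is called above (an assumed sketch, not necessarily the original implementation):

def nms_temporal(x1, x2, scores, overlap_threshold):
    # Greedy NMS over 1-D segments: repeatedly pick the highest-scoring
    # segment and drop the remaining ones whose temporal IoU with it
    # exceeds the threshold. Returns the indices of the kept segments.
    picks = []
    if len(x1) == 0:
        return picks
    lengths = [e - s for s, e in zip(x1, x2)]
    order = sorted(range(len(scores)), key=lambda idx: scores[idx])
    while order:
        i = order[-1]          # highest-scoring remaining segment
        picks.append(i)
        order = order[:-1]
        keep = []
        for j in order:
            inter = max(0.0, min(x2[i], x2[j]) - max(x1[i], x1[j]))
            iou = inter / (lengths[i] + lengths[j] - inter)
            if iou <= overlap_threshold:
                keep.append(j)
        order = keep
    return picks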
def train_graph(game, player, display_on, inp, out, trained_steps):
    # define variables
    argmax = tf.placeholder("float", [None, ACTIONS])
    ground_truth = tf.placeholder("float", [None])
    global_step = tf.Variable(0, name='global_step')

    action = tf.reduce_sum(tf.multiply(out, argmax), reduction_indices=1)
    cost = tf.reduce_mean(tf.square(action - ground_truth))
    train_step = tf.train.AdamOptimizer(1e-6).minimize(cost)

    replay_memory = deque()

    # get the initial frame from 'Pong', process the image, and stack four frames
    frame = game.get_initial_frame(display_on)
    frame = cv2.cvtColor(cv2.resize(frame, (60, 60)), cv2.COLOR_BGR2GRAY)
    _, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
    inp_t = np.stack((frame, frame, frame, frame), axis=2)

    # saver and session manager
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=None)
    session = tf.InteractiveSession(config=tf.ConfigProto(log_device_placement=True))
    checkpoint_path = "./checkpoint_" + trained_steps
    checkpoint = tf.train.latest_checkpoint(checkpoint_path)
    if checkpoint is not None:
        saver.restore(session, checkpoint)
    else:
        init = tf.global_variables_initializer()
        session.run(init)

    t = global_step.eval()
    c = 0
    epsilon = INITIAL_EPSILON
    avg_max_q = 0

    # main training loop
    while True:
        out_t = out.eval(feed_dict={inp: [inp_t]})[0]  # output tensor
        argmax_t = np.zeros([ACTIONS])  # argmax tensor
        reward_t = 0  # reward tensor

        # choose action to take (random while exploring with probability epsilon)
        if random.random() <= epsilon and not USE_MODEL:
            maxIndex = choice((0, 1, 2), 1, p=(0.9, 0.05, 0.05))  # make 0 the most chosen action for realistic randomness
        else:
            maxIndex = np.argmax(out_t)

        # set action to take
        argmax_t[maxIndex] = 1

        # anneal epsilon according to the cooling schedule
        if epsilon > FINAL_EPSILON:
            epsilon -= (INITIAL_EPSILON - FINAL_EPSILON) / EXPLORE_STEPS

        # get next frame (state) and reward from the resulting state
        if player == 1:
            reward_t, _, frame = game.get_next_frame(argmax_t, None, display_on)
        elif player == 2:
            _, reward_t, frame = game.get_next_frame(None, argmax_t, display_on)

        # process state
        frame = cv2.cvtColor(cv2.resize(frame, (60, 60)), cv2.COLOR_BGR2GRAY)
        _, frame = cv2.threshold(frame, 1, 255, cv2.THRESH_BINARY)
        frame = np.reshape(frame, (60, 60, 1))
        updated_inp_t = np.append(frame, inp_t[:, :, 0:3], axis=2)  # updated input tensor

        # add our input, argmax, reward, and updated input tensors to replay memory
        replay_memory.append((inp_t, argmax_t, reward_t, updated_inp_t))

        # if we run out of replay memory, make room
        if len(replay_memory) > REPLAY_MEMORY_SIZE:
            replay_memory.popleft()

        # training update iteration
        if c > OBSERVE_STEPS and not USE_MODEL:
            # sample values from our replay memory
            minibatch = random.sample(replay_memory, BATCH)
            inp_batch = [dim[0] for dim in minibatch]
            argmax_batch = [dim[1] for dim in minibatch]
            reward_batch = [dim[2] for dim in minibatch]
            updated_inp_t_batch = [dim[3] for dim in minibatch]

            ground_truth_batch = []
            out_batch = out.eval(feed_dict={inp: updated_inp_t_batch})

            # build the regression targets for the batch
            for i in range(0, len(minibatch)):
                ground_truth_batch.append(reward_batch[i] + GAMMA * np.max(out_batch[i]))

            # train the model
            train_step.run(feed_dict={ground_truth: ground_truth_batch, argmax: argmax_batch, inp: inp_batch})

        # next frame
        inp_t = updated_inp_t
        t = t + 1
        c = c + 1

        # save model at set intervals
        if t % SAVE_STEP == 0 and not USE_MODEL:
            session.run(global_step.assign(t))
            saver.save(session, './checkpoints/model.ckpt', global_step=t)
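# For reference, the update above is the standard Q-learning target: for a
# transition (s, a, r, s') the regression target is r + GAMMA * max_a' Q(s', a').
# A tiny numpy sketch of building targets for a minibatch (names and values
# are illustrative; a terminal-state check is omitted, as in the code above):
import numpy as np

GAMMA_EXAMPLE = 0.99
reward_batch = np.array([0.0, 1.0, -1.0])
q_next = np.array([[0.1, 0.4, 0.2],   # Q(s', .) for each transition
                   [0.0, 0.3, 0.5],
                   [0.2, 0.1, 0.0]])
targets = reward_batch + GAMMA_EXAMPLE * q_next.max(axis=1)
print(targets)  # [ 0.396  1.495 -0.802]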
def init_session(self):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = False
    self.sess = tf.InteractiveSession(config=config, graph=tf.Graph())
    print("Session and graph initialized.")
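# Usage note (a sketch, not from the original code): tf.InteractiveSession
# installs itself as the default session, so ops built on the session's graph
# can call .eval()/.run() without passing the session explicitly. Assuming an
# object exposing the init_session method above:
#
#   obj.init_session()
#   with obj.sess.graph.as_default():
#       c = tf.constant(3.0)
#       print(c.eval())   # uses obj.sess implicitly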
def train(batch_size, num_epochs, num_train, num_val, alpha_regularizer, lstm_mode, html_type):
    display_step = 10
    filewriter_path = model_base + "tensorboard/"
    checkpoint_path = model_base + "checkpoint/"
    if os.path.exists(filewriter_path):
        shutil.rmtree(filewriter_path)
    os.makedirs(filewriter_path)
    if not os.path.isdir(checkpoint_path):
        os.makedirs(checkpoint_path)

    # shared placeholders
    keep_prob = tf.placeholder(tf.float32, name='keep_prob_placeholder')
    y = tf.placeholder(tf.float32, [None], name='label_placeholder')

    # image placeholders
    x = tf.placeholder(tf.float32, [None, IMAGE_HEIGHT, IMAGE_WIDTH, 3], name='image_placeholder')
    vr_type = tf.placeholder(tf.float32, [None, type_num], name='type_placeholder')

    # text placeholders
    title = tf.placeholder(tf.int32, (None, None))
    title_len = tf.placeholder(tf.int32, (None))
    snippet = tf.placeholder(tf.int32, (None, None))
    snippet_len = tf.placeholder(tf.int32, (None))
    session_title = tf.placeholder(tf.int32, (None, None))
    sess_len_title = tf.placeholder(tf.int32, (None))
    session_snippet = tf.placeholder(tf.int32, (None, None))
    sess_len_snippet = tf.placeholder(tf.int32, (None))
    sessions_weight_snippet = tf.placeholder(tf.float32, [None, sess_sen_len_snippet, feature_dim])
    attention_title = tf.placeholder(tf.float32, [None, max_title_len_top, feature_dim])
    attention_snippet = tf.placeholder(tf.float32, [None, max_snippet_len_top, feature_dim])

    # html placeholders
    html_tag = tf.placeholder(tf.int32, [None, html_dim], name='tag_placeholder')
    html_class = tf.placeholder(tf.int32, [None, html_dim], name='class_placeholder')

    # with tf.name_scope('image'):
    image_placeholders = [x, vr_type, keep_prob]
    pred_image = image(image_placeholders)

    # with tf.name_scope('title'):
    title_placeholders = [title, title_len, attention_title, session_title, sess_len_title]
    pred_title = text(title_placeholders, 'title')

    # with tf.name_scope('snippet'):
    snippet_placeholders = [snippet, snippet_len, attention_snippet, session_snippet, sess_len_snippet, sessions_weight_snippet]
    pred_snippet = text(snippet_placeholders, 'snippet')

    # with tf.name_scope('html'):
    html_placeholders = [html_tag, html_class]
    pred_html = html(html_placeholders)

    # fusion: learn a normalized weighting over the four branch predictions
    pred_combine = tf.squeeze(tf.concat([pred_image, pred_title, pred_snippet, pred_html], 1))
    balance_raw = tf.Variable(tf.ones([4]), name='balance', trainable=True)
    # without XPN:
    # pred_combine = tf.squeeze(tf.concat([pred_image, pred_title, pred_html], 1))
    # balance_raw = tf.Variable(tf.ones([3]), name='balance', trainable=True)
    balance_sum = tf.reduce_sum(balance_raw)
    balance = tf.div(balance_raw, balance_sum)
    pred_final = tf.reduce_sum(tf.multiply(pred_combine, balance), 1)

    with tf.name_scope("loss"):
        regularizer = tf.contrib.layers.l2_regularizer(alpha_regularizer)
        loss_regularizer = tf.contrib.layers.apply_regularization(regularizer, tf.trainable_variables())
        # sigmoid_cross_entropy = tf.nn.sigmoid_cross_entropy_with_logits(labels=tf.squeeze(y), logits=pred_final)
        sigmoid_cross_entropy = cross_entropy(labels=tf.squeeze(y), logits=pred_final)
        loss_cross_entropy = tf.reduce_mean(sigmoid_cross_entropy, name='loss_cross_entropy')
        loss_mse = tf.reduce_mean(tf.square(pred_final - tf.squeeze(y)))
        loss = loss_cross_entropy

    print('Get ready! We are going to print all the trainable vars.')
    var_list = [v for v in tf.trainable_variables()]
    for var in var_list:
        print(var.name)
    print('Ok, print done.')
    var_train_list = var_list

    with tf.name_scope("train"):
        gradients = tf.gradients(loss, var_train_list)
        # gradients, global_norm = tf.clip_by_global_norm(gradients, 1)
        gradients = list(zip(gradients, var_train_list))
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate)
        optimizer = tf.train.AdamOptimizer(learning_rate)
        train_op = optimizer.apply_gradients(grads_and_vars=gradients)
        # train_op = optimizer.minimize(loss)

    for var in var_list:
        tf.summary.histogram(var.name, var)
    tf.summary.scalar('loss_regularizer_fusion', loss_regularizer)
    tf.summary.scalar('loss_cross_entropy_fusion', loss_cross_entropy)
    tf.summary.scalar('loss_mse_fusion', loss_mse)
    tf.summary.scalar('loss_fusion', loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(filewriter_path)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.InteractiveSession(config=config)
    sess.run(tf.global_variables_initializer())
    writer.add_graph(sess.graph)

    images_vals, title_vars, snippet_vars, html_vars = [], [], [], []
    for var in var_list:
        if var.name.find("title") != -1:
            print('title: ' + var.name)
            title_vars.append(var)
        elif var.name.find("snippet") != -1:
            print('snippet: ' + var.name)
            snippet_vars.append(var)
        elif var.name.find('html') != -1:
            print('html: ' + var.name)
            html_vars.append(var)
        elif var.name.find("balance") == -1:
            print('image: ' + var.name)
            images_vals.append(var)

    # saver_image = tf.train.Saver(images_vals)
    # saver_title = tf.train.Saver(title_vars)
    # saver_snippet = tf.train.Saver(snippet_vars)
    # saver_html = tf.train.Saver(html_vars)
    # saver_image.restore(sess, model_base+"checkpoint/VPN/model_image.ckpt")
    # print('image model successfully loaded!')
    # saver_title.restore(sess, model_base+"checkpoint/TSN/model_title.ckpt")
    # print('title model successfully loaded!')
    # saver_snippet.restore(sess, model_base+"checkpoint/SSN/model_snippet.ckpt")
    # print('snippet model successfully loaded!')
    # saver_html.restore(sess, model_base+'checkpoint/HSN/model_html.ckpt')
    # print('html model successfully loaded!')

    saver = tf.train.Saver(max_to_keep=20)

    train_dataset = val_dataset = '201709'
    train_path = data_base + '201709/info_top_10_id_201709'
    val_path = data_base + '201709/info_top_10_id_201709'
    images_train, rels_train, num_data_train = set_data_image(train_path, train_dataset)
    images_val, rels_val, num_data_val = set_data_image(val_path, val_dataset)
    type_train = set_data_type(train_path)
    type_val = set_data_type(val_path)
    titles_train, snippets_train, rels_train, queries_train, num_data_train = set_data_text('text', train_path)
    titles_val, snippets_val, rels_val, queries_val, num_data_val = set_data_text('text', val_path)
    sess_title_train, sessions_weight_title_train = set_data_sess('title', train_path, train_dataset)
    sess_snippet_train, sessions_weight_snippet_train = set_data_sess('snippet', train_path, train_dataset)
    sess_title_val, sessions_weight_title_val = set_data_sess('title', val_path, val_dataset)
    sess_snippet_val, sessions_weight_snippet_val = set_data_sess('snippet', val_path, val_dataset)
    DFS_tag_train, DFS_class_train, BFS_tag_train, BFS_class_train, rels_train, num_data_train = set_data_html(train_path, train_dataset)
    DFS_tag_val, DFS_class_val, BFS_tag_val, BFS_class_val, rels_val, num_data_val = set_data_html(val_path, val_dataset)
    print('train data num:{}'.format(num_data_train))
    print('val data num:{}'.format(num_data_val))

    print("{} Start training...".format(datetime.now()))
    print("{} Open Tensorboard at --logdir {}".format(datetime.now(), filewriter_path))

    if num_train == 'all':
        num_train = num_data_train
    else:
        num_train = int(num_train)
    if num_val == 'all':
        num_val = num_data_val
    else:
        num_val = int(num_val)

    for epoch in range(num_epochs):
        print("{} Epoch number: {}".format(datetime.now(), epoch + 1))
        step = 1
        for iter in xrange(num_train / batch_size):
            ind = set_random_ind(num_data_train, batch_size, random=True)
            pic_input, label_input = data_batch_image(images_train, rels_train, num_data_train, batch_size, ind)
            type_input = data_batch_type(type_train, batch_size, ind)
            title_input, title_len_input, label_input, attention_title_input = data_batch_text(
                titles_train, queries_train, window_weight, rels_train, num_data_train, batch_size, max_title_len_top, ind)
            snippet_input, snippet_len_input, label_input, attention_snippet_input = data_batch_text(
                snippets_train, queries_train, window_weight, rels_train, num_data_train, batch_size, max_snippet_len_top, ind)
            sess_title_input, sess_title_len_input, label_input, attention_sess_title_input = data_batch_text(
                sess_title_train, queries_train, window_weight, rels_train, num_data_train, batch_size, sess_sen_len_title, ind)
            sess_snippet_input, sess_snippet_len_input, label_input, attention_sess_snippet_input = data_batch_text(
                sess_snippet_train, queries_train, window_weight, rels_train, num_data_train, batch_size, sess_sen_len_snippet, ind)
            sessions_weight_snippet_input = sess_weight_batch('snippet', batch_size, sessions_weight_snippet_train, ind)
            if html_type == 'DFS':
                tag_input, label_input = data_batch_html(DFS_tag_train, rels_train, ind)
                class_input, label_input = data_batch_html(DFS_class_train, rels_train, ind)
            elif html_type == 'BFS':
                tag_input, label_input = data_batch_html(BFS_tag_train, rels_train, ind)
                class_input, label_input = data_batch_html(BFS_class_train, rels_train, ind)
            train_op_, loss_, loss_cross_entropy_, loss_mse_, loss_regularizer_, merged_summary_, pred_final_, pred_combine_, balance_ = sess.run(
                [train_op, loss, loss_cross_entropy, loss_mse, loss_regularizer, merged_summary, pred_final, pred_combine, balance],
                feed_dict={
                    y: label_input,
                    keep_prob: dropout_rate,
                    x: pic_input,
                    vr_type: type_input,
                    title: title_input,
                    title_len: title_len_input,
                    session_title: sess_title_input,
                    sess_len_title: sess_title_len_input,
                    attention_title: attention_title_input,
                    snippet: snippet_input,
                    snippet_len: snippet_len_input,
                    session_snippet: sess_snippet_input,
                    sess_len_snippet: sess_snippet_len_input,
                    sessions_weight_snippet: sessions_weight_snippet_input,
                    attention_snippet: attention_snippet_input,
                    html_tag: tag_input,
                    html_class: class_input
                })
            print("the " + str(epoch + 1) + 'th epoch, ' + str(iter + 1) +
                  'th batch: loss:{} loss_cross_entropy:{} loss_mse:{} loss_regularizer:{}'
                  .format(loss_, loss_cross_entropy_, loss_mse_, loss_regularizer_))
            print(balance_)
            if step % display_step == 0:
                writer.add_summary(merged_summary_, epoch * num_train / batch_size + step)
            step += 1

        dropout_rate_val = 1
        print("{} Start validation...".format(datetime.now()))
        loss_total = 0.
        pred_all, label_all = [], []
        iters = num_val / batch_size
        for iter in xrange(iters):
            ind = set_random_ind(num_data_val, batch_size, random=False, iter_=iter)
            pic_input, label_input = data_batch_image(images_val, rels_val, num_data_val, batch_size, ind)
            type_input = data_batch_type(type_val, batch_size, ind)
            title_input, title_len_input, label_input, attention_title_input = data_batch_text(
                titles_val, queries_val, window_weight, rels_val, num_data_val, batch_size, max_title_len_top, ind)
            snippet_input, snippet_len_input, label_input, attention_snippet_input = data_batch_text(
                snippets_val, queries_val, window_weight, rels_val, num_data_val, batch_size, max_snippet_len_top, ind)
            sess_title_input, sess_title_len_input, label_input, attention_sess_title_input = data_batch_text(
                sess_title_val, queries_val, window_weight, rels_val, num_data_val, batch_size, sess_sen_len_title, ind)
            sess_snippet_input, sess_snippet_len_input, label_input, attention_sess_snippet_input = data_batch_text(
                sess_snippet_val, queries_val, window_weight, rels_val, num_data_val, batch_size, sess_sen_len_snippet, ind)
            sessions_weight_snippet_input = sess_weight_batch('snippet', batch_size, sessions_weight_snippet_val, ind)
            if html_type == 'DFS':
                tag_input, label_input = data_batch_html(DFS_tag_val, rels_val, ind)
                class_input, label_input = data_batch_html(DFS_class_val, rels_val, ind)
            elif html_type == 'BFS':
                tag_input, label_input = data_batch_html(BFS_tag_val, rels_val, ind)
                class_input, label_input = data_batch_html(BFS_class_val, rels_val, ind)
            loss_, loss_cross_entropy_, loss_mse_, loss_regularizer_ = sess.run(
                [loss, loss_cross_entropy, loss_mse, loss_regularizer],
                feed_dict={
                    y: label_input,
                    keep_prob: dropout_rate_val,  # no dropout at validation time (the original fed dropout_rate here)
                    x: pic_input,
                    vr_type: type_input,
                    title: title_input,
                    title_len: title_len_input,
                    session_title: sess_title_input,
                    sess_len_title: sess_title_len_input,
                    attention_title: attention_title_input,
                    snippet: snippet_input,
                    snippet_len: snippet_len_input,
                    session_snippet: sess_snippet_input,
                    sess_len_snippet: sess_snippet_len_input,
                    sessions_weight_snippet: sessions_weight_snippet_input,
                    attention_snippet: attention_snippet_input,
                    html_tag: tag_input,
                    html_class: class_input
                })
            loss_total += loss_ * batch_size
            print("the " + str(epoch + 1) + 'th epoch, ' + str(iter + 1) +
                  'th batch: loss:{} loss_cross_entropy:{} loss_mse:{} loss_regularizer:{}'
                  .format(loss_, loss_cross_entropy_, loss_mse_, loss_regularizer_))
        print('average loss: {}'.format(loss_total * 1.0 / iters / batch_size))

        print("{} Saving checkpoint of model...".format(datetime.now()))
        checkpoint_name = os.path.join(checkpoint_path, 'model_JRE_epoch_' + str(epoch + 1) + '.ckpt')
        save_path = saver.save(sess, checkpoint_name)
        print("{} Model checkpoint saved at {}".format(datetime.now(), checkpoint_name))
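# The fusion above learns a normalized weighting over the four branch scores.
# A minimal standalone sketch of that pattern (names suffixed _demo are
# illustrative, not part of the original code):
import tensorflow as tf

branch_scores = tf.placeholder(tf.float32, [None, 4])               # [image, title, snippet, html]
balance_raw_demo = tf.Variable(tf.ones([4]))
balance_demo = balance_raw_demo / tf.reduce_sum(balance_raw_demo)   # weights sum to 1
fused_demo = tf.reduce_sum(branch_scores * balance_demo, axis=1)
# Note: nothing constrains balance_raw_demo to stay positive during training,
# so the normalized weights can leave [0, 1]; the original code shares this property.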
def prac7ConvMLPModel(model='MLP', MLPTop={}, convTop={}, optimiser={}, act=tf.nn.relu, max_steps=100):
    # Import data
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    # Create inputs: x is the MNIST image and y_labels is the label
    tf.reset_default_graph()
    sess = tf.InteractiveSession()
    optimise = Helpers.optimiserParams(optimiser)
    if optimise is None:
        print("Invalid Optimiser")
        return

    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_labels = tf.placeholder(tf.float32, [None, 10], name='y-input')
    with tf.name_scope('input_reshape'):
        image_shaped_input = tf.reshape(x, [-1, 28, 28, 1])
        tf.summary.image('input', image_shaped_input, 10)

    # Generate hidden layers
    layers = {}
    if model == 'convNet':
        topology = Helpers.convParams(convTop)
        FCLayerSize = topology.pop('FCLayerSize')
        for i in range(topology.pop('convPoolLayers')):
            if i == 0:
                layers[str(i)] = Helpers.convLayer(image_shaped_input, "convPoolLayer" + str(i), i, **topology)
            else:
                layers[str(i)] = Helpers.convLayer(layers[str(i - 1)], "convPoolLayer" + str(i), i, **topology)
        FC1 = Helpers.conv2FCLayer(layers[str(i)], FCLayerSize, "FC1")
        y = Helpers.FCLayer(FC1, FCLayerSize, 10, 'output_layer', act=tf.identity)
    elif model == 'MLP':
        hiddenDims = MLPTop.setdefault("hiddenDims", [500])
        for i in range(len(hiddenDims)):
            if i == 0:
                layers[str(i)] = Helpers.FCLayer(x, 784, hiddenDims[i], "hidden_layer_" + str(i))
            else:
                layers[str(i)] = Helpers.FCLayer(layers[str(i - 1)], hiddenDims[i - 1], hiddenDims[i], "hidden_layer_" + str(i))
        y = Helpers.FCLayer(layers[str(i)], hiddenDims[i], 10, 'output_layer', act=tf.identity)
    else:
        print("MLP or convNet - nothing else is valid")
        return

    with tf.name_scope('cross_entropy'):
        diff = tf.nn.softmax_cross_entropy_with_logits(labels=y_labels, logits=y)
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(diff)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = optimise.minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_labels, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    merged = tf.summary.merge_all()
    testPath, trainPath = Helpers.getSaveDir(model)
    train_writer = tf.summary.FileWriter(trainPath, sess.graph)
    test_writer = tf.summary.FileWriter(testPath)
    tf.global_variables_initializer().run()

    def feed_dict(train):
        """Make a TensorFlow feed_dict: maps data onto Tensor placeholders."""
        if train:
            xs, ys = mnist.train.next_batch(100)
        else:
            xs, ys = mnist.test.images, mnist.test.labels
        return {x: xs, y_labels: ys}

    for i in range(max_steps):
        if i % 10 == 0:  # Record summaries and test-set accuracy
            summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
            test_writer.add_summary(summary, i)
            print('Accuracy at step %s: %s' % (i, acc))
        else:  # Record train set summaries, and train
            if i % 25 == 24:  # Record execution stats
                run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True),
                                      options=run_options, run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
                train_writer.add_summary(summary, i)

    train_writer.close()
    test_writer.close()
    print("Accuracy on test set: ",
          sess.run(accuracy, feed_dict={x: mnist.test.images, y_labels: mnist.test.labels}))
    sess.close()
    Helpers.openTensorBoard(trainPath, testPath)
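# Helpers.FCLayer is defined elsewhere in this project. For orientation, a
# plausible minimal version of such a fully connected layer helper (an
# assumption, not the project's actual implementation) might look like:
def fc_layer_sketch(input_tensor, input_dim, output_dim, layer_name, act=tf.nn.relu):
    with tf.name_scope(layer_name):
        weights = tf.Variable(tf.truncated_normal([input_dim, output_dim], stddev=0.1))
        biases = tf.Variable(tf.constant(0.1, shape=[output_dim]))
        preactivate = tf.matmul(input_tensor, weights) + biases
        return act(preactivate)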
def nn(labelled_data, prediction_features, hidden1_units):
    # Random array generation for selecting training, cross-validation and test samples
    features_size = np.size(labelled_data, 1) - 1  # Number of features
    samples_size = np.size(labelled_data, 0)       # Number of samples
    rand_arr = np.arange(samples_size)             # Random array for shuffling samples
    np.random.shuffle(rand_arr)

    # Training Samples
    train_size = int(np.floor(0.9 * samples_size))  # Number of training samples
    train_inputs = labelled_data[rand_arr[0:train_size], 0:features_size]                # Training features
    train_labels = labelled_data[rand_arr[0:train_size], features_size].reshape(-1, 1)   # Training labels (kept 2-D to match y_)

    # Cross Validation Samples
    # crossval_size = samples_size - train_size  # Number of cross validation samples
    crossval_size = int(np.floor(0.05 * samples_size))
    crossval_inputs = labelled_data[rand_arr[train_size:train_size + crossval_size], 0:features_size]
    crossval_labels = labelled_data[rand_arr[train_size:train_size + crossval_size], features_size].reshape(-1, 1)

    # Test Samples
    test_size = samples_size - (train_size + crossval_size)
    test_inputs = labelled_data[rand_arr[train_size + crossval_size:samples_size], 0:features_size]
    test_labels = labelled_data[rand_arr[train_size + crossval_size:samples_size], features_size].reshape(-1, 1)

    # Input Features
    a0 = tf.placeholder(tf.float32, [None, features_size])

    # Hidden Layer 1
    W1 = tf.Variable(tf.truncated_normal([features_size, hidden1_units],
                                         stddev=1.0 / np.sqrt(float(features_size))), name='W1')
    b1 = tf.Variable(tf.zeros([hidden1_units]), name='b1')
    a1 = tf.nn.sigmoid(tf.matmul(a0, W1) + b1)

    # Final Layer (shallow network)
    W_final = tf.Variable(tf.truncated_normal([hidden1_units, 1],
                                              stddev=1.0 / np.sqrt(float(hidden1_units))), name='W_final')
    b_final = tf.Variable(tf.zeros([1]), name='b_final')
    y = tf.nn.sigmoid(tf.matmul(a1, W_final) + b_final)

    ## # Hidden Layer 2
    ## W2 = tf.Variable(tf.truncated_normal([hidden1_units, hidden2_units],
    ##                                      stddev=1.0 / np.sqrt(float(hidden1_units))), name='W2')
    ## b2 = tf.Variable(tf.zeros([hidden2_units]), name='b2')
    ## a2 = tf.nn.sigmoid(tf.matmul(a1, W2) + b2)
    ##
    ## # Final Layer
    ## W_final = tf.Variable(tf.truncated_normal([hidden2_units, 1],
    ##                                           stddev=1.0 / np.sqrt(float(hidden2_units))), name='W_final')
    ## b_final = tf.Variable(tf.zeros([1]), name='b_final')
    ## y = tf.nn.sigmoid(tf.matmul(a2, W_final) + b_final)

    # Labels
    y_ = tf.placeholder(tf.float32, [None, 1])

    # Loss (mean squared error)
    loss = tf.reduce_mean(tf.square(y - y_))

    # Initialize Session
    sess = tf.InteractiveSession()

    # Train
    learning_rate = 0.5
    steps = 1000
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss)
    tf.global_variables_initializer().run()  # run after the optimizer is built so any optimizer-created variables are covered
    for i in range(steps):
        sess.run(train_step, feed_dict={a0: train_inputs, y_: train_labels})
        if np.mod(i, 100) == 0:
            # loss is already a scalar mean, so it can be evaluated directly
            print('%d: %f' % (i, sess.run(loss, feed_dict={a0: train_inputs, y_: train_labels})))

    # Cross Validation Error
    print('Cross Validation Error: %f' % sess.run(loss, feed_dict={a0: crossval_inputs, y_: crossval_labels}))

    # Test Results (slice with i:i+1 to keep the 2-D shape the placeholders expect)
    for i in range(test_size):
        results = int(sess.run(y, feed_dict={a0: test_inputs[i:i + 1], y_: test_labels[i:i + 1]}) > 0.8)
        print('Label: %d Prediction: %f' % (test_labels[i], results))

    # Prediction
    print('Prediction: %f (True if > 0.8)' % (sess.run(y, feed_dict={a0: prediction_features})))
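# A quick smoke test for nn() on synthetic data (entirely illustrative):
# 200 samples with 3 features, a binary label in the last column, and one
# query row for the final prediction.
import numpy as np

demo_X = np.random.rand(200, 3)
demo_y = (demo_X.sum(axis=1) > 1.5).astype(np.float32).reshape(-1, 1)
demo_data = np.hstack([demo_X, demo_y])        # last column is the label
# nn(demo_data, demo_X[:1], hidden1_units=8)   # uncomment to run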
def train(_):
    # create new log files
    if tf.gfile.Exists(FLAGS.log_dir):
        tf.gfile.DeleteRecursively(FLAGS.log_dir)
    tf.gfile.MakeDirs(FLAGS.log_dir)

    tf.reset_default_graph()
    tf.set_random_seed(2)
    np.random.seed(2)

    # Import data
    mnist = input_data.read_data_sets("MNIST-data/", one_hot=True)
    X_train = mnist.train.images.reshape(mnist.train.images.shape[0], 28, 28, 1)
    y_train = mnist.train.labels.astype(np.int64)

    batch_size = 500
    gen = ImageDataGenerator(rotation_range=6, width_shift_range=0.06, shear_range=0.27,
                             height_shift_range=0.06, zoom_range=0.06)
    train_gen = gen.flow(X_train, y_train, batch_size=batch_size, seed=0)

    # Create a multilayer model.
    sess = tf.InteractiveSession()

    # Input placeholders
    with tf.name_scope('input'):
        x = tf.placeholder(tf.float32, [None, 784], name='x-input')
        y_ = tf.placeholder(tf.int64, [None, 10], name='y-input')

    def weight_variable(shape):
        """Create a weight variable with appropriate initialization."""
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        """Create a bias variable with appropriate initialization."""
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2dx(x, num_outputs):
        return tf.contrib.layers.conv2d(
            x,
            kernel_size=[3, 3],
            num_outputs=num_outputs,
            stride=[1, 1],
            padding='SAME',
            normalizer_fn=tf.layers.batch_normalization,
            normalizer_params={"training": False, "reuse": False},
            activation_fn=tf.nn.relu,
        )

    x_image = tf.reshape(x, [-1, 28, 28, 1])

    # conv1 with batch normalisation
    conv1 = conv2dx(x_image, 32)
    print("conv1" + str(conv1.get_shape()))

    # conv2 with batch normalisation
    conv2 = conv2dx(conv1, 64)
    print("conv2" + str(conv2.get_shape()))

    # pool1
    pool1 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    print("pool1" + str(pool1.get_shape()))

    # conv3 with batch normalisation
    conv3 = conv2dx(pool1, 64)
    print("conv3" + str(conv3.get_shape()))

    # conv4 with batch normalisation
    conv4 = conv2dx(conv3, 64)
    print("conv4" + str(conv4.get_shape()))

    # pool2
    pool2 = tf.nn.max_pool(conv4, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
    print("pool2" + str(pool2.get_shape()))

    # dense1 with flatten; note the flatten is taken from conv3, not pool2
    # (14 * 14 * 64 = 28 * 28 * 16 = 12544, so the dimensions line up)
    W_fc1 = weight_variable([28 * 28 * 16, 512])
    b_fc1 = bias_variable([512])
    flat = tf.reshape(conv3, [-1, 28 * 28 * 16])
    fc1 = tf.nn.relu(tf.matmul(flat, W_fc1) + b_fc1)
    print("fc1" + str(fc1.get_shape()))

    keep_prob = tf.placeholder(tf.float32)
    fc1_drop = tf.nn.dropout(fc1, keep_prob)
    print("fc1_drop" + str(fc1_drop.get_shape()))

    # dense2, then softmax the output
    W_fc2 = weight_variable([512, 10])
    b_fc2 = bias_variable([10])
    y = tf.nn.softmax(tf.matmul(fc1_drop, W_fc2) + b_fc2)
    print("y" + str(y.get_shape()))

    with tf.name_scope('cross_entropy'):
        with tf.name_scope('total'):
            cross_entropy = tf.reduce_mean(-tf.reduce_sum(
                tf.cast(y_, tf.float32) * tf.log(y), reduction_indices=[1]))
            # cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=y_, logits=y)
    tf.summary.scalar('cross_entropy', cross_entropy)

    with tf.name_scope('train'):
        train_step = tf.train.AdamOptimizer(FLAGS.learning_rate).minimize(cross_entropy)

    with tf.name_scope('accuracy'):
        with tf.name_scope('correct_prediction'):
            correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
        with tf.name_scope('accuracy'):
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.summary.scalar('accuracy', accuracy)

    # Merge all the summaries and write them out
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter(FLAGS.log_dir + '/train', sess.graph, flush_secs=10)
    test_writer = tf.summary.FileWriter(FLAGS.log_dir + '/test', flush_secs=10)
    tf.global_variables_initializer().run()

    def feed_dict(train):
        if train:
            xs, ys = next(train_gen)
            xs = xs.reshape(batch_size, 28 * 28)
            k = FLAGS.dropout
        else:
            xs, ys = mnist.test.images, mnist.test.labels
            k = 1.0
        return {x: xs, y_: ys, keep_prob: k}

    for i in range(FLAGS.max_steps + 1):
        if i % 100 == 0:  # Record summaries and test-set accuracy
            summary, acc = sess.run([merged, accuracy], feed_dict=feed_dict(False))
            test_writer.add_summary(summary, i)
            print('%s Accuracy at step %s: %s' % (datetime.now(), i, acc))
        else:  # Record train set summaries, and train
            if i % 100 == 99:  # Record execution stats
                run_options = tf.RunOptions()
                run_metadata = tf.RunMetadata()
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True),
                                      options=run_options, run_metadata=run_metadata)
                train_writer.add_run_metadata(run_metadata, 'step%03d' % i)
                train_writer.add_summary(summary, i)
                # print('Adding run metadata for', i)
            else:  # Record a summary
                summary, _ = sess.run([merged, train_step], feed_dict=feed_dict(True))
                if i % 10 == 0:
                    train_writer.add_summary(summary, i)

    train_writer.close()
    test_writer.close()
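# Note on the feed above (a sketch of the assumption being made): Keras'
# ImageDataGenerator yields 4-D batches of shape (batch, 28, 28, 1), while
# the x placeholder expects flattened 784-vectors, hence the reshape inside
# feed_dict. The demo_* names below are illustrative only.
from keras.preprocessing.image import ImageDataGenerator
import numpy as np

demo_gen = ImageDataGenerator(rotation_range=6)
demo_X = np.random.rand(500, 28, 28, 1)
demo_y = np.eye(10)[np.random.randint(0, 10, 500)]
demo_flow = demo_gen.flow(demo_X, demo_y, batch_size=500, seed=0)
xs, ys = next(demo_flow)
print(xs.reshape(500, 28 * 28).shape)  # (500, 784)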
data = tf.nn.embedding_lookup(wordVectors, input_data)

lstmCell = tf.contrib.rnn.BasicLSTMCell(lstmUnits)
lstmCell = tf.contrib.rnn.DropoutWrapper(cell=lstmCell, output_keep_prob=0.25)
value, _ = tf.nn.dynamic_rnn(lstmCell, data, dtype=tf.float32)

weight = tf.Variable(tf.truncated_normal([lstmUnits, numClasses]))
bias = tf.Variable(tf.constant(0.1, shape=[numClasses]))
value = tf.transpose(value, [1, 0, 2])
# take the output of the last time step
last = tf.gather(value, int(value.get_shape()[0]) - 1)
prediction = (tf.matmul(last, weight) + bias)

correctPred = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))

sess = tf.InteractiveSession()
# import_meta_graph returns a Saver built from the saved graph
saver = tf.train.import_meta_graph('models/pretrained_lstm.ckpt-60000.meta')
saver.restore(sess, tf.train.latest_checkpoint('models'))

# Removes punctuation, parentheses, question marks, etc., and leaves only alphanumeric characters
import re
strip_special_chars = re.compile("[^A-Za-z0-9 ]+")

def cleanSentences(string):
    string = string.lower().replace("<br />", " ")
    return re.sub(strip_special_chars, "", string)
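# A quick check of cleanSentences (the input sentence is illustrative only):
print(cleanSentences("This movie was great!<br />Loved it... 10/10"))
# -> "this movie was great loved it 1010"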
def evaluation(img_path, ckpt_path):
    tf.reset_default_graph()

    # open the image file
    f = open(img_path, 'r')
    # read the image
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)
    # convert to grayscale
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    face = faceCascade.detectMultiScale(gray, 1.1, 3)

    if len(face) > 0:
        for rect in face:
            # give the processed image a reasonably unique name
            random_str = str(random.random())
            # draw a red rectangle around the face
            cv2.rectangle(img, tuple(rect[0:2]), tuple(rect[0:2] + rect[2:4]), (0, 0, 255), thickness=2)
            # fill in the save path for the image with the face outlined in red
            ###############################################################################
            face_detect_img_path = '/顔検出した画像のパス' + random_str + '.jpg'
            ###############################################################################
            # save the image with the face outlined in red
            cv2.imwrite(face_detect_img_path, img)
            x = rect[0]
            y = rect[1]
            w = rect[2]
            h = rect[3]
            # fill in where to save the cropped face image
            ###############################################################################
            cv2.imwrite('/保存先のパス' + random_str + '.jpg', img[y:y + h, x:x + w])  # note: the original sliced x:x+x+w, which was a bug
            # the cropped face image handed to TensorFlow
            target_image_path = '/保存先のパス' + random_str + '.jpg'
            ###############################################################################
    else:
        # stop if no face was found
        print('image:NoFace')
        return
    f.close()

    f = open(target_image_path, 'r')
    # array that will hold the data
    image = []
    img = cv2.imread(target_image_path)
    # resize to 28x28 px
    img = cv2.resize(img, (28, 28))
    # flatten the image, then scale it to float values in [0, 1]
    image.append(img.flatten().astype(np.float32) / 255.0)
    # convert to numpy format so TensorFlow can process it
    image = np.asarray(image)

    # output the probability of each label for the input image
    logits = inference(image, 1.0)

    # with an InteractiveSession we can just use 'logits.eval()' without passing 'sess'
    sess = tf.InteractiveSession()
    # prepare to restore (load the parameters)
    saver = tf.train.Saver()
    # initialize the variables
    sess.run(tf.initialize_all_variables())
    if ckpt_path:
        # load the trained parameters
        saver.restore(sess, ckpt_path)
    # same as sess.run(logits)
    softmax = logits.eval()
    # classification result
    result = softmax[0]
    # convert the results to percentages and round them
    rates = [round(n * 100.0, 1) for n in result]
    humans = []
    # build a hash of label index, name, and percentage
    for index, rate in enumerate(rates):
        name = HUMAN_NAMES[index]
        humans.append({'label': index, 'name': name, 'rate': rate})
    # sort by percentage, highest first
    rank = sorted(humans, key=lambda x: x['rate'], reverse=True)

    # return the ranking and the paths of the processed images
    return [rank, face_detect_img_path, target_image_path]
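# A usage sketch (the file and checkpoint names are illustrative; the
# placeholder save paths inside evaluation() must be filled in first):
#
#   ranked, boxed_path, crop_path = evaluation('sample.jpg', './model.ckpt')
#   print(ranked[0]['name'], ranked[0]['rate'])   # best guess and its percentage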
    return eval_acc / len(Xv), eval_loss / len(Xv)

NUM_EPOCHS_FULL = 50
S_LEARNING_RATE_FULL = 0.01
F_LEARNING_RATE_FULL = 0.0001
BATCH_SIZE = 16

writerLoss = tf.summary.FileWriter("./logs/project3/loss_")
writerAcc = tf.summary.FileWriter("./logs/project3/acc_")
log_var = tf.Variable(0.0)
tf.summary.scalar("train", log_var)
write_op = tf.summary.merge_all()
plotSession = tf.InteractiveSession()
plotSession.run(tf.global_variables_initializer())

def train():
    with tf.Session(graph=graph) as session:
        # weight initialization
        session.run(tf.global_variables_initializer())

        # full optimization with a linearly decayed learning rate
        maxAcc = 0
        for epoch in range(NUM_EPOCHS_FULL):
            lr = (S_LEARNING_RATE_FULL * (NUM_EPOCHS_FULL - epoch - 1) +
                  F_LEARNING_RATE_FULL * epoch) / (NUM_EPOCHS_FULL - 1)
            training_epoch(epoch, session, train_op, lr)
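# A plausible way the writers above get used (an assumption: a helper called
# once per epoch with plain Python floats). Feeding log_var overrides the
# variable's value just for this run, so one scalar summary op can serve
# both the loss and the accuracy curves:
def log_scalar(writer, value, step):
    summary = plotSession.run(write_op, feed_dict={log_var: value})
    writer.add_summary(summary, step)
    writer.flush()

# log_scalar(writerLoss, 0.42, epoch)
# log_scalar(writerAcc, 0.91, epoch)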
def train():
    train_data, _ = get_video_data(video_data_path, video_feat_path, train_ratio=0.9)
    captions = train_data['Description'].values
    captions = map(lambda x: x.replace('.', ''), captions)
    captions = map(lambda x: x.replace(',', ''), captions)
    wordtoix, ixtoword, bias_init_vector = preProBuildWordVocab(captions, word_count_threshold=10)
    np.save('./data/ixtoword', ixtoword)

    model = Video_Caption_Generator(
            dim_image=dim_image,
            n_words=len(wordtoix),
            dim_hidden=dim_hidden,
            batch_size=batch_size,
            n_lstm_steps=n_frame_step,
            bias_init_vector=bias_init_vector)

    tf_loss, tf_video, tf_video_mask, tf_caption, tf_caption_mask, tf_probs = model.build_model()
    sess = tf.InteractiveSession()
    saver = tf.train.Saver(max_to_keep=10)
    train_op = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)
    tf.initialize_all_variables().run()

    for epoch in range(n_epochs):
        index = list(train_data.index)
        np.random.shuffle(index)
        train_data = train_data.ix[index]

        # sample one caption per video for this epoch
        current_train_data = train_data.groupby('video_path').apply(lambda x: x.irow(np.random.choice(len(x))))
        current_train_data = current_train_data.reset_index(drop=True)

        for start, end in zip(
                range(0, len(current_train_data), batch_size),
                range(batch_size, len(current_train_data), batch_size)):

            current_batch = current_train_data[start:end]
            current_videos = current_batch['video_path'].values

            current_feats = np.zeros((batch_size, n_frame_step, dim_image))
            current_feats_vals = map(lambda vid: np.load(vid), current_videos)

            current_video_masks = np.zeros((batch_size, n_frame_step))
            for ind, feat in enumerate(current_feats_vals):
                current_feats[ind][:len(current_feats_vals[ind])] = feat
                current_video_masks[ind][:len(current_feats_vals[ind])] = 1

            current_captions = current_batch['Description'].values
            current_caption_ind = map(lambda cap: [wordtoix[word] for word in cap.lower().split(' ')[:-1] if word in wordtoix], current_captions)

            current_caption_matrix = sequence.pad_sequences(current_caption_ind, padding='post', maxlen=n_frame_step - 1)
            current_caption_matrix = np.hstack([current_caption_matrix, np.zeros([len(current_caption_matrix), 1])]).astype(int)

            current_caption_masks = np.zeros((current_caption_matrix.shape[0], current_caption_matrix.shape[1]))
            nonzeros = np.array(map(lambda x: (x != 0).sum() + 1, current_caption_matrix))
            for ind, row in enumerate(current_caption_masks):
                row[:nonzeros[ind]] = 1

            probs_val = sess.run(tf_probs, feed_dict={
                tf_video: current_feats,
                tf_caption: current_caption_matrix
                })

            _, loss_val = sess.run(
                    [train_op, tf_loss],
                    feed_dict={
                        tf_video: current_feats,
                        tf_video_mask: current_video_masks,
                        tf_caption: current_caption_matrix,
                        tf_caption_mask: current_caption_masks
                        })

            print loss_val

        if np.mod(epoch, 100) == 0:
            print "Epoch ", epoch, " is done. Saving the model ..."
            saver.save(sess, os.path.join(model_path, 'model'), global_step=epoch)
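# The caption mask built above marks real tokens (plus one end-of-sentence
# slot) with 1 and padding with 0. A small numpy illustration of the same
# trick (the token ids are made up):
import numpy as np

caption_matrix = np.array([[12, 7, 3, 0, 0],
                           [5, 0, 0, 0, 0]])
nonzeros = np.array([(row != 0).sum() + 1 for row in caption_matrix])
masks = np.zeros_like(caption_matrix, dtype=np.float32)
for ind, row in enumerate(masks):
    row[:nonzeros[ind]] = 1
print(masks)
# [[1. 1. 1. 1. 0.]
#  [1. 1. 0. 0. 0.]]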