else:
    print('Build model...')
    model = Sequential()
    model.add(WordContextProduct(max_features, proj_dim=dim_proj, init="uniform"))
    model.compile(loss='mse', optimizer='rmsprop')
    sampling_table = sequence.make_sampling_table(max_features)

    for e in range(nb_epoch):
        print('-' * 40)
        print('Epoch', e)
        print('-' * 40)

        progbar = generic_utils.Progbar(tokenizer.document_count)
        samples_seen = 0
        losses = []

        for i, seq in enumerate(tokenizer.texts_to_sequences_generator(text_generator())):
            # get skipgram couples for one text in the dataset
            couples, labels = sequence.skipgrams(seq, max_features, window_size=4,
                                                 negative_samples=1.,
                                                 sampling_table=sampling_table)
            if couples:
                # one gradient update per sentence (one sentence = a few 1000s of word couples)
                X = np.array(couples, dtype="int32")
                loss = model.train(X, labels)
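# A minimal sketch (not from the original script) of what sequence.skipgrams
# returns; vocab_size and seq are toy stand-in values, and the sampling table
# is omitted here so the call always yields pairs.
import numpy as np
from keras.preprocessing import sequence

vocab_size = 50
seq = [2, 7, 11, 23, 5]

# couples are (word, context) index pairs; labels are 1 for true context
# pairs and 0 for sampled negatives
couples, labels = sequence.skipgrams(seq, vocab_size,
                                     window_size=4, negative_samples=1.)
print(couples[:3], labels[:3])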
model.add(Activation('relu'))
model.add(Dense(nb_classes))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam')

XX = model.get_input()
YY = model.layers[0].get_output()
F = theano.function([XX], YY)

nb_epochs = 10  # you probably want to go longer than this
batch_size = 256
# fig = plt.figure()
try:
    for e in range(nb_epochs):
        print('-' * 40)
        progbar = generic_utils.Progbar(X_train.shape[0])
        # integer division so the range argument is an int
        for b in range(X_train.shape[0] // batch_size):
            f = b * batch_size
            l = (b + 1) * batch_size
            X_batch = X_train[f:l].astype('float32')
            y_batch = y_train[f:l].astype('float32')
            loss = model.train_on_batch(X_batch, y_batch)
            progbar.add(X_batch.shape[0])
        scorev = model.evaluate(X_valid, y_valid, show_accuracy=True, verbose=0)[1]
        scoret = model.evaluate(X_test, y_test, show_accuracy=True, verbose=0)[1]
        print('Epoch: {0} | Valid: {1} | Test: {2}'.format(e, scorev, scoret))
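# A minimal sketch of the Progbar API these examples rely on: the constructor
# takes the target sample count, add() advances by n samples and averages any
# named values, update() sets an absolute position (values here are toy numbers).
import time
from keras.utils import generic_utils

progbar = generic_utils.Progbar(100)
for step in range(10):
    time.sleep(0.01)  # stand-in for a training step
    progbar.add(10, values=[('loss', 1.0 / (step + 1))])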
iter_num = 0
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor, rpn_accuracy_for_epoch = [], []
t0 = start_time = time.time()
best_loss = np.Inf

with open('out.csv', 'w') as f:
    f.write('Accuracy,RPN classifier,RPN regression,Detector classifier,Detector regression,Total')
    f.write('\n')  # terminate the CSV header row
    try:
        for epoch_num in range(num_epochs):
            progbar = generic_utils.Progbar(epoch_length)
            print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
            while True:
                try:
                    if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                        mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                        rpn_accuracy_rpn_monitor = []
                        print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                              .format(mean_overlapping_bboxes, epoch_length))
                        if mean_overlapping_bboxes == 0:
                            print('RPN is not producing bounding boxes that overlap the ground truth boxes.'
                                  ' Check RPN settings or keep training.')
num_classes = len(list(labelencoder.classes_))
joblib.dump(labelencoder, '../models/labelencoder.pkl')

# model = BOW_QI(joint_method="concat")
# model = BOW_QI(joint_method="mcb")
# model = BOW_QI(joint_method="mul")
# model = LSTM_QI(joint_method="concat")
# model = LSTM_QI(joint_method="mcb")
model = LSTM_QI(joint_method="mul")
model.build(num_classes)

print('Training...')
for epoch in range(args.num_epochs):
    print('epoch', epoch)
    index_shuf = list(range(len(questions_train)))
    shuffle(index_shuf)
    progbar = generic_utils.Progbar(len(questions_train))
    # float division so np.ceil rounds the batch count up, not down
    for batch in range(int(np.ceil(len(questions_train) / float(args.batch_size)))):
        V, Q, A = [], [], []
        for i in range(args.batch_size * batch, args.batch_size * (batch + 1)):
            if i >= len(questions_train):
                break
            question = questions_train[index_shuf[i]]
            V.append(images_train[question["image_index"]])
            Q.append(model.extract_question_feature(nlp, question["question"]))
            A.append(np_utils.to_categorical(
                labelencoder.transform([question["answer"]]), num_classes))
# CIFAR-10 with 20% of the data + data augmentation: training results
# configure the augmentation parameters
img_generator = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   zoom_range=0.2)

model_2 = build_model()
model_2.compile(optimizer='adam', loss='categorical_crossentropy',
                metrics=['accuracy'])

# Data Augmentation
for e in range(epochs):
    print('Epoch', e)
    print('Training...')
    progbar = generic_utils.Progbar(x_train_part.shape[0])
    batches = 0
    for x_batch, y_batch in img_generator.flow(x_train_part, y_train_part,
                                               batch_size=batch_size, shuffle=True):
        loss, train_acc = model_2.train_on_batch(x_batch, y_batch)
        batches += x_batch.shape[0]
        if batches > x_train_part.shape[0]:
            # the generator loops forever, so stop after one pass over the data
            break
        progbar.add(x_batch.shape[0], values=[('train loss', loss),
                                              ('train acc', train_acc)])

loss, acc = model_2.evaluate(x_test, y_test, batch_size=32)
print('Loss: ', loss)
print('Accuracy: ', acc)
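# An equivalent sketch using fit_generator (Keras 1/2 API) instead of the manual
# loop above; it assumes the same model_2, img_generator, and data variables.
# fit_generator handles the infinite generator by counting steps per epoch.
model_2.fit_generator(
    img_generator.flow(x_train_part, y_train_part, batch_size=batch_size),
    steps_per_epoch=x_train_part.shape[0] // batch_size,
    epochs=epochs)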
# input/output channels in image
input_channels = 1
output_channels = 1

# image dims
input_img_dim = (input_channels, im_width, im_height)
output_img_dim = (output_channels, im_width, im_height)

generator = pix2pix_generator(input_img_dim)
discriminator = super_lightweight_discriminator()

opt_discriminator = Adam(lr=1E-4, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
discriminator.compile(loss='binary_crossentropy', optimizer=opt_discriminator)

batch_size = 1
nb_epoch = 100
n_images_per_epoch = 10

print('Training starting...')
for epoch in range(0, nb_epoch):
    print('Epoch {}'.format(epoch))
    batch_counter = 1
    start = time.time()
    progbar = keras_generic_utils.Progbar(n_images_per_epoch)

    # go through 1..n_images_per_epoch (which will go through all buckets as well)
    for mini_batch_i in range(0, n_images_per_epoch, batch_size):
    formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser.add_argument('--name', help='Name of the created HDF5 file.',
                    default='sofia.h5')
parser.add_argument('--meta', help='Path to the meta HDF5 file')
parser.add_argument('--source', help='Where bulgaria.h5 resides')
parser.add_argument('--chunksize', help='Chunk size to iterate over the HDF5 file',
                    type=int, default=50000)
args = parser.parse_args()

with pd.get_store(args.meta) as meta_store, \
        pd.get_store(args.source) as data_store, \
        pd.HDFStore(args.name, 'w', complevel=9, complib='blosc') as sofia_store:
    site_info = meta_store['site_info']
    sofia = site_info.query("Region == 'SOFIA_CITY'")
    progbar = generic_utils.Progbar(data_store.get_storer('data').nrows)
    for chunk in data_store.select('data', chunksize=args.chunksize):
        n_rows = len(chunk)  # the final chunk may be smaller than chunksize
        chunk['in_sofia'] = chunk.site_ID.apply(lambda x: int(x) in sofia.index)
        chunk = chunk.query('in_sofia == True')
        del chunk['in_sofia']
        sofia_store.append('data', chunk, data_columns=True)
        progbar.add(n_rows)
    print('Sofia users stored in {}'.format(args.name))
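# A usage sketch, not from the original script: because the chunks are appended
# in table format with data_columns=True, the resulting store supports on-disk
# where-queries; the site ID below is a hypothetical value.
import pandas as pd

subset = pd.read_hdf('sofia.h5', 'data', where='site_ID == 1042')
print(len(subset))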
def run_train(train_path, output_weight_path, config_filename, parser='simple',
              input_weight_path=None, network='resnet50', num_rois=32, lr=1e-5,
              iters=100, num_epochs=100, overlap_th=0.7):
    C = config.Config()
    C.model_path = output_weight_path
    C.num_rois = int(num_rois)

    if network == 'vgg':
        C.network = 'vgg'
        from keras_frcnn import vgg as nn
    elif network == 'resnet50':
        from keras_frcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError

    if parser == 'pascal_voc':
        from keras_frcnn.pascal_voc_parser import get_data
    elif parser == 'simple':
        from keras_frcnn.simple_parser import get_data
    else:
        print('Wrong parser method')
        raise ValueError

    if input_weight_path is not None:
        C.base_net_weights = input_weight_path
    else:
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(train_path)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping
    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    config_output_filename = config_filename
    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'
              .format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)
    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers, roi_input, C.num_rois,
                               nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except:
        print('Could not load pretrained model weights...')

    optimizer = Adam(lr=lr)
    optimizer_classifier = Adam(lr=lr)

    from keras_frcnn import losses as losses
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses.rpn_loss_cls(num_anchors),
                            losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses.class_loss_cls,
                                   losses.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = int(iters)
    num_epochs = int(num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')
    vis = True

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                          .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap the ground truth boxes.'
                              ' Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(),
                                           use_regr=True, overlap_thresh=overlap_th,
                                           max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2,
                                                                replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                C.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                C.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])),
                                ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                              .format(mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print('Total loss decreased from {} to {}, saving weights'
                                  .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
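# The positive/negative RoI balancing inside the loop above, isolated as a small
# standalone helper (a sketch: the function name and toy check are mine, but the
# selection logic mirrors the snippet).
import numpy as np

def select_roi_samples(Y1, num_rois):
    """Pick up to num_rois // 2 positive RoIs and fill the rest with negatives.

    Y1 has shape (1, n_rois, n_classes); the last channel is 1 for background
    (negative) RoIs and 0 for object (positive) RoIs.
    """
    neg = np.where(Y1[0, :, -1] == 1)[0]
    pos = np.where(Y1[0, :, -1] == 0)[0]
    if len(pos) < num_rois // 2:
        sel_pos = pos.tolist()
    else:
        sel_pos = np.random.choice(pos, num_rois // 2, replace=False).tolist()
    n_neg = num_rois - len(sel_pos)
    # fall back to sampling with replacement when there are too few negatives
    sel_neg = np.random.choice(neg, n_neg, replace=len(neg) < n_neg).tolist()
    return sel_pos + sel_neg

# toy check: 10 RoIs where the last channel marks indices 3..9 as negatives
Y1 = np.zeros((1, 10, 4))
Y1[0, 3:, -1] = 1
print(select_roi_samples(Y1, 4))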
# Prefer mean_absolute_error, since that is the one used for the challenge

# Saving model
jsonString = model.to_json()
open(model_file_name + '.json', 'w').write(jsonString)

minScore = 1.0
# raw_input('WAIT')
print('Training started...')
for k in range(num_epochs):
    # shuffle the data points before going through them
    random.shuffle(vidNames)
    progbar = generic_utils.Progbar(len(vidNames))
    for i in range(numBatch):
        # Read numPerBatch files, get the images and answers
        X_batch, Y_batch = readData(vidNames[(i * numPerBatch):((i + 1) * numPerBatch)],
                                    trueVal, 'C')
        X_batch = X_batch.reshape(X_batch.shape[0], 1, row, col)
        X_batch = X_batch.astype('float32')
        X_batch /= 255
def my_train(args):
    # create output folder
    if not os.path.exists(os.path.expanduser(args.datasetpath)):
        os.mkdir(args.datasetpath)
    # create figures
    if not os.path.exists('./figures'):
        os.mkdir('./figures')

    # load data
    procImage, rawImage, procImage_val, rawImage_val, rawImage_test = my_load_data(args.datasetpath)
    print('procImage.shape : ', procImage.shape)
    print('rawImage.shape : ', rawImage.shape)
    print('procImage_val : ', procImage_val.shape)
    print('rawImage_val : ', rawImage_val.shape)

    # set the patch size and image size
    img_shape = rawImage.shape[-3:]
    print('img_shape : ', img_shape)
    patch_num = (img_shape[0] // args.patch_size) * (img_shape[1] // args.patch_size)
    disc_img_shape = (args.patch_size, args.patch_size, procImage.shape[-1])
    print('disc_img_shape : ', disc_img_shape)

    # train
    opt_dcgan = Adam(lr=1E-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
    opt_discriminator = Adam(lr=1E-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

    # load generator model
    generator_model = models.my_load_generator(img_shape, disc_img_shape)
    # load discriminator model
    discriminator_model = models.my_load_DCGAN_discriminator(img_shape, disc_img_shape, patch_num)

    generator_model.compile(loss='mae', optimizer=opt_discriminator)
    discriminator_model.trainable = False

    DCGAN_model = models.my_load_DCGAN(generator_model, discriminator_model,
                                       img_shape, args.patch_size)

    loss = [l1_loss, 'binary_crossentropy']
    loss_weights = [1E1, 1]
    DCGAN_model.compile(loss=loss, loss_weights=loss_weights, optimizer=opt_dcgan)

    discriminator_model.trainable = True
    discriminator_model.compile(loss='binary_crossentropy', optimizer=opt_discriminator)

    # start training
    print('start training')
    for e in range(args.epoch):
        # time the epoch
        starttime = time.time()
        # shuffle and build the batch iterators
        perm = np.random.permutation(rawImage.shape[0])
        X_procImage = procImage[perm]
        X_rawImage = rawImage[perm]
        X_procImageIter = [X_procImage[i:i + args.batch_size]
                           for i in range(0, rawImage.shape[0], args.batch_size)]
        X_rawImageIter = [X_rawImage[i:i + args.batch_size]
                          for i in range(0, rawImage.shape[0], args.batch_size)]
        b_it = 0
        # progress display
        progbar = generic_utils.Progbar(len(X_procImageIter) * args.batch_size)
        for (X_proc_batch, X_raw_batch) in zip(X_procImageIter, X_rawImageIter):
            b_it += 1
            X_disc, y_disc = get_disc_batch(X_proc_batch, X_raw_batch, generator_model,
                                            b_it, args.patch_size)
            raw_disc, _ = get_disc_batch(X_raw_batch, X_raw_batch, generator_model,
                                         1, args.patch_size)
            x_disc = X_disc + raw_disc
            # update the discriminator
            disc_loss = discriminator_model.train_on_batch(x_disc, y_disc)

            # create a batch to feed the generator model (freshly shuffled order)
            idx = np.random.choice(procImage.shape[0], args.batch_size)
            X_gen_target, X_gen = procImage[idx], rawImage[idx]
            y_gen = np.zeros((X_gen.shape[0], 2), dtype=np.uint8)
            y_gen[:, 1] = 1

            # Freeze the discriminator
            discriminator_model.trainable = False
            gen_loss = DCGAN_model.train_on_batch(X_gen, [X_gen_target, y_gen])
            # Unfreeze the discriminator
            discriminator_model.trainable = True

            progbar.add(args.batch_size,
                        values=[("D logloss", disc_loss),
                                ("G tot", gen_loss[0]),
                                ("G L1", gen_loss[1]),
                                ("G logloss", gen_loss[2])])

            # save images for visualization
            if b_it % (procImage.shape[0] // args.batch_size // 2) == 0:
                plot_generated_batch(X_proc_batch, X_raw_batch, generator_model,
                                     args.batch_size, b_it, "training")
                idx = np.random.choice(procImage_val.shape[0], args.batch_size,
                                       replace=False)
                X_gen_target, X_gen = procImage_val[idx], rawImage_val[idx]
                plot_generated_batch(X_gen_target, X_gen, generator_model,
                                     args.batch_size, b_it, "validation")
                idx = np.random.choice(rawImage_test.shape[0], rawImage_test.shape[0],
                                       replace=False)
                X_gen = rawImage_test[idx]
                plot_generated_batch_test(X_gen, generator_model, args.batch_size, b_it)

        print("")
        print('Epoch %s/%s, Time: %s' % (e + 1, args.epoch, time.time() - starttime))
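# Why the freeze/unfreeze toggling above works: Keras collects trainable weights
# at compile time, so the combined model must be compiled while the discriminator
# is frozen, and the discriminator compiled while unfrozen. A minimal,
# self-contained sketch of that pattern (toy layer sizes, not the models above):
from keras.layers import Dense, Input
from keras.models import Model

x = Input(shape=(8,))
disc = Model(x, Dense(1, activation='sigmoid')(x))
disc.compile(loss='binary_crossentropy', optimizer='adam')  # trainable here

z = Input(shape=(4,))
gen = Model(z, Dense(8)(z))

disc.trainable = False                 # freeze before compiling the stack
gan = Model(z, disc(gen(z)))
gan.compile(loss='binary_crossentropy', optimizer='adam')  # disc frozen in gan
disc.trainable = True                  # restore for direct discriminator updates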
def train(**kwargs):
    """
    Train model

    Load the whole train data in memory for faster operations

    args: **kwargs (dict) keyword arguments that specify the model hyperparameters
    """
    # Roll out the parameters
    batch_size = kwargs["batch_size"]
    n_batch_per_epoch = kwargs["n_batch_per_epoch"]
    nb_epoch = kwargs["nb_epoch"]
    model_name = kwargs["model_name"]
    generator = kwargs["generator"]
    image_data_format = kwargs["image_data_format"]
    img_dim = kwargs["img_dim"]
    patch_size = kwargs["patch_size"]
    bn_mode = kwargs["bn_mode"]
    label_smoothing = kwargs["use_label_smoothing"]
    label_flipping = kwargs["label_flipping"]
    dset = kwargs["dset"]
    use_mbd = kwargs["use_mbd"]
    epoch_size = n_batch_per_epoch * batch_size

    # Setup environment (logging directory etc)
    # general_utils.setup_logging(model_name)

    # Load and rescale data
    # X_full_train, X_sketch_train, X_full_val, X_sketch_val = data_utils.load_data(dset, image_data_format)
    img_dim = (256, 256, 3)  # Manual entry

    # Get the number of non overlapping patch and the size of input image to the discriminator
    nb_patch, img_dim_disc = data_utils.get_nb_patch(img_dim, patch_size, image_data_format)

    try:
        # Create optimizers
        opt_dcgan = Adam(lr=1E-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)
        # opt_discriminator = SGD(lr=1E-3, momentum=0.9, nesterov=True)
        opt_discriminator = Adam(lr=1E-3, beta_1=0.9, beta_2=0.999, epsilon=1e-08)

        # Load generator model
        generator_model = models.load("generator_unet_%s" % generator, img_dim,
                                      nb_patch, bn_mode, use_mbd, batch_size)
        # Load discriminator model
        discriminator_model = models.load("DCGAN_discriminator", img_dim_disc,
                                          nb_patch, bn_mode, use_mbd, batch_size)

        generator_model.compile(loss="mae", optimizer=opt_discriminator)
        discriminator_model.trainable = False

        DCGAN_model = models.DCGAN(generator_model, discriminator_model, img_dim,
                                   patch_size, image_data_format)

        loss = [l1_loss, 'binary_crossentropy']
        loss_weights = [1E1, 1]
        DCGAN_model.compile(loss=loss, loss_weights=loss_weights, optimizer=opt_dcgan)

        discriminator_model.trainable = True
        discriminator_model.compile(loss='binary_crossentropy', optimizer=opt_discriminator)

        gen_loss = 100
        disc_loss = 100
        best_loss = [100] * 3

        # Start training
        print("Start training")
        for e in range(nb_epoch):
            # Initialize progbar and batch counter
            progbar = generic_utils.Progbar(epoch_size)
            batch_counter = 1
            start = time.time()

            for X_full_batch, X_sketch_batch in data_utils.facades_generator(img_dim, batch_size=batch_size):
                X_gen, X_gen_target = next(data_utils.facades_generator(img_dim, batch_size=batch_size))
                generator_model.train_on_batch(X_gen, X_gen_target)

                # Create a batch to feed the discriminator model
                X_disc, y_disc = data_utils.get_disc_batch(X_full_batch, X_sketch_batch,
                                                           generator_model, batch_counter,
                                                           patch_size, image_data_format,
                                                           label_smoothing=label_smoothing,
                                                           label_flipping=label_flipping)

                # Update the discriminator
                disc_loss = discriminator_model.train_on_batch(X_disc, y_disc)

                # Create a batch to feed the generator model
                X_gen, X_gen_target = next(data_utils.facades_generator(img_dim, batch_size=batch_size))
                y_gen = np.zeros((X_gen.shape[0], 2), dtype=np.uint8)
                y_gen[:, 1] = 1

                # Freeze the discriminator
                discriminator_model.trainable = False
                gen_loss = DCGAN_model.train_on_batch(X_gen, [X_gen_target, y_gen])
                # Unfreeze the discriminator
                discriminator_model.trainable = True

                batch_counter += 1
                progbar.add(batch_size, values=[("D logloss", disc_loss),
                                                ("G tot", gen_loss[0]),
                                                ("G L1", gen_loss[1]),
                                                ("G logloss", gen_loss[2])])

                # Save images for visualization (integer division keeps the modulus exact)
                if batch_counter % (n_batch_per_epoch // 2) == 0:
                    # Get new images from validation
                    figure_name = "training_" + str(e)
                    data_utils.plot_generated_batch(X_full_batch, X_sketch_batch,
                                                    generator_model, batch_size,
                                                    image_data_format, figure_name)

                if batch_counter >= n_batch_per_epoch:
                    break

            print("")
            print('Epoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start))

            if e % 5 == 0:
                gen_weights_path = os.path.join('../../models/%s/gen_weights_epoch%s.h5' % (model_name, e))
                generator_model.save_weights(gen_weights_path, overwrite=True)

                disc_weights_path = os.path.join('../../models/%s/disc_weights_epoch%s.h5' % (model_name, e))
                discriminator_model.save_weights(disc_weights_path, overwrite=True)

                DCGAN_weights_path = os.path.join('../../models/%s/DCGAN_weights_epoch%s.h5' % (model_name, e))
                DCGAN_model.save_weights(DCGAN_weights_path, overwrite=True)

            Best_gen_L1_weights_path = os.path.join('../../models/%s/best_gen_L1_weights_epoch.h5' % (model_name))
            if gen_loss[1] <= best_loss[1]:
                generator_model.save_weights(Best_gen_L1_weights_path, overwrite=True)
                best_loss[1] = gen_loss[1]

            Best_gen_Totweights_path = os.path.join('../../models/%s/best_gen_Totweights_epoch.h5' % (model_name))
            if gen_loss[0] <= best_loss[0]:
                generator_model.save_weights(Best_gen_Totweights_path, overwrite=True)
                best_loss[0] = gen_loss[0]

    except KeyboardInterrupt:
        pass
def main():
    start_time = time.time()
    parser = argparse.ArgumentParser(
        prog='trainLSTM_MLP.py',
        description='Train LSTM-MLP model for visual question answering')
    parser.add_argument('--mlp-hidden-units', type=int, default=1024,
                        metavar='<mlp-hidden-units>')
    parser.add_argument('--lstm-hidden-units', type=int, default=512,
                        metavar='<lstm-hidden-units>')
    parser.add_argument('--mlp-hidden-layers', type=int, default=3,
                        metavar='<mlp-hidden-layers>')
    parser.add_argument('--lstm-hidden-layers', type=int, default=1,
                        metavar='<lstm-hidden-layers>')
    parser.add_argument('--dropout', type=float, default=0.5,
                        metavar='<dropout-rate>')
    parser.add_argument('--mlp-activation', type=str, default='tanh',
                        metavar='<activation-function>')
    parser.add_argument('--num-epochs', type=int, default=20,
                        metavar='<num-epochs>')
    parser.add_argument('--batch-size', type=int, default=128,
                        metavar='<batch-size>')
    parser.add_argument('--learning-rate', type=float, default=0.001,
                        metavar='<learning-rate>')
    parser.add_argument('--dev-accuracy-path', type=str, required=True,
                        metavar='<accuracy-path>')
    args = parser.parse_args()

    word_vec_dim = 300
    img_dim = 4096
    max_len = 30

    ######################
    #     Load Data      #
    ######################
    data_dir = '/home/mlds/data/0.2_val/'
    print('Loading data...')
    train_id_pairs, train_image_ids = LoadIds('train', data_dir)
    dev_id_pairs, dev_image_ids = LoadIds('dev', data_dir)
    train_questions = LoadQuestions('train', data_dir)
    dev_questions = LoadQuestions('dev', data_dir)
    train_choices = LoadChoices('train', data_dir)
    dev_choices = LoadChoices('dev', data_dir)
    train_answers = LoadAnswers('train', data_dir)
    dev_answers = LoadAnswers('dev', data_dir)
    print('Finished loading data.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    # Model Descriptions #
    ######################
    print('Generating and compiling model...')
    # image model (CNN features)
    image_model = Sequential()
    image_model.add(Reshape(input_shape=(img_dim, ), dims=(img_dim, )))

    # language model (LSTM)
    language_model = Sequential()
    if args.lstm_hidden_layers == 1:
        language_model.add(LSTM(output_dim=args.lstm_hidden_units,
                                return_sequences=False,
                                input_shape=(max_len, word_vec_dim)))
    else:
        language_model.add(LSTM(output_dim=args.lstm_hidden_units,
                                return_sequences=True,
                                input_shape=(max_len, word_vec_dim)))
        for i in range(args.lstm_hidden_layers - 2):
            language_model.add(LSTM(output_dim=args.lstm_hidden_units,
                                    return_sequences=True))
        language_model.add(LSTM(output_dim=args.lstm_hidden_units,
                                return_sequences=False))

    # feedforward model (MLP)
    model = Sequential()
    model.add(Merge([language_model, image_model], mode='concat', concat_axis=1))
    for i in range(args.mlp_hidden_layers):
        model.add(Dense(args.mlp_hidden_units, init='uniform'))
        model.add(Activation(args.mlp_activation))
        model.add(Dropout(args.dropout))
    model.add(Dense(word_vec_dim))
    # model.add(Activation('softmax'))

    json_string = model.to_json()
    model_filename = 'models/mse_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f' % (
        args.lstm_hidden_units, args.lstm_hidden_layers,
        args.mlp_hidden_units, args.mlp_hidden_layers,
        args.mlp_activation, args.learning_rate, args.dropout)
    # model_filename = 'models/vgg_lstm_units_%i_layers_%i_mlp_units_%i_layers_%i_%s_lr%.1e_dropout%.2f_loss_cosine' % (args.lstm_hidden_units, args.lstm_hidden_layers, args.mlp_hidden_units, args.mlp_hidden_layers, args.mlp_activation, args.learning_rate, args.dropout)
    open(model_filename + '.json', 'w').write(json_string)

    # loss and optimizer
    rmsprop = RMSprop(lr=args.learning_rate)
    # model.compile(loss='categorical_crossentropy', optimizer=rmsprop)
    model.compile(loss="mse", optimizer=rmsprop)
    print('Compilation finished.')
    print('Time: %f s' % (time.time() - start_time))

    ########################################
    #  Load CNN Features and Word Vectors  #
    ########################################
    # load VGG features
    print('Loading VGG features...')
    VGG_features, img_map = LoadVGGFeatures()
    print('VGG features loaded')
    print('Time: %f s' % (time.time() - start_time))

    # load GloVe vectors
    print('Loading GloVe vectors...')
    word_embedding, word_map = LoadGloVe()
    print('GloVe vectors loaded')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #    Make Batches    #
    ######################
    print('Making batches...')
    # training batches
    train_question_batches = [b for b in MakeBatches(train_questions, args.batch_size,
                                                     fillvalue=train_questions[-1])]
    train_answer_batches = [b for b in MakeBatches(train_answers['toks'], args.batch_size,
                                                   fillvalue=train_answers['toks'][-1])]
    train_image_batches = [b for b in MakeBatches(train_image_ids, args.batch_size,
                                                  fillvalue=train_image_ids[-1])]
    train_indices = list(range(len(train_question_batches)))

    # validation batches
    dev_question_batches = [b for b in MakeBatches(dev_questions, args.batch_size,
                                                   fillvalue=dev_questions[-1])]
    dev_answer_batches = [b for b in MakeBatches(dev_answers['labs'], args.batch_size,
                                                 fillvalue=dev_answers['labs'][-1])]
    dev_choice_batches = [b for b in MakeBatches(dev_choices, args.batch_size,
                                                 fillvalue=dev_choices[-1])]
    dev_image_batches = [b for b in MakeBatches(dev_image_ids, args.batch_size,
                                                fillvalue=dev_image_ids[-1])]
    print('Finished making batches.')
    print('Time: %f s' % (time.time() - start_time))

    ######################
    #      Training      #
    ######################
    acc_file = open(args.dev_accuracy_path, 'w')
    dev_accs = []
    max_acc = -1
    max_acc_epoch = -1

    # define interrupt handler
    def PrintDevAcc():
        print('Max validation accuracy epoch: %i' % max_acc_epoch)
        print(dev_accs)

    def InterruptHandler(sig, frame):
        print(str(sig))
        PrintDevAcc()
        sys.exit(-1)

    signal.signal(signal.SIGINT, InterruptHandler)
    signal.signal(signal.SIGTERM, InterruptHandler)

    # print training information
    print('-' * 80)
    print('Training Information')
    print('# of LSTM hidden units: %i' % args.lstm_hidden_units)
    print('# of LSTM hidden layers: %i' % args.lstm_hidden_layers)
    print('# of MLP hidden units: %i' % args.mlp_hidden_units)
    print('# of MLP hidden layers: %i' % args.mlp_hidden_layers)
    print('Dropout: %f' % args.dropout)
    print('MLP activation function: %s' % args.mlp_activation)
    print('# of training epochs: %i' % args.num_epochs)
    print('Batch size: %i' % args.batch_size)
    print('Learning rate: %f' % args.learning_rate)
    print('# of train questions: %i' % len(train_questions))
    print('# of dev questions: %i' % len(dev_questions))
    print('-' * 80)
    acc_file.write('-' * 80 + '\n')
    acc_file.write('Training Information\n')
    acc_file.write('# of LSTM hidden units: %i\n' % args.lstm_hidden_units)
    acc_file.write('# of LSTM hidden layers: %i\n' % args.lstm_hidden_layers)
    acc_file.write('# of MLP hidden units: %i\n' % args.mlp_hidden_units)
    acc_file.write('# of MLP hidden layers: %i\n' % args.mlp_hidden_layers)
    acc_file.write('Dropout: %f\n' % args.dropout)
    acc_file.write('MLP activation function: %s\n' % args.mlp_activation)
    acc_file.write('# of training epochs: %i\n' % args.num_epochs)
    acc_file.write('Batch size: %i\n' % args.batch_size)
    acc_file.write('Learning rate: %f\n' % args.learning_rate)
    acc_file.write('# of train questions: %i\n' % len(train_questions))
    acc_file.write('# of dev questions: %i\n' % len(dev_questions))
    acc_file.write('-' * 80 + '\n')

    # start training
    print('Training started...')
    for k in range(args.num_epochs):
        print('-' * 80)
        print('Epoch %i' % (k + 1))
        progbar = generic_utils.Progbar(len(train_indices) * args.batch_size)
        # shuffle batch indices
        random.shuffle(train_indices)
        for i in train_indices:
            X_question_batch = GetQuestionsTensor(train_question_batches[i],
                                                  word_embedding, word_map)
            X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, VGG_features)
            Y_answer_batch = GetAnswersMatrix(train_answer_batches[i],
                                              word_embedding, word_map)
            loss = model.train_on_batch([X_question_batch, X_image_batch], Y_answer_batch)
            loss = loss[0].tolist()
            progbar.add(args.batch_size, values=[('train loss', loss)])
        print('Time: %f s' % (time.time() - start_time))

        # evaluate on dev set
        pbar = generic_utils.Progbar(len(dev_question_batches) * args.batch_size)
        dev_correct = 0
        # feed forward
        for i in range(len(dev_question_batches)):
            X_question_batch = GetQuestionsTensor(dev_question_batches[i],
                                                  word_embedding, word_map)
            X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, VGG_features)
            prob = model.predict_proba([X_question_batch, X_image_batch],
                                       args.batch_size, verbose=0)

            # get word vecs of choices
            choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map)
            similarity = np.zeros((5, args.batch_size), float)
            # calculate euclidean distances between the prediction and each choice
            for j in range(5):
                similarity[j] = np.diag(pairwise_distances(prob, choice_feats[j],
                                                           metric='euclidean'))
            # take the choice with the smallest distance
            pred = np.argmin(similarity, axis=0) + 1

            if i != (len(dev_question_batches) - 1):
                dev_correct += np.count_nonzero(dev_answer_batches[i] == pred)
            else:
                # the last batch is padded, so only score the real questions
                num_padding = args.batch_size * len(dev_question_batches) - len(dev_questions)
                last_idx = args.batch_size - num_padding
                dev_correct += np.count_nonzero(
                    dev_answer_batches[i][:last_idx] == pred[:last_idx])
            pbar.add(args.batch_size)

        dev_acc = float(dev_correct) / len(dev_questions)
        dev_accs.append(dev_acc)
        print('Validation Accuracy: %f' % dev_acc)
        print('Time: %f s' % (time.time() - start_time))

        if dev_acc > max_acc:
            max_acc = dev_acc
            max_acc_epoch = k
            model.save_weights(model_filename + '_best.hdf5', overwrite=True)
        # model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k+1))

    print(dev_accs)
    for acc in dev_accs:
        acc_file.write('%f\n' % acc)
    print('Best validation accuracy: %f; epoch#%i' % (max_acc, (max_acc_epoch + 1)))
    acc_file.write('Best validation accuracy: %f; epoch#%i\n' % (max_acc, (max_acc_epoch + 1)))
    print('Training finished.')
    acc_file.write('Training finished.\n')
    print('Time: %f s' % (time.time() - start_time))
    acc_file.write('Time: %f s\n' % (time.time() - start_time))
    acc_file.close()
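# The choice-scoring step above, isolated as a sketch (the function name and toy
# inputs are mine): for each question, pick the answer choice whose embedding is
# nearest, by euclidean distance, to the predicted vector. np.diag keeps only
# the same-row (question-to-its-own-choice) distances.
import numpy as np
from sklearn.metrics import pairwise_distances

def nearest_choice(pred, choice_feats):
    """pred: (batch, dim); choice_feats: (5, batch, dim) -> labels in 1..5."""
    dists = np.stack([
        np.diag(pairwise_distances(pred, choice_feats[j], metric='euclidean'))
        for j in range(choice_feats.shape[0])
    ])
    return np.argmin(dists, axis=0) + 1

pred = np.random.rand(4, 300)
choices = np.random.rand(5, 4, 300)
print(nearest_choice(pred, choices))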
def train(train_path, input_weight_path=None, num_rois=None, network=None,
          parser=None, horizontal_flips=None, vertical_flips=None, rot_90=None,
          num_epochs=None, num_epochs_len=None, config_filename=None,
          output_weight_path=None):
    """
    Keyword arguments:

    train_path -- Path to training data. (required)
    input_weight_path -- Input path for weights. If not specified, will try to
        load default weights provided by keras. (Default false)
    num_rois -- Number of RoIs to process at once. (Default 32)
    network -- Base network to use. Supports vgg or resnet50. (Default 'resnet50')
    parser -- Parser to use. One of simple or pascal_voc. (Default 'pascal_voc')
    horizontal_flips -- Augment with horizontal flips in training. (Default false)
    vertical_flips -- Augment with vertical flips in training. (Default false)
    rot_90 -- Augment with 90 degree rotations in training. (Default false)
    num_epochs -- Number of epochs. (Default 200)
    num_epochs_len -- Length of epochs. (Default 1000)
    config_filename -- Location to store all the metadata related to the
        training (to be used when testing). (Default 'config.pickle')
    output_weight_path -- Output path for weights. (Default './model_frcnn.hdf5')
    """
    global losses

    num_rois = 32 if num_rois is None else num_rois
    network = 'resnet50' if network is None else network
    parser = 'pascal_voc' if parser is None else parser
    horizontal_flips = False if horizontal_flips is None else horizontal_flips
    vertical_flips = False if vertical_flips is None else vertical_flips
    rot_90 = False if rot_90 is None else rot_90
    num_epochs = 200 if num_epochs is None else num_epochs
    num_epochs_len = 1000 if num_epochs_len is None else num_epochs_len
    config_filename = 'config.pickle' if config_filename is None else config_filename
    output_weight_path = './model_frcnn.hdf5' if output_weight_path is None else output_weight_path

    if parser == 'pascal_voc':
        from ekfrcnn.kfrcnn.pascal_voc_parser import get_data
    elif parser == 'simple':
        from ekfrcnn.kfrcnn.simple_parser import get_data
    else:
        raise ValueError("Command line option parser must be one of 'pascal_voc' or 'simple'")

    # persist the settings in the config object
    C = config.Config()
    C.use_horizontal_flips = bool(horizontal_flips)
    C.use_vertical_flips = bool(vertical_flips)
    C.rot_90 = bool(rot_90)
    C.model_path = output_weight_path
    C.num_rois = int(num_rois)

    if network == 'vgg':
        from ekfrcnn.kfrcnn import vgg as nn
        C.network = 'vgg'
    elif network == 'resnet50':
        from ekfrcnn.kfrcnn import resnet as nn
        C.network = 'resnet50'
    else:
        print('Not a valid model')
        raise ValueError

    if input_weight_path:
        C.base_net_weights = input_weight_path
    else:
        # set the path to weights based on backend and model
        C.base_net_weights = nn.get_weight_path()

    all_imgs, classes_count, class_mapping = get_data(train_path)
    print('all_imgs {}'.format(all_imgs))

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    C.class_mapping = class_mapping
    inv_map = {v: k for k, v in class_mapping.items()}

    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    config_output_filename = config_filename
    with open(config_output_filename, 'wb') as config_f:
        pickle.dump(C, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'
              .format(config_output_filename))

    random.shuffle(all_imgs)
    num_imgs = len(all_imgs)
    train_imgs = [s for s in all_imgs if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_imgs if s['imageset'] == 'test']

    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, C,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, C,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)

    classifier = nn.classifier(shared_layers, roi_input, C.num_rois,
                               nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)

    try:
        print('loading weights from {}'.format(C.base_net_weights))
        model_rpn.load_weights(C.base_net_weights, by_name=True)
        model_classifier.load_weights(C.base_net_weights, by_name=True)
    except:
        print('Could not load pretrained model weights.')

    optimizer = Adam(lr=1e-5)
    optimizer_classifier = Adam(lr=1e-5)
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses.rpn_loss_cls(num_anchors),
                            losses.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses.class_loss_cls,
                                   losses.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = num_epochs_len
    num_epochs = int(num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')
    vis = True

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
                    print('first print:{}'.format(len(rpn_accuracy_rpn_monitor)))
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                          .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap the ground truth boxes.'
                              ' Check RPN settings or keep training.')

                X, Y, img_data = next(data_gen_train)
                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                R = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], C, K.image_dim_ordering(),
                                           use_regr=True, overlap_thresh=0.7, max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(R, img_data, C, class_mapping)

                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                print('neg_samples:{}'.format(len(neg_samples)))

                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if C.num_rois > 1:
                    if len(pos_samples) < C.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, C.num_rois // 2,
                                                                replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                C.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                C.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])),
                                ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    print('second print:{}'.format(len(rpn_accuracy_for_epoch)))
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if C.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                              .format(mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if C.verbose:
                            print('Total loss decreased from {} to {}, saving weights'
                                  .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(C.model_path)
                    break

            except Exception as e:
                print('Exception: {}'.format(e))
                continue

    print('Training complete, exiting.')
# train the model, output generated text after each iteration
train_num_samples = int(samples_frac * len(train_sequences))
valid_num_samples = int(samples_frac * len(valid_sequences))
train_num_samples = (train_num_samples // batch_size) * batch_size
valid_num_samples = (valid_num_samples // batch_size) * batch_size

best_loss = 10000
iteration = 0
while True:
    iteration += 1
    print()
    print('-' * 50)
    print('Iteration', iteration)

    print("Training")
    progbar = generic_utils.Progbar(train_num_samples)
    gen = samples_generator(train_sequences, batch_size, num_samples=train_num_samples)
    for X, y in gen:
        loss, accuracy = model.train_on_batch(X, y, accuracy=True)
        progbar.add(batch_size, values=[("train loss", loss), ("train acc", accuracy)])

    print()
    print("Validating")
    progbar = generic_utils.Progbar(valid_num_samples)
    gen = samples_generator(valid_sequences, batch_size, num_samples=valid_num_samples)
    valid_loss = 0
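# The excerpt stops mid-validation; a plausible continuation, sketched under the
# same old-Keras API as the training call above (test_on_batch mirrors
# train_on_batch without the gradient update) -- not the original code.
n_batches = 0
for X, y in gen:
    loss, accuracy = model.test_on_batch(X, y, accuracy=True)
    valid_loss += loss
    n_batches += 1
    progbar.add(batch_size, values=[("valid loss", loss), ("valid acc", accuracy)])
valid_loss /= max(n_batches, 1)
if valid_loss < best_loss:
    best_loss = valid_loss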
def train_kitti():
    # config for data argument
    cfg = config.Config()
    cfg.use_horizontal_flips = True
    cfg.use_vertical_flips = True
    cfg.rot_90 = True
    cfg.num_rois = 32
    cfg.base_net_weights = os.path.join('./model/', nn.get_weight_path())
    # cfg.base_net_weights = r''

    # TODO: the only path that should be changed to train on other data
    cfg.model_path = '/media/private/Ci/log/plane/frcnn/vgg-adam'
    now = datetime.datetime.now()
    day = now.strftime('%y-%m-%d')
    for i in range(10000):
        if not os.path.exists('%s-%s-%d' % (cfg.model_path, day, i)):
            cfg.model_path = '%s-%s-%d' % (cfg.model_path, day, i)
            break
    make_dir(cfg.model_path)
    make_dir(cfg.model_path + '/loss')
    make_dir(cfg.model_path + '/loss_rpn_cls')
    make_dir(cfg.model_path + '/loss_rpn_regr')
    make_dir(cfg.model_path + '/loss_class_cls')
    make_dir(cfg.model_path + '/loss_class_regr')

    cfg.simple_label_file = '/media/public/GEOWAY/plane/plane0817.csv'

    all_images, classes_count, class_mapping = get_data(cfg.simple_label_file)

    if 'bg' not in classes_count:
        classes_count['bg'] = 0
        class_mapping['bg'] = len(class_mapping)

    cfg.class_mapping = class_mapping
    cfg.config_save_file = os.path.join(cfg.model_path, 'config.pickle')
    with open(cfg.config_save_file, 'wb') as config_f:
        pickle.dump(cfg, config_f)
        print('Config has been written to {}, and can be loaded when testing to ensure correct results'
              .format(cfg.config_save_file))

    inv_map = {v: k for k, v in class_mapping.items()}
    print('Training images per class:')
    pprint.pprint(classes_count)
    print('Num classes (including bg) = {}'.format(len(classes_count)))

    random.shuffle(all_images)
    num_imgs = len(all_images)
    train_imgs = [s for s in all_images if s['imageset'] == 'trainval']
    val_imgs = [s for s in all_images if s['imageset'] == 'test']
    print('Num train samples {}'.format(len(train_imgs)))
    print('Num val samples {}'.format(len(val_imgs)))

    data_gen_train = data_generators.get_anchor_gt(train_imgs, classes_count, cfg,
                                                   nn.get_img_output_length,
                                                   K.image_dim_ordering(), mode='train')
    data_gen_val = data_generators.get_anchor_gt(val_imgs, classes_count, cfg,
                                                 nn.get_img_output_length,
                                                 K.image_dim_ordering(), mode='val')

    # fill a queue with training batches from background processes
    Q = multiprocessing.Manager().Queue(maxsize=30)

    def fill_Q(n):
        while True:
            if not Q.full():
                Q.put(next(data_gen_train))
                # print(Q.qsize(), 'put', n)
            else:
                time.sleep(0.00001)

    threads = []
    for i in range(4):
        thread = multiprocessing.Process(target=fill_Q, args=(i, ))
        threads.append(thread)
        thread.start()

    if K.image_dim_ordering() == 'th':
        input_shape_img = (3, None, None)
    else:
        input_shape_img = (None, None, 3)

    img_input = Input(shape=input_shape_img)
    roi_input = Input(shape=(None, 4))

    # define the base network (resnet here, can be VGG, Inception, etc)
    shared_layers = nn.nn_base(img_input, trainable=True)

    # define the RPN, built on the base layers
    num_anchors = len(cfg.anchor_box_scales) * len(cfg.anchor_box_ratios)
    rpn = nn.rpn(shared_layers, num_anchors)
    classifier = nn.classifier(shared_layers, roi_input, cfg.num_rois,
                               nb_classes=len(classes_count), trainable=True)

    model_rpn = Model(img_input, rpn[:2])
    model_classifier = Model([img_input, roi_input], classifier)

    # this is a model that holds both the RPN and the classifier, used to load/save weights for the models
    model_all = Model([img_input, roi_input], rpn[:2] + classifier)
    # model_all.summary()

    from keras.utils import plot_model
    # os.environ['PATH'] = os.environ['PATH'] + r';C:\Program Files (x86)\Graphviz2.38\bin;'
    plot_model(model_all, 'model_all.png', show_layer_names=True, show_shapes=True)
    plot_model(model_classifier, 'model_classifier.png', show_layer_names=True, show_shapes=True)
    plot_model(model_rpn, 'model_rpn.png', show_layer_names=True, show_shapes=True)

    '''
    try:
        print('loading weights from {}'.format(cfg.base_net_weights))
        model_rpn.load_weights(cfg.model_path, by_name=True)
        model_classifier.load_weights(cfg.model_path, by_name=True)
    except Exception as e:
        print(e)
        print('Could not load pretrained model weights. Weights can be found in the keras application folder '
              'https://github.com/fchollet/keras/tree/master/keras/applications')
    '''

    optimizer = adadelta()
    optimizer_classifier = adadelta()
    model_rpn.compile(optimizer=optimizer,
                      loss=[losses_fn.rpn_loss_cls(num_anchors),
                            losses_fn.rpn_loss_regr(num_anchors)])
    model_classifier.compile(optimizer=optimizer_classifier,
                             loss=[losses_fn.class_loss_cls,
                                   losses_fn.class_loss_regr(len(classes_count) - 1)],
                             metrics={'dense_class_{}'.format(len(classes_count)): 'accuracy'})
    model_all.compile(optimizer='sgd', loss='mae')

    epoch_length = 10
    num_epochs = int(cfg.num_epochs)
    iter_num = 0

    losses = np.zeros((epoch_length, 5))
    rpn_accuracy_rpn_monitor = []
    rpn_accuracy_for_epoch = []
    start_time = time.time()
    best_loss = np.Inf
    best_rpn_cls = np.Inf
    best_rpn_regr = np.Inf
    best_class_cls = np.Inf
    best_class_regr = np.Inf

    class_mapping_inv = {v: k for k, v in class_mapping.items()}
    print('Starting training')
    vis = True

    for epoch_num in range(num_epochs):
        progbar = generic_utils.Progbar(epoch_length)
        print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
        while True:
            try:
                if len(rpn_accuracy_rpn_monitor) == epoch_length and cfg.verbose:
                    mean_overlapping_bboxes = float(sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
                    rpn_accuracy_rpn_monitor = []
                    print('Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                          .format(mean_overlapping_bboxes, epoch_length))
                    if mean_overlapping_bboxes == 0:
                        print('RPN is not producing bounding boxes that overlap'
                              ' the ground truth boxes. Check RPN settings or keep training.')

                # X, Y, img_data = next(data_gen_train)
                while True:
                    if Q.empty():
                        time.sleep(0.00001)
                        continue
                    X, Y, img_data = Q.get()
                    # print(Q.qsize(), 'get')
                    break
                # print(X.shape, Y.shape)

                loss_rpn = model_rpn.train_on_batch(X, Y)
                P_rpn = model_rpn.predict_on_batch(X)

                result = roi_helpers.rpn_to_roi(P_rpn[0], P_rpn[1], cfg,
                                                K.image_dim_ordering(),
                                                use_regr=True, overlap_thresh=0.7,
                                                max_boxes=300)
                # note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format
                X2, Y1, Y2, IouS = roi_helpers.calc_iou(result, img_data, cfg, class_mapping)
                if X2 is None:
                    rpn_accuracy_rpn_monitor.append(0)
                    rpn_accuracy_for_epoch.append(0)
                    continue

                neg_samples = np.where(Y1[0, :, -1] == 1)
                pos_samples = np.where(Y1[0, :, -1] == 0)

                if len(neg_samples) > 0:
                    neg_samples = neg_samples[0]
                else:
                    neg_samples = []
                if len(pos_samples) > 0:
                    pos_samples = pos_samples[0]
                else:
                    pos_samples = []

                rpn_accuracy_rpn_monitor.append(len(pos_samples))
                rpn_accuracy_for_epoch.append(len(pos_samples))

                if cfg.num_rois > 1:
                    if len(pos_samples) < cfg.num_rois // 2:
                        selected_pos_samples = pos_samples.tolist()
                    else:
                        selected_pos_samples = np.random.choice(pos_samples, cfg.num_rois // 2,
                                                                replace=False).tolist()
                    try:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                cfg.num_rois - len(selected_pos_samples),
                                                                replace=False).tolist()
                    except:
                        selected_neg_samples = np.random.choice(neg_samples,
                                                                cfg.num_rois - len(selected_pos_samples),
                                                                replace=True).tolist()
                    sel_samples = selected_pos_samples + selected_neg_samples
                else:
                    # in the extreme case where num_rois = 1, we pick a random pos or neg sample
                    selected_pos_samples = pos_samples.tolist()
                    selected_neg_samples = neg_samples.tolist()
                    if np.random.randint(0, 2):
                        sel_samples = random.choice(neg_samples)
                    else:
                        sel_samples = random.choice(pos_samples)

                loss_class = model_classifier.train_on_batch(
                    [X, X2[:, sel_samples, :]],
                    [Y1[:, sel_samples, :], Y2[:, sel_samples, :]])

                losses[iter_num, 0] = loss_rpn[1]
                losses[iter_num, 1] = loss_rpn[2]
                losses[iter_num, 2] = loss_class[1]
                losses[iter_num, 3] = loss_class[2]
                losses[iter_num, 4] = loss_class[3]

                iter_num += 1
                progbar.update(iter_num,
                               [('rpn_cls', np.mean(losses[:iter_num, 0])),
                                ('rpn_regr', np.mean(losses[:iter_num, 1])),
                                ('detector_cls', np.mean(losses[:iter_num, 2])),
                                ('detector_regr', np.mean(losses[:iter_num, 3]))])

                if iter_num == epoch_length:
                    loss_rpn_cls = np.mean(losses[:, 0])
                    loss_rpn_regr = np.mean(losses[:, 1])
                    loss_class_cls = np.mean(losses[:, 2])
                    loss_class_regr = np.mean(losses[:, 3])
                    class_acc = np.mean(losses[:, 4])

                    mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch)
                    rpn_accuracy_for_epoch = []

                    if cfg.verbose:
                        print('Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'
                              .format(mean_overlapping_bboxes))
                        print('Classifier accuracy for bounding boxes from RPN: {}'.format(class_acc))
                        print('Loss RPN classifier: {}'.format(loss_rpn_cls))
                        print('Loss RPN regression: {}'.format(loss_rpn_regr))
                        print('Loss Detector classifier: {}'.format(loss_class_cls))
                        print('Loss Detector regression: {}'.format(loss_class_regr))
                        print('Elapsed time: {}'.format(time.time() - start_time))

                    curr_loss = loss_rpn_cls + loss_rpn_regr + loss_class_cls + loss_class_regr
                    iter_num = 0
                    start_time = time.time()

                    if curr_loss < best_loss:
                        if cfg.verbose:
                            print('Total loss decreased from {} to {}, saving weights'
                                  .format(best_loss, curr_loss))
                        best_loss = curr_loss
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr))
                    if loss_rpn_cls < best_rpn_cls:
                        if cfg.verbose:
                            print('loss_rpn_cls decreased from {} to {}, saving weights'
                                  .format(best_rpn_cls, loss_rpn_cls))
                        best_rpn_cls = loss_rpn_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_cls', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr))
                    if loss_rpn_regr < best_rpn_regr:
                        if cfg.verbose:
                            print('loss_rpn_regr decreased from {} to {}, saving weights'
                                  .format(best_rpn_regr, loss_rpn_regr))
                        best_rpn_regr = loss_rpn_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_rpn_regr', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr))
                    if loss_class_cls < best_class_cls:
                        if cfg.verbose:
                            print('loss_class_cls decreased from {} to {}, saving weights'
                                  .format(best_class_cls, loss_class_cls))
                        best_class_cls = loss_class_cls
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_cls', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr))
                    if loss_class_regr < best_class_regr:
                        if cfg.verbose:
                            print('loss_class_regr decreased from {} to {}, saving weights'
                                  .format(best_class_regr, loss_class_regr))
                        best_class_regr = loss_class_regr
                        model_all.save_weights(
                            '%s/%s/E-%d-loss-%.4f-rpnc-%.4f-rpnr-%.4f-cls-%.4f-cr-%.4f.hdf5'
                            % (cfg.model_path, 'loss_class_regr', epoch_num, curr_loss,
                               loss_rpn_cls, loss_rpn_regr, loss_class_cls, loss_class_regr))
                    break

            except Exception as e:
                # print('Exception: {}'.format(e))
                # save model
                # model_all.save_weights(cfg.model_path)
                continue

    print('Training complete, exiting.')
else:
    batchSizeFinal = round((numGPUs - .5) * batchSize)

# get final prediction size in case we need to reshape the segmentation
predictions_dim = predictions._keras_shape

# use DataLoader class to randomly flip and crop images
loader_train = DataLoader(X1, Y1, IMAGE_SIZE, crop_size)
loader_test = DataLoader(X1_test, Y1_test, IMAGE_SIZE, crop_size)

runningLoss_total = np.zeros(shape=(epochNum, 2))

# train model
for e in range(epochNum):
    print('Epoch', e)
    start = time.time()
    batches = 0
    progbar = generic_utils.Progbar(math.ceil(IMAGE_SIZE[0] / batchSizeFinal) * batchSizeFinal)
    runningLoss = 0.0
    runningLossTest = 0.0
    while batches <= IMAGE_SIZE[0] / batchSizeFinal:
        x_batch, y_batch, temp_images, temp_labels = loader_train.next_batch(batchSizeFinal)
        # crop the labels to the model's output resolution
        y_batch_crop = y_batch[:, 0:predictions_dim[1], 0:predictions_dim[2], :]
        model_loss = model.train_on_batch(x_batch, y_batch_crop)
        batches += 1
        # incremental mean of the batch losses
        runningLoss = ((runningLoss * (batches - 1)) + model_loss) / batches
        # x_batch_test, y_batch_test, temp_images, temp_labels = loader_test.next_batch(batchSizeFinal)
        # y_batch_crop_test = y_batch_test[:, 0:predictions_dim[1], 0:predictions_dim[2], :]
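# The runningLoss update above is the standard incremental mean; a quick check
# with toy values that it matches the plain average after n batches:
vals = [0.5, 0.3, 0.4]
running = 0.0
for n, v in enumerate(vals, start=1):
    running = (running * (n - 1) + v) / n
assert abs(running - sum(vals) / len(vals)) < 1e-12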
# training uses sequences of length 1 to 20. Test uses series of length 100.
def test_model(model, file_name, min_size=100):
    I, V, sw = get_sample(batch_size=500, n_bits=input_dim,
                          max_size=min_size + 1, min_size=min_size)
    Y = np.asarray(model.predict(I, batch_size=100) > .5).astype('float64')
    acc = (V[:, -min_size:, :] == Y[:, -min_size:, :]).mean() * 100
    show_pattern(Y[0], V[0], sw[0], file_name)
    return acc


##### TRAIN ######
nb_epoch = 4000
progbar = generic_utils.Progbar(nb_epoch)
for e in range(nb_epoch):
    I, V, sw = get_sample(n_bits=input_dim, max_size=20, min_size=1, batch_size=100)
    loss1 = model.train_on_batch(I, V, sample_weight=sw)
    loss2 = lstm.train_on_batch(I, V, sample_weight=sw)
    progbar.add(1, values=[("NTM", loss1), ("LSTM", loss2)])
    if e % 500 == 0:
        print("")
        acc1 = test_model(model, 'ntm.png')
        acc2 = test_model(lstm, 'lstm.png')
def main(): # Set seed. np.random.seed(SEED) tf.set_random_seed(SEED) # Read config. C = config.Config() class_mapping = C.class_mapping # Read training data. data_train, class_count, _ = get_data(TRAIN_ANNOT_PATH, TRAIN_DATA_PATH, C.img_types) # Load base model. if C.network == 'vgg16': from faster_rcnn.base_models import vgg16 as base_model elif C.network == 'resnet50': from faster_rcnn.base_models import resnet50 as base_model else: print('Not a valid base model!') sys.exit(1) # Read validation data. if USE_VALIDATION: data_val, _, _ = get_data(VAL_ANNOT_PATH, VAL_DATA_PATH, C.img_types) # Create paths. if MODEL_NAME != None: #model_name = C.model_path + '_' + datetime.today().strftime('%y%m%d') + '_' + MODEL_NAME model_name = C.model_path + '_' + MODEL_NAME if os.path.exists(os.path.join(MODELS_PATH, model_name)): print('Model already exist.') sys.exit(1) else: model_name = C.model_path + '_' + datetime.today().strftime('%y%m%d') + '_' + silly_name_gen() model_path = os.path.join(MODELS_PATH, model_name) weights_path = os.path.join(model_path, 'weights.hdf5') config_path = os.path.join(model_path, 'config.pickle') config_json_path = os.path.join(model_path, 'config.json') record_path = os.path.join(model_path, 'record.csv') C.weights_path = weights_path # Create model folder. create_model_folder(model_name) # Save config. with open(config_path, 'wb') as f: pickle.dump(C, f) with open(config_json_path, 'w') as f: json.dump(C.__dict__, f, indent=4) # Create generators. data_train_gen = get_tile_generator(data_train, C, base_model.get_img_output_length, class_count, base_model.preprocess, train_mode=True, verbose=False) if USE_VALIDATION: data_val_gen = get_tile_generator(data_val, C, base_model.get_img_output_length, class_count, base_model.preprocess, train_mode=False, verbose=False) # Define shapes. img_input_shape = (None, None, 3) img_input = Input(shape=img_input_shape) roi_input = Input(shape=(None, 4)) n_anchors = len(C.anchor_box_scales) * len(C.anchor_box_ratios) # Define base network with shared layers, RPN and classifier. base_net_output_layer = base_model.nn_base(img_input, trainable=C.base_net_trainable, weights=C.base_net_weights) rpn = rpn_layer(base_net_output_layer, n_anchors) classifier = base_model.classifier_layer( base_net_output_layer, roi_input, C.n_rois, nb_classes=len(class_mapping) ) # Create models. model_rpn = Model(img_input, rpn[:2]) model_classifier = Model([img_input, roi_input], classifier) model_all = Model([img_input, roi_input], rpn[:2] + classifier) # Create var for recording training process. df_record = pd.DataFrame( columns=[ 'elapsed_time', 'mean_overlapping_bboxes', 'val_mean_overlapping_bboxes', 'loss_rpn_cls', 'val_loss_rpn_cls', 'loss_rpn_regr', 'val_loss_rpn_regr', 'loss_detector_cls', 'val_loss_detector_cls', 'loss_detector_regr', 'val_loss_detector_regr', 'total_loss', 'val_total_loss', 'detector_acc', 'val_detector_acc', 'model_improvement' ] ) # Compile models. model_rpn.compile( optimizer=Adam(lr=1e-5 * 5.0), #SGD(lr=1e-3, momentum=0.9, decay=0.0005), loss=[ rpn_loss_cls(n_anchors), rpn_loss_regr(n_anchors) ] ) model_classifier.compile( optimizer=Adam(lr=1e-5 * 5.0), #SGD(lr=1e-3, momentum=0.9, decay=0.0005), loss=[ class_loss_cls, class_loss_regr(len(class_mapping)-1) ], metrics={ 'dense_class_{}'.format(len(class_mapping)): 'accuracy' } ) model_all.compile( optimizer='sgd', loss='mae' ) # Setup Tensorboard. callback = TensorBoard(model_path) callback.set_model(model_all) # Training settings. 
iter_num = 0 train_step = 0 losses = np.zeros((EPOCH_LENGTH, 5)) rpn_accuracy_for_epoch = [] best_total_loss = np.Inf # Start training. start_time = time.time() print('\n\nStart training.') for epoch_num in range(N_EPOCHS): pbar = generic_utils.Progbar(EPOCH_LENGTH) print('Epoch {}/{}'.format(epoch_num + 1, N_EPOCHS)) while True: # Get next batch (image). img, Y, img_data, img_debug, best_anchor_for_bbox, debug_n_pos = next(data_train_gen) # If no GT boxes. if len(img_data['bboxes']) == 0: continue # Train on batch. loss_rpn = model_rpn.train_on_batch(img, Y) # Get predicted RPN from RPN model [rpn_cls, rpn_regr]. P_rpn = model_rpn.predict_on_batch(img) ''' if iter_num == 0: colormap = [ ((166,206,227), (31,120,180)), # Light Blue, Blue ((178,223,138), (51,160,44)), # Light Green, Green ((251,154,153), (227,26,28)), # Light Red, Red ((253,191,111), (255,127,0)), # Light Orange, Orange ((202,178,214), (106,61,154)), # Light Purple, Purple ] img_debug = cv2.cvtColor(img_debug, cv2.COLOR_BGR2GRAY) img_debug = cv2.cvtColor(img_debug, cv2.COLOR_GRAY2RGB) _cls = Y[0][0] _regr = Y[1][0] pos_cls = np.where(_cls==1) pos_regr = np.where(_regr==1) for i in range(debug_n_pos): color = colormap[i%len(colormap)][0] idx = pos_regr[2][i*4]/4 anchor_size = C.anchor_box_scales[int(idx/len(C.anchor_box_ratios))] anchor_ratio = C.anchor_box_ratios[int(idx%len(C.anchor_box_ratios))] center = (pos_regr[1][i*4]*C.rpn_stride, pos_regr[0][i*4]*C.rpn_stride) anchor_width = anchor_size*anchor_ratio[0] anchor_height = anchor_size*anchor_ratio[1] cv2.circle(img_debug, center, 3, color, -1) cv2.rectangle( img_debug, (center[0]-int(anchor_width/2), center[1]-int(anchor_height/2)), (center[0]+int(anchor_width/2), center[1]+int(anchor_height/2)), color, 2 ) plt.figure(figsize=(8,8)) plt.imshow(img_debug) plt.title(img_data['filepath']) plt.show() for i in range(9): fig, ax = plt.subplots() im = ax.imshow(Y[0][0, :, :, i]) plt.colorbar(im) plt.title('isValid' + str(i)) plt.show() fig, ax = plt.subplots() im = ax.imshow(Y[0][0, :, :, i+9]) plt.colorbar(im) plt.title('isObject' + str(i)) plt.show() fig, ax = plt.subplots() im = ax.imshow(P_rpn[0][0, :, :, i]) plt.colorbar(im) plt.title('Prediction' + str(i)) plt.show() ''' # R: bboxes (shape=(300,4)) # Convert RPN layer to ROI bboxes. R = rpn_to_roi( P_rpn[0], P_rpn[1], C, use_regr=True, overlap_thresh=0.7, max_boxes=300 ) # Note: calc_iou converts from (x1,y1,x2,y2) to (x,y,w,h) format # X2: bboxes that iou > C.classifier_min_overlap for all gt bboxes in 300 non_max_suppression bboxes. # Y1: one hot code for bboxes from above => x_roi (X) # Y2: corresponding labels and corresponding gt bboxes X2, Y1, Y2, IouS = calc_iou(R, img_data, C, class_mapping) # If X2 is None means there are no matching bboxes if X2 is None: rpn_accuracy_for_epoch.append(0) continue sel_samples, n_pos_samples = get_selected_samples(Y1, C) rpn_accuracy_for_epoch.append(n_pos_samples) # training_data: [img, X2[:, sel_samples, :]] # labels: [Y1[:, sel_samples, :], Y2[:, sel_samples, :]] # img => img_data resized image # X2[:, sel_samples, :] => n_rois (4 in here) bboxes which contains selected neg and pos # Y1[:, sel_samples, :] => one hot encode for n_rois bboxes which contains selected neg and pos # Y2[:, sel_samples, :] => labels and gt bboxes for n_rois bboxes which contains selected neg and pos loss_detector = model_classifier.train_on_batch( [ img, X2[:, sel_samples, :] ], [ Y1[:, sel_samples, :], Y2[:, sel_samples, :] ] ) # Log losses. 
losses[iter_num, 0] = loss_rpn[1] losses[iter_num, 1] = loss_rpn[2] write_log( callback, ['rpn_cls_loss', 'rpn_reg_loss'], [loss_rpn[1], loss_rpn[2]], train_step ) losses[iter_num, 2] = loss_detector[1] losses[iter_num, 3] = loss_detector[2] losses[iter_num, 4] = loss_detector[3] write_log( callback, ['detector_cls_loss', 'detector_reg_loss', 'detector_acc'], [loss_detector[1], loss_detector[2], loss_detector[3]], train_step ) iter_num += 1 train_step += 1 pbar.update( iter_num, [ ('rpn_cls', losses[iter_num-1, 0]), ('rpn_regr', losses[iter_num-1, 1]), ('detector_cls', losses[iter_num-1, 2]), ('detector_regr', losses[iter_num-1, 3]) ] ) if iter_num == EPOCH_LENGTH: # Compute epoch losses. loss_rpn_cls = np.mean(losses[:, 0]) loss_rpn_regr = np.mean(losses[:, 1]) loss_detector_cls = np.mean(losses[:, 2]) loss_detector_regr = np.mean(losses[:, 3]) class_acc = np.mean(losses[:, 4]) mean_overlapping_bboxes = float(sum(rpn_accuracy_for_epoch)) / len(rpn_accuracy_for_epoch) rpn_accuracy_for_epoch = [] elapsed_time = (time.time() - start_time) curr_total_loss = loss_rpn_cls + loss_rpn_regr + loss_detector_cls + loss_detector_regr iter_num = 0 if C.verbose: print('') if mean_overlapping_bboxes == 0: print('RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.') else: print('(TRAINING) Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(mean_overlapping_bboxes)) print('(TRAINING) Loss RPN classifier: {}'.format(loss_rpn_cls)) print('(TRAINING) Loss RPN regression: {}'.format(loss_rpn_regr)) print('(TRAINING) Loss Detector classifier: {}'.format(loss_detector_cls)) print('(TRAINING) Loss Detector regression: {}'.format(loss_detector_regr)) print('(TRAINING) Detector accuracy for bounding boxes from RPN: {}'.format(class_acc)) print('(TRAINING) Total Loss: {}'.format(curr_total_loss)) print('Elapsed time: ' + ms_output(elapsed_time)) print('') # Validation. record_row = {} if USE_VALIDATION: val_start_time = time.time() print('\nPerforming Validation.') val_rpn_accuracy = [] val_rpn_cls_loss = [] val_rpn_reg_loss = [] val_detector_cls_loss = [] val_detector_reg_loss = [] val_detector_acc = [] while True: try: img_val, Y_val, img_data_val, _, _, _ = next(data_val_gen) # Validate on batch. 
            val_loss_rpn = model_rpn.test_on_batch(img_val, Y_val)
            P_rpn_val = model_rpn.predict_on_batch(img_val)
            R_val = rpn_to_roi(
                P_rpn_val[0], P_rpn_val[1], C,
                use_regr=True, overlap_thresh=0.7, max_boxes=300
            )
            X2_val, Y1_val, Y2_val, _ = calc_iou(R_val, img_data_val, C, class_mapping)
            if X2_val is None:
                continue
            val_sel_samples, val_n_pos_samples = get_selected_samples(Y1_val, C)
            val_loss_detector = model_classifier.test_on_batch(
                [img_val, X2_val[:, val_sel_samples, :]],
                [Y1_val[:, val_sel_samples, :], Y2_val[:, val_sel_samples, :]]
            )
            val_rpn_accuracy.append(val_n_pos_samples)
            val_rpn_cls_loss.append(val_loss_rpn[1])
            val_rpn_reg_loss.append(val_loss_rpn[2])
            val_detector_cls_loss.append(val_loss_detector[1])
            val_detector_reg_loss.append(val_loss_detector[2])
            val_detector_acc.append(val_loss_detector[3])
        except RuntimeError:
            break
        except StopIteration:
            break
        except Exception:
            # traceback.print_exc() prints the trace itself; wrapping it in
            # print() only printed its None return value
            traceback.print_exc()
            sys.exit(1)
    # re-create the (exhausted) validation generator for the next epoch
    data_val_gen = get_tile_generator(data_val, C, base_model.get_img_output_length,
                                      class_count, base_model.preprocess,
                                      train_mode=False, verbose=False)
    val_mean_overlapping_bboxes = float(sum(val_rpn_accuracy)) / len(val_rpn_accuracy)
    val_total_loss = np.mean(val_rpn_cls_loss) + np.mean(val_rpn_reg_loss) + np.mean(val_detector_cls_loss) + np.mean(val_detector_reg_loss)
    print('(VALIDATION) Mean number of bounding boxes from RPN overlapping ground truth boxes: {}'.format(val_mean_overlapping_bboxes))
    print('(VALIDATION) Mean Loss RPN classifier: {}'.format(np.mean(val_rpn_cls_loss)))
    print('(VALIDATION) Mean Loss RPN regression: {}'.format(np.mean(val_rpn_reg_loss)))
    print('(VALIDATION) Mean Loss Detector classifier: {}'.format(np.mean(val_detector_cls_loss)))
    print('(VALIDATION) Mean Loss Detector regression: {}'.format(np.mean(val_detector_reg_loss)))
    print('(VALIDATION) Mean Detector accuracy for bounding boxes from RPN: {}'.format(np.mean(val_detector_acc)))
    print('(VALIDATION) Total Loss: {}'.format(val_total_loss))
    record_row['val_mean_overlapping_bboxes'] = round(val_mean_overlapping_bboxes, 3)
    record_row['val_detector_acc'] = round(np.mean(val_detector_acc), 3)
    record_row['val_loss_rpn_cls'] = round(np.mean(val_rpn_cls_loss), 3)
    record_row['val_loss_rpn_regr'] = round(np.mean(val_rpn_reg_loss), 3)
    record_row['val_loss_detector_cls'] = round(np.mean(val_detector_cls_loss), 3)
    record_row['val_loss_detector_regr'] = round(np.mean(val_detector_reg_loss), 3)
    record_row['val_total_loss'] = round(val_total_loss, 3)
    val_elapsed_time = (time.time() - val_start_time)
    print('Validation execution time: ' + ms_output(val_elapsed_time))
    print('')
    if val_total_loss < best_total_loss:
        record_row['model_improvement'] = val_total_loss - best_total_loss
        if C.verbose:
            print('Total loss decreased from {} to {}, saving weights'.format(best_total_loss, val_total_loss))
            print('')
        best_total_loss = val_total_loss
        model_all.save_weights(weights_path)
    else:
        record_row['model_improvement'] = None
else:
    record_row['val_mean_overlapping_bboxes'] = None
    record_row['val_detector_acc'] = None
    record_row['val_loss_rpn_cls'] = None
    record_row['val_loss_rpn_regr'] = None
    record_row['val_loss_detector_cls'] = None
    record_row['val_loss_detector_regr'] = None
    record_row['val_total_loss'] = None
    if curr_total_loss < best_total_loss:
        record_row['model_improvement'] = curr_total_loss - best_total_loss
        if C.verbose:
            print('Total loss decreased from {} to {}, saving weights'.format(best_total_loss, curr_total_loss))
            print('')
        best_total_loss = curr_total_loss
        model_all.save_weights(weights_path)
    else:
record_row['model_improvement'] = None # Log epoch averages. write_log( callback, [ 'Elapsed_time', 'mean_overlapping_bboxes', 'mean_rpn_cls_loss', 'mean_rpn_reg_loss', 'mean_detector_cls_loss', 'mean_detector_reg_loss', 'mean_detector_acc', 'total_loss' ], [ elapsed_time/60, mean_overlapping_bboxes, loss_rpn_cls, loss_rpn_regr, loss_detector_cls, loss_detector_regr, class_acc, curr_total_loss ], epoch_num ) record_row['mean_overlapping_bboxes'] = round(mean_overlapping_bboxes, 3) record_row['detector_acc'] = round(class_acc, 3) record_row['loss_rpn_cls'] = round(loss_rpn_cls, 3) record_row['loss_rpn_regr'] = round(loss_rpn_regr, 3) record_row['loss_detector_cls'] = round(loss_detector_cls, 3) record_row['loss_detector_regr'] = round(loss_detector_regr, 3) record_row['total_loss'] = round(curr_total_loss, 3) record_row['elapsed_time'] = round(elapsed_time/60, 3) df_record = df_record.append(record_row, ignore_index=True) df_record.to_csv(record_path, index=0) break print('Training Complete! Exiting.') fig = plt.figure(figsize=(15,5)) plt.subplot(1,2,1) plt.plot(np.arange(0, df_record.shape[0]), df_record['mean_overlapping_bboxes'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_mean_overlapping_bboxes'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['mean_overlapping_bboxes'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_mean_overlapping_bboxes'].rolling(window=20).mean(), 'b', label='Val') plt.title('mean_overlapping_bboxes') plt.legend() plt.subplot(1,2,2) plt.plot(np.arange(0, df_record.shape[0]), df_record['detector_acc'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_detector_acc'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['detector_acc'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_detector_acc'].rolling(window=20).mean(), 'b', label='Val') plt.title('class_acc') plt.legend() fig.savefig(os.path.join(model_path, 'viz/accuracy.png')) fig = plt.figure(figsize=(15,5)) plt.subplot(1,2,1) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_rpn_cls'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_rpn_cls'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_rpn_cls'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_rpn_cls'].rolling(window=20).mean(), 'b', label='Val') plt.title('loss_rpn_cls') plt.legend() plt.subplot(1,2,2) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_rpn_regr'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_rpn_regr'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_rpn_regr'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_rpn_regr'].rolling(window=20).mean(), 'b', label='Val') plt.title('loss_rpn_regr') plt.legend() fig.savefig(os.path.join(model_path, 'viz/rpn_loss.png')) fig = plt.figure(figsize=(15,5)) plt.subplot(1,2,1) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_detector_cls'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_detector_cls'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_detector_cls'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), 
df_record['val_loss_detector_cls'].rolling(window=20).mean(), 'b', label='Val') plt.title('loss_detector_cls') plt.legend() plt.subplot(1,2,2) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_detector_regr'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_detector_regr'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['loss_detector_regr'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_loss_detector_regr'].rolling(window=20).mean(), 'b', label='Val') plt.title('loss_detector_regr') plt.legend() fig.savefig(os.path.join(model_path, 'viz/detector_loss.png')) fig = plt.figure(figsize=(16,8)) plt.plot(np.arange(0, df_record.shape[0]), df_record['total_loss'], 'r', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['val_total_loss'], 'b', alpha=0.3) plt.plot(np.arange(0, df_record.shape[0]), df_record['total_loss'].rolling(window=20).mean(), 'r', label='Train') plt.plot(np.arange(0, df_record.shape[0]), df_record['val_total_loss'].rolling(window=20).mean(), 'b', label='Val') plt.title('total_loss') plt.legend() fig.savefig(os.path.join(model_path, 'viz/total_loss.png'))
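# write_log() used throughout the training loop above is not defined in this
# excerpt. A common TF1-era implementation (an assumption, not necessarily
# the author's helper) feeds scalar tf.Summary values to the TensorBoard
# callback's underlying FileWriter:
import tensorflow as tf

def write_log(callback, names, logs, batch_no):
    # callback: a keras.callbacks.TensorBoard with set_model() already called
    for name, value in zip(names, logs):
        summary = tf.Summary()
        summary_value = summary.value.add()
        summary_value.simple_value = value
        summary_value.tag = name
        callback.writer.add_summary(summary, batch_no)
    callback.writer.flush()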
def train(path, batch_size=10, epochs=50, steps_per_epoch=None): # input_shape = (256, 256, 3) # padding_width = 64 input_shape = (128, 128, 3) padding_width = 32 is_log = get_counter() data_generator = DataGenerator(path, input_shape[:2], batch_size, padding_width) data_size = len(data_generator) print('data size: {}'.format(data_size)) if steps_per_epoch is None: steps_per_epoch = data_size // batch_size G, D, C = build_model(input_shape, padding_width) t1_epochs = epochs * 16 // 100 t2_epochs = epochs * 2 // 100 t3_epochs = epochs * 70 // 100 g_history = {'loss': [], 'val_loss': []} d_history = {'loss': [], 'val_loss': []} c_history = {'loss': [], 'val_loss': []} class LogCallback(keras.callbacks.Callback): def on_batch_end(self, batch, logs=None): if is_log(): g_history['loss'].append(logs['loss']) padded_iamges, real_images = next( data_generator.validation_flow()) val_loss = G.evaluate(padded_iamges, real_images, batch_size) g_history['val_loss'].append(val_loss) plot_history(g_history, 'G') plot_generated_image(G, data_generator) print('Phase 1') if os.path.exists('checkpoint/g.h5'): G.load_weights('checkpoint/g.h5') g_history = json.load( open('checkpoint/g_history.json', 'r', encoding='utf-8')) print('get trained G weight') else: g_loss = G.fit_generator( data_generator.flow(), epochs=t1_epochs, steps_per_epoch=steps_per_epoch, validation_data=data_generator.validation_flow(), validation_steps=2, callbacks=[LogCallback()]) G.save_weights('checkpoint/g.h5') g_history = g_loss.history json.dump(g_history, open('checkpoint/g_history.json', 'w', encoding='utf-8')) json.dump(c_history, open('checkpoint/c_history.json', 'w', encoding='utf-8')) json.dump(d_history, open('checkpoint/d_history.json', 'w', encoding='utf-8')) json.dump(g_history, open('checkpoint/g_end_history.json', 'w', encoding='utf-8')) C.save_weights('checkpoint/c.h5') D.save_weights('checkpoint/d.h5') G.save_weights('checkpoint/g_end.h5') print('Phase 2') counter = 0 D.summary() for cur_epoch in range(t2_epochs): print('Epoch {}/{}'.format(cur_epoch, t2_epochs)) progbar = generic_utils.Progbar(steps_per_epoch) for d in itertools.islice(data_generator.flow(), None, steps_per_epoch): padded_iamges, real_images = d fake_images = G.predict(padded_iamges) d_loss_real = D.train_on_batch( real_images, np.ones(batch_size, dtype='float32')) d_loss_fake = D.train_on_batch( fake_images, np.zeros(batch_size, dtype='float32')) d_loss = (d_loss_real + d_loss_fake) / 2 progbar.add(1, values=[("D loss", d_loss)]) if is_log(): d_history['loss'].append(float(d_loss)) padded_iamges, real_images = next( data_generator.validation_flow()) combined_images, labels = combine_image_and_label( G, padded_iamges, real_images, batch_size) d_val_loss = D.evaluate(combined_images, labels) d_history['val_loss'].append(float(d_val_loss)) plot_history(d_history, 'D') print('Phase 3') for cur_epoch in range(t3_epochs): print('Epoch {}/{}'.format(cur_epoch, t3_epochs)) progbar = generic_utils.Progbar(steps_per_epoch) for d in itertools.islice(data_generator.flow(), None, steps_per_epoch): padded_iamges, real_images = d fake_images = G.predict(padded_iamges) d_loss_real = D.train_on_batch( real_images, np.ones(batch_size, dtype='float32')) d_loss_fake = D.train_on_batch( fake_images, np.zeros(batch_size, dtype='float32')) d_loss = (d_loss_real + d_loss_fake) / 2 c_loss = C.train_on_batch( padded_iamges, [real_images, np.ones((batch_size, 1), dtype='float32')]) progbar.add(1, values=[("D loss", d_loss), ('C loss', c_loss[0]), ('G loss', c_loss[1])]) if 
is_log(): d_history['loss'].append(float(d_loss)) c_history['loss'].append(float(c_loss[0])) g_history['loss'].append(float(c_loss[1])) padded_iamges, real_images = next( data_generator.validation_flow()) c_loss = C.evaluate( padded_iamges, [real_images, np.ones((batch_size, 1), dtype='float32')]) combined_images, labels = combine_image_and_label( G, padded_iamges, real_images, batch_size) d_loss = D.evaluate(combined_images, labels) d_history['val_loss'].append(float(d_loss)) c_history['val_loss'].append(float(c_loss[0])) g_history['val_loss'].append(float(c_loss[1])) plot_history(d_history, 'D') plot_history(c_history, 'C') plot_history(g_history, 'G') plot_generated_image(G, data_generator) G.save_weights('checkpoint/g_end.h5') json.dump(g_history, open('checkpoint/g_end_history.json', 'w', encoding='utf-8')) D.save_weights('checkpoint/d.h5') json.dump(d_history, open('checkpoint/d_history.json', 'w', encoding='utf-8'))
if i == 20: model_rpn.compile( loss={ 'regression': smooth_l1(), 'classification': cls_loss() }, optimizer=keras.optimizers.Adam(lr=Learning_rate / 10)) model_classifier.compile( loss=[class_loss_cls, class_loss_regr(NUM_CLASSES - 1)], metrics={'dense_class_{}'.format(NUM_CLASSES): 'accuracy'}, optimizer=keras.optimizers.Adam(lr=Learning_rate / 10)) print("Learning rate decrease") progbar = generic_utils.Progbar(EPOCH_LENGTH) print('Epoch {}/{}'.format(i + 1, EPOCH)) for iteration, batch in enumerate(rpn_train): if len(rpn_accuracy_rpn_monitor ) == EPOCH_LENGTH and config.verbose: mean_overlapping_bboxes = float(sum( rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor) rpn_accuracy_rpn_monitor = [] print( 'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations' .format(mean_overlapping_bboxes, EPOCH_LENGTH)) if mean_overlapping_bboxes == 0: print( 'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.' )
def train_loop(identifier, model, optimizer, epoch_start, history, la_plotter, ia_train, ia_val, X_train, y_train, X_val, y_val): """Perform the training loop. Args: identifier: Identifier of the experiment. model: The network to train (and validate). optimizer: The network's optimizer (e.g. SGD). epoch_start: The epoch to start the training at. Usually 0, but can be higher if an old training is continued/loaded. history: The history for this training. Can be filled already, if an old training is continued/loaded. la_plotter: The plotter used to plot loss and accuracy values. Can be filled already, if an old training is continued/loaded. ia_train: ImageAugmenter to use to augment the training images. ia_val: ImageAugmenter to use to augment the validation images. X_train: The training set images. y_train: The training set labels (same persons, different persons). X_val: The validation set images. y_val: The validation set labels. """ # Loop over each epoch, i.e. executes 20 times if epochs set to 20 # start_epoch is not 0 if we continue an older model. for epoch in range(epoch_start, EPOCHS): print("Epoch", epoch) # Variables to collect the sums for loss and accuracy (for training and # validation dataset). We will use them to calculate the loss/acc per # example (which will be ploted and added to the history). loss_train_sum = 0 loss_val_sum = 0 acc_train_sum = 0 acc_val_sum = 0 nb_examples_train = X_train.shape[0] nb_examples_val = X_val.shape[0] # Training loop progbar = generic_utils.Progbar(nb_examples_train) for X_batch, Y_batch in flow_batches(X_train, y_train, ia_train, shuffle=True, train=True): loss, acc = model.train_on_batch(X_batch, Y_batch, accuracy=True) progbar.add(len(X_batch), values=[("train loss", loss), ("train acc", acc)]) loss_train_sum += (loss * len(X_batch)) acc_train_sum += (acc * len(X_batch)) # Validation loop progbar = generic_utils.Progbar(nb_examples_val) # Iterate over each batch in the validation data # and calculate loss and accuracy for each batch for X_batch, Y_batch in flow_batches(X_val, y_val, ia_val, shuffle=False, train=False): loss, acc = model.test_on_batch(X_batch, Y_batch, accuracy=True) progbar.add(len(X_batch), values=[("val loss", loss), ("val acc", acc)]) loss_val_sum += (loss * len(X_batch)) acc_val_sum += (acc * len(X_batch)) # Calculate the loss and accuracy for this epoch # (averaged over all training data batches) loss_train = loss_train_sum / nb_examples_train acc_train = acc_train_sum / nb_examples_train loss_val = loss_val_sum / nb_examples_val acc_val = acc_val_sum / nb_examples_val history.add(epoch, loss_train=loss_train, loss_val=loss_val, acc_train=acc_train, acc_val=acc_val) # Update plots with new data from this epoch # We start plotting _after_ the first epoch as the first one usually contains # a huge fall in loss (increase in accuracy) making it harder to see the # minor swings at epoch 1000 and later. 
if epoch > 0: la_plotter.add_values(epoch, loss_train=loss_train, loss_val=loss_val, acc_train=acc_train, acc_val=acc_val) # Save the history to a csv file if SAVE_CSV_FILEPATH is not None: csv_filepath = SAVE_CSV_FILEPATH.format(identifier=identifier) history.save_to_filepath(csv_filepath) # Save the weights and optimizer state to files swae = SAVE_WEIGHTS_AFTER_EPOCHS if swae and swae > 0 and (epoch + 1) % swae == 0: print("Saving model...") save_model_weights(model, SAVE_WEIGHTS_DIR, "{}.last.weights".format(identifier), overwrite=True) save_optimizer_state(optimizer, SAVE_OPTIMIZER_STATE_DIR, "{}.last.optstate".format(identifier), overwrite=True)
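# flow_batches() used by train_loop() above is not shown in this excerpt.
# A hypothetical stripped-down stand-in to illustrate the interface (the
# real helper also applies the ImageAugmenter and builds the pair labels):
import numpy as np

def flow_batches(X, y, ia=None, shuffle=True, train=True, batch_size=32):
    idx = np.arange(X.shape[0])
    if shuffle:
        np.random.shuffle(idx)
    for start in range(0, len(idx), batch_size):
        sel = idx[start:start + batch_size]
        # augmentation via `ia` omitted in this sketch
        yield X[sel], y[sel]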
model.summary()

# Define optimizer
optim = SGD(lr=lr, momentum=0.9)

# Compile model
model.compile(loss='binary_crossentropy', optimizer=optim)

# Archive the model architecture to JSON
model_json = model.to_json()
with open("/media/zhaojian/6TB/project/ms-celeb-1m/c2/models/ms-siamese.json", "w") as json_file:
    json_file.write(model_json)

# Start training
for e in range(nb_epoch):
    progbar = generic_utils.Progbar(nb_batch * batch_size)
    start = time.time()
    for b in range(nb_batch):
        # Get a batch of training pairs
        tr_X1_batch = np.zeros((batch_size, img_w, img_h, img_c), dtype=np.float32)
        tr_X1_file = tr_Pair1_file_list[b * batch_size:(b + 1) * batch_size]
        tr_label_batch = tr_Pair_label[b * batch_size:(b + 1) * batch_size]
        for i in range(batch_size):
            x = image.load_img(tr_X1_file[i], target_size=(224, 224))
            x = image.img_to_array(x)
            x = np.expand_dims(x, axis=0)
            x = preprocess_input(x)
            tr_X1_batch[i, ...] = x
        tr_X2_batch = np.zeros((batch_size, img_w, img_h, img_c), dtype=np.float32)
def main(): start_time = time.time() parser = argparse.ArgumentParser( prog='valLSTM_MLP.py', description='Test LSTM-MLP model for visual question answering') parser.add_argument('--model-vgg', type=str, required=True, metavar='<model-path>') parser.add_argument('--weights-vgg', type=str, required=True, metavar='<weights-path>') parser.add_argument('--model-inc', type=str, required=True, metavar='<model-path>') parser.add_argument('--weights-inc', type=str, required=True, metavar='<weights-path>') parser.add_argument('--output', type=str, required=True, metavar='<prediction-path>') args = parser.parse_args() word_vec_dim = 300 batch_size = 128 vgg_weight = 0.25 inc_weight = 1 - vgg_weight ####################### # Load Models # ####################### print('Loading models and weights...') model_vgg = model_from_json(open(args.model_vgg, 'r').read()) model_vgg.compile(loss='categorical_crossentropy', optimizer='rmsprop') model_vgg.load_weights(args.weights_vgg) model_inc = model_from_json(open(args.model_inc, 'r').read()) model_inc.compile(loss='categorical_crossentropy', optimizer='rmsprop') model_inc.load_weights(args.weights_inc) print('Models and weights loaded.') print('Time: %f s' % (time.time() - start_time)) ###################### # Load Data # ###################### data_dir = '/home/mlds/data/0.05_val/' print('Loading data...') #train_id_pairs, train_image_ids = LoadIds('train') #dev_id_pairs, dev_image_ids = LoadIds('dev') test_q_ids, test_image_ids = LoadIds('test', data_dir) #train_questions = LoadQuestions('train') #dev_questions = LoadQuestions('dev') test_questions = LoadQuestions('test', data_dir) #train_choices = LoadChoices('train') #dev_choices = LoadChoices('dev') test_choices = LoadChoices('test', data_dir) #train_answers = LoadAnswers('train') #dev_answers = LoadAnswers('dev') print('Finished loading data.') print('Time: %f s' % (time.time() - start_time)) ######################################## # Load CNN Features and Word Vectors # ######################################## # load VGG features print('Loading VGG features...') VGG_features, vgg_img_map = LoadVGGFeatures() print('VGG features loaded') print('Time: %f s' % (time.time() - start_time)) # load Inception features print('Loading Inception features...') INC_features, inc_img_map = LoadInceptionFeatures() print('Inception features loaded') print('Time: %f s' % (time.time() - start_time)) # load GloVe vectors print('Loading GloVe vectors...') word_embedding, word_map = LoadGloVe() print('GloVe vectors loaded') print('Time: %f s' % (time.time() - start_time)) ###################### # Make Batches # ###################### print('Making batches...') # train batches # train_question_batches = [ b for b in MakeBatches(train_questions, batch_size, fillvalue=train_questions[-1]) ] # train_answer_batches = [ b for b in MakeBatches(train_answers['labs'], batch_size, fillvalue=train_answers['labs'][-1]) ] # train_choice_batches = [ b for b in MakeBatches(train_choices, batch_size, fillvalue=train_choices[-1]) ] # train_image_batches = [ b for b in MakeBatches(train_image_ids, batch_size, fillvalue=train_image_ids[-1]) ] # validation batches # dev_question_batches = [ b for b in MakeBatches(dev_questions, batch_size, fillvalue=dev_questions[-1]) ] # dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], batch_size, fillvalue=dev_answers['labs'][-1]) ] # dev_choice_batches = [ b for b in MakeBatches(dev_choices, batch_size, fillvalue=dev_choices[-1]) ] # dev_image_batches = [ b for b in 
MakeBatches(dev_image_ids, batch_size, fillvalue=dev_image_ids[-1]) ] # testing batches test_question_batches = [ b for b in MakeBatches( test_questions, batch_size, fillvalue=test_questions[-1]) ] test_choice_batches = [ b for b in MakeBatches( test_choices, batch_size, fillvalue=test_choices[-1]) ] test_image_batches = [ b for b in MakeBatches( test_image_ids, batch_size, fillvalue=test_image_ids[-1]) ] print('Finished making batches.') print('Time: %f s' % (time.time() - start_time)) ###################### # Testing # ###################### predictions = [] pbar = generic_utils.Progbar(len(test_question_batches) * batch_size) for i in range(len(test_question_batches)): # feed forward X_question_batch = GetQuestionsTensor(test_question_batches[i], word_embedding, word_map) X_vgg_image_batch = GetImagesMatrix(test_image_batches[i], vgg_img_map, VGG_features) X_inc_image_batch = GetImagesMatrix(test_image_batches[i], inc_img_map, INC_features) prob_vgg = model_vgg.predict_proba( [X_question_batch, X_vgg_image_batch], batch_size, verbose=0) prob_inc = model_inc.predict_proba( [X_question_batch, X_inc_image_batch], batch_size, verbose=0) prob = (vgg_weight * prob_vgg + inc_weight * prob_inc) # get word vecs of choices choice_feats = GetChoicesTensor(test_choice_batches[i], word_embedding, word_map) similarity = np.zeros((5, batch_size), float) # calculate cosine distances for j in range(5): similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j])) # take argmax of cosine distances pred = np.argmax(similarity, axis=0) + 1 predictions.extend(pred.tolist()) pbar.add(batch_size) SavePredictions(args.output, predictions, test_q_ids) print('Time: %f s' % (time.time() - start_time)) print('Testing finished.')
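# The testing loop above ranks the five choices by pairing each prediction
# with its own choice vector via np.diag(cosine_similarity(...)). A minimal
# sketch of that ranking step (function name and shapes assumed):
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

def rank_choices(prob, choice_feats):
    # prob: (batch, dim) predicted answer vector per question
    # choice_feats: (n_choices, batch, dim) word vectors of each choice
    n_choices = choice_feats.shape[0]
    similarity = np.zeros((n_choices, prob.shape[0]))
    for j in range(n_choices):
        # the diagonal pairs row i of prob with row i of choice_feats[j];
        # computing the full batch x batch matrix is wasteful but simple --
        # a rowwise dot over normalized vectors would be cheaper
        similarity[j] = np.diag(cosine_similarity(prob, choice_feats[j]))
    return np.argmax(similarity, axis=0) + 1  # choices are 1-indexed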
def main(): cwd = os.getcwd() parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=1024) parser.add_argument('-num_hidden_layers', type=int, default=3) parser.add_argument('-dropout', type=float, default=0.5) parser.add_argument('-activation', type=str, default='tanh') parser.add_argument('-language_only', type=bool, default=False) parser.add_argument('-num_epochs', type=int, default=2) parser.add_argument('-model_save_interval', type=int, default=10) parser.add_argument('-model_weights_path', type=str, default=cwd + '/vgg/vgg16_weights.h5') parser.add_argument('-batch_size', type=int, default=128) parser.add_argument('-questions_train', type=str, default=cwd + '/data/preprocessed/questions_train2015.txt') parser.add_argument('-answers_train', type=str, default=cwd + '/data/preprocessed/answers_train2015_modal.txt') parser.add_argument( '-im_dir', type=str, default=cwd + '/data/preprocessed/scene_img_abstract_v002_train2015/') #parser.add_argument('-questions_train',type=str, default = cwd+'/data/preprocessed/questions_train2014.txt') args = parser.parse_args() questions_train = open(args.questions_train, 'r').read().decode('utf8').splitlines() answers_train = open(args.answers_train, 'r').read().decode('utf8').splitlines() images_train = open(cwd + '/data/preprocessed/images_train2015.txt', 'r').read().decode('utf8').splitlines() #vgg_model_path = cwd+'/features/coco/vgg_feats.mat' #this needs to change maxAnswers = 100 questions_train, answers_train, images_train = selectFrequentAnswers( questions_train, answers_train, images_train, maxAnswers) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder, cwd + '/models/labelencoder.pkl') #features_struct = scipy.io.loadmat(vgg_model_path) #VGGfeatures = features_struct['feats'] # print 'loaded vgg features' # image_ids = open(cwd+'/features/coco_vgg_IDMap.txt').read().splitlines() # id_map = {} # for ids in image_ids: # id_split = ids.split() # id_map[id_split[0]] = int(id_split[1]) vgg_model = vgg16.VGG_16(args.model_weights_path) sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) vgg_model.compile(optimizer=sgd, loss='categorical_crossentropy') print 'loaded vgg model...' nlp = English() print 'loaded word2vec features...' img_dim = 4096 word_vec_dim = 300 model = Sequential() if args.language_only: model.add( Dense(args.num_hidden_units, input_dim=word_vec_dim, init='uniform')) else: model.add( Dense(args.num_hidden_units, input_dim=img_dim + word_vec_dim, init='uniform')) model.add(Activation(args.activation)) if args.dropout > 0: model.add(Dropout(args.dropout)) for i in xrange(args.num_hidden_layers - 1): model.add(Dense(args.num_hidden_units, init='uniform')) model.add(Activation(args.activation)) if args.dropout > 0: model.add(Dropout(args.dropout)) model.add(Dense(nb_classes, init='uniform')) model.add(Activation('softmax')) json_string = model.to_json() model_file_name = cwd + '/models/mlp_num_hidden_units_' + str( args.num_hidden_units) + '_num_hidden_layers_' + str( args.num_hidden_layers) open(model_file_name + '.json', 'w').write(json_string) print 'Training started...' 
    for k in xrange(args.num_epochs):
        # shuffle the data points before going through them
        index_shuf = range(len(questions_train))
        shuffle(index_shuf)
        questions_train = [questions_train[i] for i in index_shuf]
        answers_train = [answers_train[i] for i in index_shuf]
        images_train = [images_train[i] for i in index_shuf]
        progbar = generic_utils.Progbar(len(questions_train))
        for qu_batch, an_batch, im_batch in zip(
                grouper(questions_train, args.batch_size, fillvalue=questions_train[-1]),
                grouper(answers_train, args.batch_size, fillvalue=answers_train[-1]),
                grouper(images_train, args.batch_size, fillvalue=images_train[-1])):
            X_q_batch = get_questions_matrix_sum(qu_batch, nlp)
            im_path = args.im_dir + "abstract_v002_train2015_"
            #print 'getting image features...'
            # the image features are needed to form X_batch below; leaving
            # these two lines commented out makes X_batch undefined
            X_i_batch = get_images_matrix_from_model(vgg_model, im_batch, im_path)
            X_batch = np.hstack((X_q_batch, X_i_batch))
            Y_batch = get_answers_matrix(an_batch, labelencoder)
            print 'running training on batch...'
            loss = model.train_on_batch(X_batch, Y_batch)
            progbar.add(args.batch_size, values=[("train loss", loss)])
        if k % args.model_save_interval == 0:
            model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
    model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k))
def train(**kwargs): """ Train standard DCGAN model args: **kwargs (dict) keyword arguments that specify the model hyperparameters """ # Roll out the parameters generator = kwargs["generator"] discriminator = kwargs["discriminator"] dset = kwargs["dset"] img_dim = kwargs["img_dim"] nb_epoch = kwargs["nb_epoch"] batch_size = kwargs["batch_size"] n_batch_per_epoch = kwargs["n_batch_per_epoch"] bn_mode = kwargs["bn_mode"] noise_dim = kwargs["noise_dim"] noise_scale = kwargs["noise_scale"] lr_D = kwargs["lr_D"] lr_G = kwargs["lr_G"] opt_D = kwargs["opt_D"] opt_G = kwargs["opt_G"] use_mbd = kwargs["use_mbd"] image_dim_ordering = kwargs["image_dim_ordering"] epoch_size = n_batch_per_epoch * batch_size deterministic = kwargs["deterministic"] inject_noise = kwargs["inject_noise"] model = kwargs["model"] no_supertrain = kwargs["no_supertrain"] pureGAN = kwargs["pureGAN"] lsmooth = kwargs["lsmooth"] simple_disc = kwargs["simple_disc"] resume = kwargs["resume"] name = kwargs["name"] wd = kwargs["wd"] history_size = kwargs["history_size"] monsterClass = kwargs["monsterClass"] print("\nExperiment parameters:") for key in kwargs.keys(): print key, kwargs[key] print("\n") # Setup environment (logging directory etc) general_utils.setup_logging("DCGAN") # Load and normalize data if dset == "mnistM": X_source_train, Y_source_train, X_source_test, Y_source_test, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='mnist') X_dest_train, Y_dest_train, X_dest_test, Y_dest_test, n_classes2 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='mnistM') #code.interact(local=locals()) elif dset == "washington_vandal50k": X_source_train = data_utils.load_image_dataset(img_dim, image_dim_ordering, dset='washington') X_dest_train = data_utils.load_image_dataset(img_dim, image_dim_ordering, dset='vandal50k') elif dset == "washington_vandal12classes": X_source_train = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='washington12classes') X_dest_train = data_utils.load_image_dataset(img_dim, image_dim_ordering, dset='vandal12classes') elif dset == "washington_vandal12classesNoBackground": X_source_train, Y_source_train, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='washington12classes') X_dest_train, Y_dest_train, n_classes2 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='vandal12classesNoBackground') elif dset == "Wash_Vand_12class_LMDB": X_source_train, Y_source_train, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='Wash_12class_LMDB') elif dset == "OfficeDslrToAmazon": X_source_train, Y_source_train, X_source_test, Y_source_test, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='OfficeDslr') X_dest_train, Y_dest_train, X_dest_test, Y_dest_test, n_classes2 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='OfficeAmazon') elif dset == "bedrooms128": X_source_train, Y_source_train, X_source_test, Y_source_test, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='bedrooms128small') X_dest_train, Y_dest_train, X_dest_test, Y_dest_test, n_classes2 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='bedrooms128') elif dset == "bedrooms": X_source_train, Y_source_train, X_source_test, Y_source_test, n_classes1 = data_utils.load_image_dataset( img_dim, image_dim_ordering, dset='bedrooms_small') X_dest_train, Y_dest_train, X_dest_test, Y_dest_test, n_classes2 = data_utils.load_image_dataset( img_dim, 
            image_dim_ordering, dset='bedrooms')
    elif dset == "Vand_Vand_12class_LMDB":
        X_source_train, Y_source_train, X_source_test, Y_source_test, n_classes1 = data_utils.load_image_dataset(
            img_dim, image_dim_ordering, dset='Vand_12class_LMDB_Background')
        X_dest_train, Y_dest_train, X_dest_test, Y_dest_test, n_classes2 = data_utils.load_image_dataset(
            img_dim, image_dim_ordering, dset='Vand_12class_LMDB')
    else:
        print "dataset not supported"
    if n_classes1 != n_classes2:  # sanity check
        print "number of classes mismatch between source and dest domains"
    n_classes = n_classes1
    img_source_dim = X_source_train.shape[-3:]  # is it backend agnostic?
    img_dest_dim = X_dest_train.shape[-3:]
    # Create optimizers
    opt_D = data_utils.get_optimizer(opt_D, lr_D)
    opt_G = data_utils.get_optimizer(opt_G, lr_G)
    opt_C = data_utils.get_optimizer('SGD', 0.01)
    opt_Z = data_utils.get_optimizer('Adam', lr_G)
    #######################
    # Load models
    #######################
    noise_dim = (noise_dim, )
    generator_model = models.generator_deconv(noise_dim, img_source_dim,
                                              img_dest_dim, bn_mode,
                                              deterministic, pureGAN,
                                              inject_noise, wd)
    discriminator_model = models.discriminator_naive1(
        img_dest_dim, wd, inject_noise, model_name="discriminator_naive1")
    disc_penalty_model = models.disc_penalty(discriminator_model, noise_dim,
                                             img_source_dim, opt_D,
                                             model_name="disc_penalty_model")
    DCGAN_model = models.DCGAN_naive(generator_model, discriminator_model,
                                     noise_dim, img_source_dim)
    #zclass_model = z_coerence(generator_model, img_source_dim, bn_mode, wd, inject_noise, n_classes, noise_dim, model_name="zClass")
    ############################
    # Compile models
    ############################
    generator_model.compile(loss='mse', optimizer=opt_G)
    models.make_trainable(discriminator_model, False)
    models.make_trainable(disc_penalty_model, False)
    if model == 'wgan':
        DCGAN_model.compile(loss=models.wasserstein, optimizer=opt_G)
        models.make_trainable(discriminator_model, True)
        models.make_trainable(disc_penalty_model, True)
        discriminator_model.compile(loss=models.wasserstein, optimizer=opt_D)
    if model == 'lsgan':
        if simple_disc:
            DCGAN_model.compile(loss=['mse'], optimizer=opt_G)
            models.make_trainable(discriminator_model, True)
            models.make_trainable(disc_penalty_model, True)
            discriminator_model.compile(loss=['mse'], optimizer=opt_D)
            #zclass_model.compile(loss=['mse'], optimizer=opt_Z)
        elif monsterClass:
            DCGAN_model.compile(loss=['categorical_crossentropy'], optimizer=opt_G)
            models.make_trainable(disc_penalty_model, True)
            models.make_trainable(discriminator_model, True)
            discriminator_model.compile(loss=['categorical_crossentropy'], optimizer=opt_D)
        else:
            DCGAN_model.compile(loss=['mse', 'categorical_crossentropy'],
                                loss_weights=[1.0, 1.0], optimizer=opt_G)
            models.make_trainable(disc_penalty_model, True)
            models.make_trainable(discriminator_model, True)
            discriminator_model.compile(loss=['mse', 'categorical_crossentropy'],
                                        loss_weights=[1.0, 1.0], optimizer=opt_D)
    visualize = True
    if resume:
        # loading previously saved model weights
        data_utils.load_model_weights(generator_model, discriminator_model,
                                      DCGAN_model, name)
    ###############
    # WEIGHTNORM DATA INIT
    ###############
    #####################
    # train zclass
    #####################
    X_gen = data_utils.sample_noise(noise_scale, X_source_train.shape[0], noise_dim)
    #data_based_init(generator_model, [X_gen, X_source_train])
    #zclass_loss = zclass_model.fit([X_gen, X_source_train], [X_gen], batch_size=256, nb_epoch=10)
    #####################
    # classifier
    #####################
    if not ((dset == 'mnistM') or (dset == 'bedrooms') or
(dset == 'bedrooms128')): classifier, GenToClassifierModel = classifier_build_test( img_dest_dim, n_classes, generator_model, noise_dim, noise_scale, img_source_dim, opt_C, X_source_test, Y_source_test, X_dest_test, Y_dest_test, wd=0.0001) gen_iterations = 0 max_history_size = int(history_size * batch_size) img_buffer = ImageHistoryBuffer((0, ) + img_source_dim, max_history_size, batch_size, n_classes) ################# # Start training ################ for e in range(nb_epoch): # Initialize progbar and batch counter progbar = generic_utils.Progbar(epoch_size) batch_counter = 1 start = time.time() while batch_counter < n_batch_per_epoch: if no_supertrain is None: if (gen_iterations < 25) and (not resume): disc_iterations = 50 if gen_iterations % 500 == 0: disc_iterations = 50 else: disc_iterations = kwargs["disc_iterations"] else: if (gen_iterations < 25) and (not resume): disc_iterations = 50 else: disc_iterations = kwargs["disc_iterations"] ################################### # 1) Train the critic / discriminator ################################### list_disc_loss_real = deque(4 * [0], 4) list_disc_loss_gen = deque(4 * [0], 4) list_gen_loss = deque(4 * [0], 4) list_zclass_loss = [] list_gp_loss = deque(4 * [0], 4) for disc_it in range(disc_iterations): X_dest_batch, Y_dest_batch, idx_dest_batch = next( data_utils.gen_batch(X_dest_train, Y_dest_train, batch_size)) X_source_batch, Y_source_batch, idx_source_batch = next( data_utils.gen_batch(X_source_train, Y_source_train, batch_size)) # Create a batch to feed the discriminator model X_disc_real, X_disc_gen = data_utils.get_disc_batch( X_dest_batch, generator_model, batch_counter, batch_size, noise_dim, X_source_batch, noise_scale=noise_scale) if model == 'wgan': # Update the discriminator current_labels_real = -np.ones(X_disc_real.shape[0]) current_labels_gen = np.ones(X_disc_gen.shape[0]) if model == 'lsgan': if simple_disc: #for real domain I put [labels 0 0 0...0], for fake domain I put [0 0...0 labels] current_labels_real = np.ones(X_disc_real.shape[0]) #current_labels_gen = -np.ones(X_disc_gen.shape[0]) current_labels_gen = np.zeros(X_disc_gen.shape[0]) elif monsterClass: #for real domain I put [labels 0 0 0...0], for fake domain I put [0 0...0 labels] current_labels_real = np.concatenate( (Y_dest_batch, np.zeros((X_disc_real.shape[0], n_classes))), axis=1) current_labels_gen = np.concatenate((np.zeros( (X_disc_real.shape[0], n_classes)), Y_source_batch), axis=1) else: current_labels_real = [ np.ones(X_disc_real.shape[0]), Y_dest_batch ] Y_fake_batch = (1.0 / n_classes) * np.ones( [X_disc_gen.shape[0], n_classes]) current_labels_gen = [ np.zeros(X_disc_gen.shape[0]), Y_fake_batch ] #label smoothing #current_labels_real = np.multiply(current_labels_real, lsmooth) #usually lsmooth = 0.7 disc_loss_real = discriminator_model.train_on_batch( X_disc_real, current_labels_real) img_buffer.add_to_buffer(X_disc_gen, current_labels_gen, batch_size) bufferImages, bufferLabels = img_buffer.get_from_buffer( batch_size) disc_loss_gen = discriminator_model.train_on_batch( bufferImages, bufferLabels) list_disc_loss_real.appendleft(disc_loss_real) list_disc_loss_gen.appendleft(disc_loss_gen) ############# ####Train the discriminator w.r.t gradient penalty ############# gp_loss = disc_penalty_model.train_on_batch( [X_disc_real, X_disc_gen], current_labels_real ) #dummy labels,not used in the loss function list_gp_loss.appendleft(gp_loss) ####################### # 2) Train the generator ####################### X_gen = data_utils.sample_noise(noise_scale, 
batch_size, noise_dim) X_source_batch2, Y_source_batch2, idx_source_batch2 = next( data_utils.gen_batch(X_source_train, Y_source_train, batch_size)) #zclass_loss = zclass_model.train_on_batch([X_gen,X_source_batch2],[X_gen]) #list_zclass_loss.append(zclass_loss) # w1 = classifier.get_weights() #FOR DEBUG if model == 'wgan': gen_loss = DCGAN_model.train_on_batch([X_gen, X_source_batch2], -np.ones(X_gen.shape[0])) if model == 'lsgan': if simple_disc: gen_loss = DCGAN_model.train_on_batch( [X_gen, X_source_batch2], np.ones(X_gen.shape[0])) #TRYING SAME BATCH OF DISC? elif monsterClass: labels_gen = np.concatenate( (Y_source_batch2, np.zeros((X_disc_real.shape[0], n_classes))), axis=1) gen_loss = DCGAN_model.train_on_batch( [X_gen, X_source_batch2], labels_gen) else: gen_loss = DCGAN_model.train_on_batch( [X_gen, X_source_batch2], [np.ones(X_gen.shape[0]), Y_source_batch2]) # gen_loss2 = GenToClassifierModel.train_on_batch([X_gen,X_source_batch2], Y_source_batch2) # w2 = classifier.get_weights() #FOR DEBUG # for a,b in zip(w1, w2): # if np.all(a == b): # print "no bug in GEN model update" # else: # print "BUG IN GEN MODEL UPDATE" list_gen_loss.appendleft(gen_loss) gen_iterations += 1 batch_counter += 1 progbar.add(batch_size, values=[("Loss_D", 0.5 * np.mean(list_disc_loss_real) + 0.5 * np.mean(list_disc_loss_gen)), ("Loss_D_real", np.mean(list_disc_loss_real)), ("Loss_D_gen", np.mean(list_disc_loss_gen)), ("Loss_G", np.mean(list_gen_loss)), ("Loss_gp", np.mean(list_gp_loss))]) # plot images 1 times per epoch if batch_counter % (n_batch_per_epoch) == 0: X_source_batch_plot, Y_source_batch_plot, idx_source_plot = next( data_utils.gen_batch(X_source_test, Y_source_test, batch_size=32)) data_utils.plot_generated_batch(X_dest_test, X_source_test, generator_model, noise_dim, image_dim_ordering, idx_source_plot, batch_size=32) if gen_iterations % (n_batch_per_epoch * 5) == 0: if visualize: BIG_ASS_VISUALIZATION_slerp(X_source_train[1], generator_model, noise_dim) print("Dest labels:") print(Y_dest_test[idx_source_plot].argmax(1)) print("Source labels:") print(Y_source_batch_plot.argmax(1)) print('\nEpoch %s/%s, Time: %s' % (e + 1, nb_epoch, time.time() - start)) # Save model weights (by default, every 5 epochs) data_utils.save_model_weights(generator_model, discriminator_model, DCGAN_model, e, name)
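# The loop above interleaves critic/discriminator updates with generator
# updates through the stacked DCGAN model. A stripped-down sketch of that
# alternation (binary real/fake labels as in the simple_disc branch; the
# model wiring is assumed, and in Keras the trainable flags must already be
# set as they are compiled above):
import numpy as np

def gan_step(generator_model, discriminator_model, DCGAN_model,
             X_real, noise_batch, source_batch):
    batch_size = X_real.shape[0]
    X_fake = generator_model.predict([noise_batch, source_batch])
    # discriminator sees real images as 1, generated images as 0
    d_loss_real = discriminator_model.train_on_batch(X_real, np.ones(batch_size))
    d_loss_fake = discriminator_model.train_on_batch(X_fake, np.zeros(batch_size))
    # the generator is trained through the frozen discriminator to output 1
    g_loss = DCGAN_model.train_on_batch([noise_batch, source_batch],
                                        np.ones(batch_size))
    return 0.5 * (d_loss_real + d_loss_fake), g_loss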
# gen_model.load_weights("models/Gen/model_at_epoch_0.h5") # disc_model.load_weights("models/Disc/model_at_epoch_0.h5") mr_data = get_data(mr_path) ct_data = get_data(ct_path) Y_true_batch = np.ones((batch_size, 1), dtype="float32") Y_fake_batch = np.zeros((batch_size, 1), dtype="float32") y_gen = np.ones((batch_size, 1), dtype="float32") zero_flow = np.zeros( (batch_size, data_shape[0], data_shape[1], data_shape[2], 3), dtype="float32") for ep in range(epochs): print("epochs:" + str(ep)) progbar = keras_generic_utils.Progbar(train_num) for mini_batch in range(0, train_num, batch_size): # -----------------------------------train discriminator------------------------------------------- disc_model.trainable = True idx_mr = np.random.choice(mr_data.shape[0], batch_size, replace=False) mr_batch = mr_data[idx_mr] ct_batch = ct_data[idx_mr] src_t, flow, ct_gen = reg_gen_model.predict([mr_batch, ct_batch]) if random.randint(0, 1) == 0: X_disc_batch = np.concatenate((ct_batch, ct_gen), axis=0) Y_disc_batch = np.concatenate((Y_true_batch, Y_fake_batch), axis=0) else:
losses = np.zeros((epoch_length, 5))
rpn_accuracy_rpn_monitor = []
rpn_accuracy_for_epoch = []
start_time = time.time()
best_loss = np.Inf
class_mapping_inv = {v: k for k, v in class_mapping.items()}
print('Starting training')
# vis = True

for epoch_num in range(num_epochs):
    progbar = generic_utils.Progbar(epoch_length)  # use the Keras progress bar
    print('Epoch {}/{}'.format(epoch_num + 1, num_epochs))
    while True:
        # try:
        # report the mean number of overlapping bboxes
        if len(rpn_accuracy_rpn_monitor) == epoch_length and C.verbose:
            mean_overlapping_bboxes = float(
                sum(rpn_accuracy_rpn_monitor)) / len(rpn_accuracy_rpn_monitor)
            rpn_accuracy_rpn_monitor = []
            print(
                'Average number of overlapping bounding boxes from RPN = {} for {} previous iterations'
                .format(mean_overlapping_bboxes, epoch_length))
            if mean_overlapping_bboxes == 0:
                print(
                    'RPN is not producing bounding boxes that overlap the ground truth boxes. Check RPN settings or keep training.'
def main(): parser = argparse.ArgumentParser() parser.add_argument('-num_hidden_units', type=int, default=512) parser.add_argument('-num_lstm_layers', type=int, default=2) parser.add_argument('-dropout', type=float, default=0.2) parser.add_argument('-activation', type=str, default='tanh') args = parser.parse_args() questions_train = open('../data/preprocessed/questions_train2014.txt', 'r').read().decode('utf8').splitlines() questions_lengths_train = open( '../data/preprocessed/questions_lengths_train2014.txt', 'r').read().decode('utf8').splitlines() answers_train = open('../data/preprocessed/answers_train2014_modal.txt', 'r').read().decode('utf8').splitlines() images_train = open('../data/preprocessed/images_train2014.txt', 'r').read().decode('utf8').splitlines() max_answers = 1000 questions_train, answers_train, images_train = selectFrequentAnswers( questions_train, answers_train, images_train, max_answers) print 'Loaded questions, sorting by length...' questions_lengths_train, questions_train, answers_train = ( list(t) for t in zip(*sorted( zip(questions_lengths_train, questions_train, answers_train)))) #encode the remaining answers labelencoder = preprocessing.LabelEncoder() labelencoder.fit(answers_train) nb_classes = len(list(labelencoder.classes_)) joblib.dump(labelencoder, '../models/labelencoder.pkl') max_len = 30 #25 is max for training, 27 is max for validation word_vec_dim = 300 model = Sequential() model.add( LSTM(output_dim=args.num_hidden_units, activation='tanh', return_sequences=True, input_shape=(max_len, word_vec_dim))) model.add(Dropout(args.dropout)) model.add(LSTM(args.num_hidden_units, return_sequences=False)) model.add(Dense(nb_classes, init='uniform')) model.add(Activation('softmax')) json_string = model.to_json() model_file_name = '../models/lstm_language_only_num_hidden_units_' + str( args.num_hidden_units) + '_num_lstm_layers_' + str( args.num_lstm_layers) + '_dropout_' + str(args.dropout) open(model_file_name + '.json', 'w').write(json_string) print 'Compiling model...' model.compile(loss='categorical_crossentropy', optimizer='rmsprop') print 'Compilation done...' #set up word vectors nlp = English() print 'loaded word2vec features...' ## training print 'Training started...' numEpochs = 100 model_save_interval = 5 batchSize = 128 for k in xrange(numEpochs): progbar = generic_utils.Progbar(len(questions_train)) for qu_batch, an_batch, im_batch in zip( grouper(questions_train, batchSize, fillvalue=questions_train[0]), grouper(answers_train, batchSize, fillvalue=answers_train[0]), grouper(images_train, batchSize, fillvalue=images_train[0])): timesteps = len(nlp( qu_batch[-1])) #questions sorted in descending order of length X_q_batch = get_questions_tensor_timeseries( qu_batch, nlp, timesteps) Y_batch = get_answers_matrix(an_batch, labelencoder) loss = model.train_on_batch(X_q_batch, Y_batch) progbar.add(batchSize, values=[("train loss", loss)]) if k % model_save_interval == 0: model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k)) model.save_weights(model_file_name + '_epoch_{:02d}.hdf5'.format(k + 1))
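# The loop above sorts the questions by length and reads each batch's
# timestep count off its longest member, so a batch only pads to its own
# maximum. A hypothetical minimal version of that bucketing (tokenization
# simplified to whitespace splitting; the real code uses spaCy's nlp()):
def batches_by_length(questions, answers, batch_size):
    order = sorted(range(len(questions)), key=lambda i: len(questions[i].split()))
    for start in range(0, len(order), batch_size):
        sel = order[start:start + batch_size]
        batch_q = [questions[i] for i in sel]
        batch_a = [answers[i] for i in sel]
        timesteps = len(batch_q[-1].split())  # longest question in this batch
        yield batch_q, batch_a, timesteps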
img_dim=(256, 256, 3)) # DCGAN_model.load_weights('./models/pix2pix/DCGAN_weights_epoch_6.h5') loss = [l1_loss, 'binary_crossentropy'] # loss = [perceptual_loss, 'binary_crossentropy'] loss_weights = [1E1, 1] DCGAN_model.compile(loss=loss, loss_weights=loss_weights, optimizer=G_opt) discriminator_model.trainable = True discriminator_model.compile(loss='binary_crossentropy', optimizer=D_opt) # Start training print("Start training") for e in range(1, nb_epoch + 1): # Initialize progbar and batch counter progbar = generic_utils.Progbar(epoch_size) print('Epoch %s/%s' % (e, nb_epoch)) for b in range(1, n_batch_per_epoch + 1): X_HR_batch, X_LR_batch = gen_batch(train_list, batch_size) # Create a batch to feed the discriminator model X_disc, y_disc = get_disc_batch(X_HR_batch, X_LR_batch, generator_model, b) # Update the discriminator disc_loss = discriminator_model.train_on_batch(X_disc, y_disc) # Create a batch to feed the generator model X_gen_target, X_gen = gen_batch(train_list, batch_size) y_gen = np.zeros((X_gen.shape[0], 1), dtype=np.uint8) y_gen[:, 0] = 1
def main(): start_time = time.time() signal.signal(signal.SIGINT, InterruptHandler) #signal.signal(signal.SIGKILL, InterruptHandler) signal.signal(signal.SIGTERM, InterruptHandler) parser = argparse.ArgumentParser( prog='trainMLP.py', description='Train MLP model for visual question answering') parser.add_argument('--mlp-hidden-units', type=int, default=1024, metavar='<mlp-hidden-units>') parser.add_argument('--mlp-hidden-layers', type=int, default=3, metavar='<mlp-hidden-layers>') parser.add_argument('--dropout', type=float, default=0.5, metavar='<dropout-rate>') parser.add_argument('--mlp-activation', type=str, default='relu', metavar='<activation-function>') parser.add_argument('--num-epochs', type=int, default=100, metavar='<num-epochs>') parser.add_argument('--model-save-interval', type=int, default=5, metavar='<interval>') parser.add_argument('--batch-size', type=int, default=128, metavar='<batch-size>') args = parser.parse_args() word_vec_dim = 300 img_dim = 4096 ###################### # Load Data # ###################### print('Loading data...') train_id_pairs, train_image_ids = LoadIds('train') dev_id_pairs, dev_image_ids = LoadIds('dev') train_choices = LoadChoices('train') dev_choices = LoadChoices('dev') train_answers = LoadAnswers('train') dev_answers = LoadAnswers('dev') print('Finished loading data.') print('Time: %f s' % (time.time() - start_time)) print('-' * 100, file=sys.stderr) print('Training Information', file=sys.stderr) print('# of MLP hidden units: %i' % args.mlp_hidden_units, file=sys.stderr) print('# of MLP hidden layers: %i' % args.mlp_hidden_layers, file=sys.stderr) print('Dropout: %f' % args.dropout, file=sys.stderr) print('MLP activation function: %s' % args.mlp_activation, file=sys.stderr) print('# of training epochs: %i' % args.num_epochs, file=sys.stderr) print('Batch size: %i' % args.batch_size, file=sys.stderr) print('# of train images: %i' % len(train_image_ids), file=sys.stderr) print('# of dev images: %i' % len(dev_image_ids), file=sys.stderr) print('-' * 100, file=sys.stderr) ###################### # Model Descriptions # ###################### # MLP model model = Sequential() model.add(Dense(output_dim=args.mlp_hidden_units, input_dim=img_dim)) model.add(Activation(args.mlp_activation)) model.add(Dropout(args.dropout)) for i in range(args.mlp_hidden_layers - 1): model.add(Dense(args.mlp_hidden_units)) model.add(Activation(args.mlp_activation)) model.add(Dropout(args.dropout)) model.add(Dense(word_vec_dim)) model.add(Activation('softmax')) json_string = model.to_json() model_filename = 'models/mlp_units_%i_layers_%i' % (args.mlp_hidden_units, args.mlp_hidden_layers) open(model_filename + '.json', 'w').write(json_string) # loss and optimizer model.compile(loss='categorical_crossentropy', optimizer='adagrad') print('Compilation finished.') print('Time: %f s' % (time.time() - start_time)) ######################################## # Load CNN Features and Word Vectors # ######################################## # load VGG features print('Loading VGG features...') VGG_features, img_map = LoadVGGFeatures() print('VGG features loaded') print('Time: %f s' % (time.time() - start_time)) # load GloVe vectors print('Loading GloVe vectors...') word_embedding, word_map = LoadGloVe() print('GloVe vectors loaded') print('Time: %f s' % (time.time() - start_time)) ###################### # Make Batches # ###################### print('Making batches...') # training batches train_answer_batches = [ b for b in MakeBatches(train_answers['toks'], args.batch_size, 
fillvalue=train_answers['toks'][-1]) ] train_image_batches = [ b for b in MakeBatches( train_image_ids, args.batch_size, fillvalue=train_image_ids[-1]) ] train_indices = list(range(len(train_image_ids))) # validation batches dev_answer_batches = [ b for b in MakeBatches(dev_answers['labs'], args.batch_size, fillvalue=dev_answers['labs'][-1]) ] dev_choice_batches = [ b for b in MakeBatches( dev_choices, args.batch_size, fillvalue=dev_choices[-1]) ] dev_image_batches = [ b for b in MakeBatches( dev_image_ids, args.batch_size, fillvalue=dev_image_ids[-1]) ] print('Finished making batches.') print('Time: %f s' % (time.time() - start_time)) ###################### # Training # ###################### dev_accs = [] max_acc = -1 max_acc_epoch = -1 print('Training started...') for k in range(args.num_epochs): print('Epoch %i' % (k + 1), file=sys.stderr) print('-' * 80) print('Epoch %i' % (k + 1)) progbar = generic_utils.Progbar(len(train_indices) * args.batch_size) # shuffle batch indices random.shuffle(train_indices) for i in train_indices: X_image_batch = GetImagesMatrix(train_image_batches[i], img_map, VGG_features) Y_answer_batch = GetAnswersMatrix(train_answer_batches[i], word_embedding, word_map) loss = model.train_on_batch(X_image_batch, Y_answer_batch) loss = loss[0].tolist() progbar.add(args.batch_size, values=[('train loss', loss)]) if k % args.model_save_interval == 0: model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1), overwrite=True) # evaluate on dev set widgets = [ 'Evaluating ', Percentage(), ' ', Bar(marker='#', left='[', right=']'), ' ', ETA() ] pbar = ProgressBar(widgets=widgets, redirect_stdout=True) dev_correct = 0 for i in pbar(range(len(dev_image_batches))): # feed forward X_image_batch = GetImagesMatrix(dev_image_batches[i], img_map, VGG_features) prob = model.predict_proba(X_image_batch, args.batch_size, verbose=0) # get word vecs of choices choice_feats = GetChoicesTensor(dev_choice_batches[i], word_embedding, word_map) similarity = np.zeros((5, args.batch_size), float) # calculate cosine distances for j in range(5): similarity[j] = np.diag( cosine_similarity(prob, choice_feats[j])) # take argmax of cosine distances pred = np.argmax(similarity, axis=0) + 1 dev_correct += np.count_nonzero(dev_answer_batches[i] == pred) dev_acc = float(dev_correct) / len(dev_image_ids) dev_accs.append(dev_acc) print('Validation Accuracy: %f' % dev_acc) print('Validation Accuracy: %f' % dev_acc, file=sys.stderr) print('Time: %f s' % (time.time() - start_time)) print('Time: %f s' % (time.time() - start_time), file=sys.stderr) if dev_acc > max_acc: max_acc = dev_acc max_acc_epoch = k model.save_weights(model_filename + '_best.hdf5', overwrite=True) model.save_weights(model_filename + '_epoch_{:03d}.hdf5'.format(k + 1)) print(dev_accs, file=sys.stderr) print('Best validation accuracy: epoch#%i' % max_acc_epoch) print('Training finished.') print('Training finished.', file=sys.stderr) print('Time: %f s' % (time.time() - start_time)) print('Time: %f s' % (time.time() - start_time), file=sys.stderr)
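# MakeBatches() above pads the final batch with a fillvalue so every batch
# has the same size. Its likely shape is the classic itertools "grouper"
# recipe (an assumption; the helper itself isn't shown in this excerpt):
from itertools import zip_longest

def make_batches(iterable, batch_size, fillvalue=None):
    # e.g. make_batches([1, 2, 3, 4, 5], 2, fillvalue=5) -> [(1, 2), (3, 4), (5, 5)]
    args = [iter(iterable)] * batch_size
    return list(zip_longest(*args, fillvalue=fillvalue))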