def c3d(nb_classes, img_w=m_c3d.kernel_w, img_h=m_c3d.kernel_h, include_top=True):
    """Configure augmentation flags and build a C3D model with its batch generators.

    Returns a tuple ``(model, train_generator, val_generator, test_generator)``
    where the generators come from the ``g_c3d`` module.
    """
    # Propagate the augmentation switches to both helper modules.
    # NOTE(review): doFlip/doScale are expected to be module-level globals — confirm.
    m_c3d.doFlip = doFlip
    m_c3d.doScale = doScale
    g_c3d.doFlip = doFlip
    g_c3d.doScale = doScale
    g_c3d.doNoisy = True
    g_c3d.doTrans = True

    model = m_c3d.c3d_model(
        nb_classes=nb_classes,
        img_w=img_w,
        img_h=img_h,
        num_channels=3,
        include_top=include_top,
    )
    return (
        model,
        g_c3d.generator_train_batch,
        g_c3d.generator_val_batch,
        g_c3d.generator_test_batch,
    )
def main():
    """Train the 3-D CNN selected by ``args.model`` and save weights/plots.

    Reads sample counts from the train/test list files, builds and compiles
    the chosen model, trains it with the batch generators, then writes the
    history plots and the final weights under ``results/``.
    """
    train_file = 'train_list.txt'
    test_file = 'test_list.txt'

    # Fix: use context managers instead of manual open/close pairs so the
    # file handles are released even if readlines() raises.
    with open(train_file, 'r') as f:
        train_samples = len(f.readlines())
    with open(test_file, 'r') as f:
        val_samples = len(f.readlines())

    num_classes = 101
    epochs = 25
    input_shape = (112, 112, 8, 3)  # presumably (H, W, frames, channels) — TODO confirm

    # Dispatch on the CLI-selected architecture; unknown names fail loudly.
    if args.model == 'c3d':
        model = c3d.c3d_model(num_classes, input_shape)
    elif args.model == 'resnet_3d':
        model = resnet_3d.resnet_3d(num_classes, input_shape, drop_rate=args.drop_rate)
    elif args.model == 'densenet_3d':
        model = densenet_3d.densenet_3d(num_classes, input_shape, dropout_rate=args.drop_rate)
    elif args.model == 'inception_3d':
        model = inception_3d.inception_3d(num_classes, input_shape)
    elif args.model == 'dense_resnet_3d':
        model = DenseResNet_3d.dense_resnet_3d(num_classes, input_shape, dropout_rate=args.drop_rate)
    else:
        raise ValueError('Do not support {}!'.format(args.model))

    sgd = SGD(lr=args.lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.summary()

    train = generator_train_batch(train_file, args.batch_size, num_classes, args.img_path)
    val = generator_val_batch(test_file, args.batch_size, num_classes, args.img_path)

    print('Start training..')
    history = model.fit_generator(
        train,
        steps_per_epoch=train_samples // args.batch_size,
        epochs=epochs,
        callbacks=[onetenth_10_15_20(args.lr)],
        validation_data=val,
        validation_steps=val_samples // args.batch_size,
        verbose=1)

    if not os.path.exists('results/'):
        os.mkdir('results/')
    plot_history(history, 'results/', args.model)
    save_history(history, 'results/', args.model)
    model.save_weights('results/weights_{}.h5'.format(args.model))
def video_audio(input_audio=(257, 129, 1), num_class=2, mode='train', args=None):
    """Fuse a C3D video branch with a VGGVox audio branch into one classifier.

    Both backbones are built headless (``include_top=False``), their outputs
    concatenated, and the result pushed through two L2-regularised dense
    layers before the final softmax over ``num_class`` classes.
    """
    from models.c3d import c3d_model

    video_branch = c3d_model(nb_classes=num_class,
                             img_w=112,
                             img_h=112,
                             num_channels=3,
                             include_top=False)
    audio_branch = vggvox_resnet2d(args=args,
                                   input_dim=input_audio,
                                   mode=mode,
                                   include_top=False)

    # Late fusion: concatenate the two feature vectors, then classify.
    fused = keras.layers.concatenate([video_branch.output, audio_branch.output])
    fused = Dense(2048, activation='relu', kernel_regularizer=l2(weight_decay))(fused)
    fused = Dropout(0.1)(fused)
    fused = Dense(2048, activation='relu', kernel_regularizer=l2(weight_decay))(fused)
    fused = Dense(num_class, kernel_regularizer=l2(weight_decay))(fused)
    fused = Activation('softmax')(fused)

    return Model(inputs=[video_branch.input, audio_branch.input], outputs=fused)
def load_data(class_names, video, model_path):
    """Load class names, build the C3D model with saved weights, and open the video.

    Args:
        class_names: path to a text file with one class name per line.
        video: path to the video to evaluate (also used to look up ground truth).
        model_path: directory containing ``weights_c3d.h5``.

    Returns:
        Tuple ``(cap, class_names, model, gt_speakers, gt_faces)``.
    """
    # Fix: dropped the redundant f.close() that sat inside the `with` block —
    # the context manager already closes the file.
    with open(class_names, 'r') as f:
        class_names = f.readlines()

    # Build and compile the model; num_classes/kernel_w/kernel_h are
    # presumably module-level globals — TODO confirm.
    model = c3d_model(num_classes, kernel_w, kernel_h)
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.summary()
    model.load_weights(model_path + '/weights_c3d.h5', by_name=True)

    # Ground truth (speakers and faces) associated with this video.
    gt_speakers, gt_faces = load_ground_truth(video)

    cap = cv2.VideoCapture(video)
    return cap, class_names, model, gt_speakers, gt_faces
def ict3DHP_test(video, class_names):
    """Evaluate the C3D model on one video at progressively smaller resolutions.

    For downscale factors 1..9, runs ``process_video_ict3DHP`` on the video and
    appends a tab-separated row (resolution, FP, mean FP, TP, mean TP) to
    ``result_video_demo.txt``.
    """
    # Fix: removed the redundant f.close()/fp.close() calls that sat inside
    # `with` blocks — the context managers already close the files.
    with open(class_names, 'r') as f:
        class_names = f.readlines()

    # Build and compile the model; num_classes/kernel_w/kernel_h are
    # presumably module-level globals — TODO confirm.
    model = c3d_model(num_classes, kernel_w, kernel_h)
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.summary()
    model.load_weights('./results/weights_c3d.h5', by_name=True)

    cap = cv2.VideoCapture(video)
    gt_index = 1
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)

    with open('result_video_demo.txt', 'w') as fp:
        fp.write('Resolution\tFP\tMean FP\tTP\tMean TP\n')
        # Shrink the frame by integer factors and re-run detection each pass.
        for i in range(1, 10):
            tmp_w = int(w / i)
            tmp_h = int(h / i)
            data = process_video_ict3DHP(cap, class_names, model, gt_index, (tmp_w, tmp_h))
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)  # rewind for the next resolution
            fp.write('{}\t{}\t{}\t{}\t{}\n'.format(
                (tmp_w, tmp_h), data[1], data[2], data[3], data[4]))

    cap.release()
    cv2.destroyAllWindows()