def main():
    """Train the C3D model with SGD and store the results under ``results/``.

    The train/test list files are only read here to count samples, which
    determines ``steps_per_epoch`` and ``validation_steps``; the list paths
    themselves are handed to the batch generators.  After training, the
    history plots, the saved history and the final weights are written to
    ``results/``.
    """
    img_path = '/home/shixi/C3D-keras/datasets/ucfimgs/'
    train_file = 'train_list.txt'
    test_file = 'test_list.txt'

    # Count the samples inside a context manager so both handles are closed
    # even if reading fails part-way through.
    with open(train_file, 'r') as f1, open(test_file, 'r') as f2:
        train_samples = sum(1 for _ in f1)
        val_samples = sum(1 for _ in f2)

    num_classes = 2
    batch_size = 16
    epochs = 16

    model = c3d_model()
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()

    history = model.fit_generator(
        generator_train_batch(train_file, batch_size, num_classes, img_path),
        steps_per_epoch=train_samples // batch_size,
        epochs=epochs,
        callbacks=[onetenth_4_8_12(lr)],
        validation_data=generator_val_batch(test_file, batch_size,
                                            num_classes, img_path),
        validation_steps=val_samples // batch_size,
        verbose=1)

    if not os.path.exists('results/'):
        os.mkdir('results/')
    plot_history(history, 'results/')
    save_history(history, 'results/')
    model.save_weights('results/weights_c3d.h5')
def c3d_svm():
    """Score the saved SVM on C3D network outputs computed for the test list.

    The test list is processed in full batches of ``batch_size``; each
    batch is pushed through the network and its outputs are collected as
    one feature row per clip.

    Returns:
        The value of ``clf.score`` on the collected features/labels, or
        ``None`` when the list holds fewer than ``batch_size`` entries.
    """
    with open("list/test.list") as files:
        videonum = sum(1 for _ in files)
    print("The number of test video={}".format(videonum))

    x, y = placeholder_input()
    sess = tf.InteractiveSession()
    # get the output of the network
    network = c3d_model(x, n_classes)
    # The original ran `sess.run(y_, ...)` without ever defining `y_`;
    # the tensor to evaluate is the network output.
    y_ = network.outputs
    sess.run(tf.global_variables_initializer())
    # NOTE(review): no checkpoint is restored in this function, so unless
    # c3d_model loads weights itself the features come from freshly
    # initialised parameters -- confirm.

    iteration = videonum // batch_size
    if iteration == 0:
        print("Not enough test videos for a single batch")
        return None

    next_start_pos = 0
    X_ = []
    Y_ = []
    for _ in range(iteration):
        test_images, test_labels, next_start_pos, _unused, valid_len = \
            input_data.read_clip_and_label(
                "list/test.list",
                batch_size,
                start_pos=next_start_pos
            )
        feed_dict = {x: test_images, y: test_labels}
        features = sess.run(y_, feed_dict=feed_dict)
        # extend (not append) so X_/Y_ hold one entry per clip rather than
        # one entry per batch -- assumes the first axis is the batch axis.
        X_.extend(features)
        Y_.extend(test_labels)

    clf = joblib.load(svm_model)
    score = clf.score(X_, Y_)
    print("The SVM test accuracy is {:.6f}".format(score))
    return score
def train_svm():
    """Fit a linear SVM on C3D network outputs and save it to ``svm/model.pkl``.

    When training the SVM, the softmax layer should be removed from the
    C3D model so that the network outputs are usable as features.

    Returns:
        The fitted ``SGDClassifier``.
    """
    sess = tf.InteractiveSession()

    # get the c3d temporal-spatial features
    x, y = placeholder_input(batch_size)
    networks = c3d_model(x, n_classes)
    y_ = networks.outputs

    # init the parameters
    sess.run(tf.global_variables_initializer())

    # load the pre-trained c3d model
    saver = tf.train.Saver()
    saver.restore(sess, models)

    X = []
    Y = []
    for _ in range(iteration):
        data, labels = read_data(train_path, False)
        fc_out = sess.run(y_, feed_dict={x: data, y: labels})
        # One row per clip -- assumes the first axis of fc_out / labels is
        # the batch axis (TODO confirm against read_data).
        X.extend(fc_out)
        Y.extend(labels)

    # train the linear SVM; the original dumped the classifier without
    # ever fitting it, so the saved model could not be used for scoring.
    clf = linear_model.SGDClassifier()
    clf.fit(X, Y)

    joblib.dump(clf, "svm/model.pkl")
    return clf
def simple_test(video, class_names):
    """Classify a video with the trained C3D model and show annotated frames.

    A sliding window of the 16 most recent frames is fed to the model; the
    predicted class name and its probability are drawn on the current frame,
    which is then displayed in a window named ``result``.

    Args:
        video: Source handed to ``cv2.VideoCapture``.
        class_names: Path of a text file whose lines are indexed by the
            predicted label.
    """
    with open(class_names, 'r') as f:
        class_names = f.readlines()

    # init model (num_classes is expected to be defined at module level)
    model = c3d_model(num_classes)
    lr = 0.005
    sgd = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights('./results/weights_c3d.h5', by_name=True)

    # per-channel normalisation constants applied to the RGB frames
    channel_mean = np.array([99.9, 92.1, 82.6], dtype=np.float32)
    channel_std = np.array([65.8, 62.3, 60.3], dtype=np.float32)

    # read video
    cap = cv2.VideoCapture(video)
    clip = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            tmp = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            clip.append(cv2.resize(tmp, (171, 128)))
            if len(clip) == 16:
                inputs = np.array(clip).astype(np.float32)
                inputs = np.expand_dims(inputs, axis=0)
                inputs -= channel_mean
                inputs /= channel_std
                # centre crop of 112x112 from the 128x171 frames
                inputs = inputs[:, :, 8:120, 30:142, :]
                # move the frame axis behind the two spatial axes
                inputs = np.transpose(inputs, (0, 2, 3, 1, 4))
                pred = model.predict(inputs)
                label = np.argmax(pred[0])
                # rstrip instead of [:-1]: a final line without a trailing
                # newline must not lose its last character.
                cv2.putText(frame, class_names[label].rstrip('\n'), (20, 20),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                            (0, 0, 255), 1)
                cv2.putText(frame, "prob: %.4f" % pred[0][label], (20, 40),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                            (0, 0, 255), 1)
                # slide the window forward by one frame
                clip.pop(0)
            cv2.imshow('result', frame)
            cv2.waitKey(10)
    finally:
        # release the capture and close windows even if prediction fails
        cap.release()
        cv2.destroyAllWindows()
def main():
    """Run the C3D demo on a fixed video file and display labelled frames.

    Loads the class list and the ``C3D01--3.766.hdf5`` weights, then keeps a
    rolling buffer of 16 resized RGB frames.  Whenever the buffer is full the
    model predicts a class whose name and probability are drawn on the
    current frame before it is shown.
    """
    with open('./TrainTestFileList/classes.txt', 'r') as names_file:
        class_names = names_file.readlines()

    # build and compile the network, then load the trained weights
    model = c3d_model()
    lr = 0.005
    optimizer = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights('C3D01--3.766.hdf5', by_name=True)

    # open the demo video
    video = './videos/Best_Of_Skype_Laughter_Chain_laugh_h_nm_np1_fr_goo_13.avi'
    cap = cv2.VideoCapture(video)

    window = []
    while True:
        ok, frame = cap.read()
        if not ok:
            break

        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        window.append(cv2.resize(rgb, (120, 90)))

        if len(window) == 16:
            batch = np.expand_dims(np.array(window).astype(np.float32), axis=0)
            # scale pixel values from [0, 255] to [-1, 1]
            batch /= 255.
            batch -= 0.5
            batch *= 2.
            batch = np.transpose(batch, (0, 2, 3, 1, 4))

            scores = model.predict(batch)[0]
            label = np.argmax(scores)
            caption = class_names[label].split(' ')[-1].strip()
            cv2.putText(frame, caption, (20, 20),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            cv2.putText(frame, "prob: %.4f" % scores[label], (20, 40),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                        (0, 0, 255), 1)
            # drop the oldest frame so the window advances by one
            window.pop(0)

        cv2.imshow('result', frame)
        cv2.waitKey(10)

    cap.release()
    cv2.destroyAllWindows()
def main():
    """Train C3D on two GPUs with Adam and store the results under ``results/``.

    The list files are read only to count samples for ``steps_per_epoch`` and
    ``validation_steps``.  Training runs on the ``multi_gpu_model`` wrapper,
    while the weights are saved from the wrapped template model.
    """
    img_path = '/home/pirl/PycharmProjects/cnnTest/FrameImg/'
    train_file = 'newTrainlist.txt'
    test_file = 'newTestlist.txt'

    # Context manager guarantees both handles are closed even when a read
    # fails; only the line counts are needed here.
    with open(train_file, 'r') as f1, open(test_file, 'r') as f2:
        train_samples = sum(1 for _ in f1)
        # entries of the test list; they are used as validation data below
        val_samples = sum(1 for _ in f2)

    # hyper parameter
    num_classes = 2
    batch_size = 16
    epochs = 14
    lr = 0.01

    model = c3d_model(True)  # train mode: True, test mode: False
    model.summary()

    # gpu set
    modelFromGpu = multi_gpu_model(model, gpus=2)

    # optimizer Adam
    optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999)
    modelFromGpu.compile(loss='binary_crossentropy',
                         optimizer=optimizer,
                         metrics=['accuracy'])

    history = modelFromGpu.fit_generator(
        generator_train_batch(train_file, batch_size, num_classes, img_path),
        steps_per_epoch=train_samples // batch_size,
        epochs=epochs,
        callbacks=[onetenth_4_8_12(lr)],
        validation_data=generator_val_batch(test_file, batch_size,
                                            num_classes, img_path),
        validation_steps=val_samples // batch_size,
        verbose=1)

    if not os.path.exists('results/'):
        os.mkdir('results/')
    plot_history(history, 'results/')
    save_history(history, 'results/')
    # weights are saved from the template model, not the multi-GPU wrapper
    model.save_weights('results/weights_c3d_lr001.h5')
def c3d_softmax():
    """Evaluate the restored C3D model on the test list via softmax accuracy.

    The test list is processed in full batches of ``batch_size``; the
    per-batch accuracies are averaged and printed.

    Returns:
        The average accuracy over all batches, or ``None`` when the list
        holds fewer than ``batch_size`` entries.
    """
    with open("list/test.list") as files:
        videonum = sum(1 for _ in files)
    print("The number of test video={}".format(videonum))

    x, y = placeholder_input()
    sess = tf.InteractiveSession()
    # get the output of the network
    network = c3d_model(x, n_classes)
    y_ = network.outputs
    y_op = tf.argmax(tf.nn.softmax(y_), 1)
    correct_pred = tf.equal(tf.cast(y_op, tf.int32), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

    # init the parameters
    sess.run(tf.global_variables_initializer())
    # load the model
    saver = tf.train.Saver()
    saver.restore(sess, model_name)

    iteration = videonum // batch_size
    if iteration == 0:
        # avoids the division by zero when averaging below
        print("Not enough test videos for a single batch")
        return None

    next_start_pos = 0
    total_acc = 0
    for i in range(iteration):
        start_time = time.time()
        test_images, test_labels, next_start_pos, _, valid_len = \
            input_data.read_clip_and_label(
                "list/test.list",
                batch_size,
                start_pos=next_start_pos
            )
        feed_dict = {x: test_images, y: test_labels}
        acc = sess.run(accuracy, feed_dict=feed_dict)
        duration = time.time() - start_time
        # The original mixed %-placeholders with str.format, which printed
        # the template text instead of the values.
        print("iteration {} has been finished in {:.2f} seconds".format(
            i, duration))
        total_acc += acc

    print("Done")
    average_acc = total_acc / iteration
    print("The test average accuracy is {:.6f}".format(average_acc))
    return average_acc
def main():
    """Train C3D with RMSprop, checkpointing every epoch, and save the results.

    No validation data is passed to ``fit_generator``, so the checkpoint
    file name is built from the training loss.
    """
    img_path = '/home/kk/TAIL_week_1/datasets/imgs/'
    train_file = 'train_list.txt'
    valid_file = 'valid_list.txt'

    # Context manager guarantees both handles are closed even on failure.
    with open(train_file, 'r') as f1, open(valid_file, 'r') as f2:
        train_samples = sum(1 for _ in f1)
        valid_samples = sum(1 for _ in f2)

    num_classes = 45
    batch_size = 4
    epochs = 8

    model = c3d_model()
    op = RMSprop()
    model.compile(loss='categorical_crossentropy',
                  optimizer=op,
                  metrics=['accuracy'])
    model.summary()

    # The file name must only use keys present in the epoch logs.  With
    # validation disabled there is no `val_loss`, and formatting the original
    # '{val_loss:.3f}' template fails at the end of the first epoch.
    checkpoint = ModelCheckpoint(filepath='C3D{epoch:02d}--{loss:.3f}.hdf5',
                                 monitor='loss',
                                 verbose=1,
                                 mode='min',
                                 period=1)

    # NOTE(review): training runs on valid_file / valid_samples while
    # train_samples is computed but unused -- this looks like a leftover
    # from an experiment; confirm which list is intended.
    history = model.fit_generator(
        generator_train_batch(valid_file, batch_size, num_classes, img_path),
        steps_per_epoch=valid_samples // batch_size,
        epochs=epochs,
        callbacks=[checkpoint],
        verbose=1)

    if not os.path.exists('results/'):
        os.mkdir('results/')
    plot_history(history, 'results/')
    save_history(history, 'results/')
    model.save_weights('results/weights_c3d.h5')
def ict3DHP_test(video, class_names):
    """Evaluate the C3D model on one video at nine decreasing resolutions.

    For every divisor 1..9 the frame size is divided by that value, the
    video is processed with ``process_video_ict3DHP`` and one tab-separated
    row is appended to ``result_video_demo.txt``.

    Args:
        video: Source handed to ``cv2.VideoCapture``.
        class_names: Path of a text file read line by line into the class
            list that is forwarded to ``process_video_ict3DHP``.
    """
    with open(class_names, 'r') as names_file:
        class_names = names_file.readlines()

    # build, compile and load the network
    model = c3d_model(num_classes)
    lr = 0.005
    optimizer = SGD(lr=lr, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights('./results/weights_c3d.h5', by_name=True)

    # open the video and query its native frame size
    cap = cv2.VideoCapture(video)
    gt_index = 1
    h = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
    w = cap.get(cv2.CAP_PROP_FRAME_WIDTH)

    row_template = '{}\t{}\t{}\t{}\t{}\n'
    with open('result_video_demo.txt', 'w') as report:
        report.write('Resolution\tFP\tMean FP\tTP\tMean TP\n')
        for divisor in range(1, 10):
            size = (int(w / divisor), int(h / divisor))
            data = process_video_ict3DHP(cap, class_names, model,
                                         gt_index, size)
            # rewind so the next resolution starts from the first frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, 0)
            report.write(row_template.format(
                size, data[1], data[2], data[3], data[4]))

    cap.release()
    cv2.destroyAllWindows()
def main(video_stream):
    """Run C3D recognition on two camera streams or on a folder of images.

    Settings are read from ``config.txt`` located next to this file.  In
    ``video`` mode frames are read alternately from ``camera_1`` and
    ``camera_2``, annotated once 16 frames are buffered, and written to the
    per-camera output video.  In ``image`` mode every readable image of the
    input directory is processed the same way and written as a numbered JPEG.

    Args:
        video_stream: Mapping from camera id (a string) to a sequence whose
            item 0 is the list used as frame buffer, item 1 the source given
            to ``cv2.VideoCapture`` and item 2 the output path given to
            ``cv2.VideoWriter``.
    """
    # read config.txt
    root_dir = os.path.abspath(os.path.dirname(__file__))  # directory containing this file
    configpath = os.path.join(root_dir, "config.txt")
    config = configparser.ConfigParser()
    config.read(configpath)
    classInd_path = config.get("C3D", "classInd_path")
    weights_path = config.get("C3D", "weights_path")
    lr = config.get("C3D", "lr")
    momentum = config.get("C3D", "momentum")
    image_read = config.get("image", "image_read")
    image_write = config.get("image", "image_write")
    video_image = config.get("choose", "video_image")

    with open(classInd_path, 'r') as f:
        class_names = f.readlines()

    # init model
    num = 1
    camera_ids = video_stream.keys()
    cap_write = {}
    model = c3d_model()
    sgd = SGD(lr=float(lr), momentum=float(momentum), nesterov=True)
    model.compile(loss='categorical_crossentropy',
                  optimizer=sgd,
                  metrics=['accuracy'])
    model.summary()
    model.load_weights(weights_path, by_name=True)

    def multi_detecion(clip, frame):
        """Predict on the 16-frame clip, draw the result on frame, advance clip."""
        inputs = np.array(clip).astype(np.float32)
        inputs = np.expand_dims(inputs, axis=0)
        # per-channel normalisation
        inputs[..., 0] -= 99.9
        inputs[..., 1] -= 92.1
        inputs[..., 2] -= 82.6
        inputs[..., 0] /= 65.8
        inputs[..., 1] /= 62.3
        inputs[..., 2] /= 60.3
        # centre crop of 112x112, then move the frame axis behind the
        # spatial axes
        inputs = inputs[:, :, 8:120, 30:142, :]
        inputs = np.transpose(inputs, (0, 2, 3, 1, 4))
        pred = model.predict(inputs)
        label = np.argmax(pred[0])
        cv2.putText(frame, class_names[label].split(' ')[-1].strip(), (20, 20),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (0, 0, 255), 1)
        cv2.putText(frame, "prob: %.4f" % pred[0][label], (20, 40),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.6,
                    (0, 0, 255), 1)
        # slide the window forward by one frame
        clip.pop(0)
        return frame

    # one capture and one writer per camera, keyed 'cap_<id>' / 'write_<id>'
    for i in camera_ids:
        cap_write['cap_' + i] = cv2.VideoCapture(video_stream[i][1])
        size_1 = (int(cap_write['cap_' + i].get(cv2.CAP_PROP_FRAME_WIDTH)),
                  int(cap_write['cap_' + i].get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fps_1 = cap_write['cap_' + i].get(cv2.CAP_PROP_FPS)
        cap_write["write_" + i] = cv2.VideoWriter(
            video_stream[i][2],
            cv2.VideoWriter_fourcc('m', 'p', '4', 'v'),
            fps_1, size_1)

    try:
        if video_image == 'video':
            # The original loop had no exit condition and spun forever once
            # the streams ended; stop when both cameras have failed a read.
            finished = set()
            while len(finished) < 2:
                camera = 'camera_1' if num % 2 == 0 else 'camera_2'
                num = num + 1
                ret_1, frame_1 = cap_write['cap_' + camera].read()
                if not ret_1:
                    finished.add(camera)
                    continue
                tmp = cv2.cvtColor(frame_1, cv2.COLOR_BGR2RGB)
                video_stream[camera][0].append(cv2.resize(tmp, (171, 128)))
                if len(video_stream[camera][0]) == 16:
                    frame_1 = multi_detecion(video_stream[camera][0], frame_1)
                    print("16")
                cap_write['write_' + camera].write(frame_1)
                print(camera + "success")
        elif video_image == 'image':
            # NOTE(review): os.listdir returns entries in arbitrary order, so
            # reversing it does not yield a chronological sequence; frames
            # are also fed as BGR here while the video path converts to RGB
            # -- confirm both are intended.
            fileList = os.listdir(image_read)
            fileList.reverse()
            clip = []
            for fileName in fileList:
                frame = cv2.imread(os.path.join(image_read, fileName))
                if frame is None:
                    # not an image (or unreadable): cv2.resize would fail
                    print("skip unreadable image: " + fileName)
                    continue
                clip.append(cv2.resize(frame, (171, 128)))
                if len(clip) == 16:
                    frame = multi_detecion(clip, frame)
                cv2.imwrite(image_write + '/' + str(num) + ".jpg", frame)
                print("write success")
                num = num + 1
        else:
            print("choose image or video")
    finally:
        # Release captures and writers so the output videos are finalised.
        for handle in cap_write.values():
            handle.release()
# print("y_train",y_train) x_train.append(np.array(stack_of_16)) cpe += 1 # print("y_train",np_utils.to_categorical(y_train,2)) # print("x_train",np.array(x_train).shape) # print("y_train",np.array(y_train).shape) # print("Total Frames:_x_train ", len(x_train)) yield (np.array(x_train).transpose(0, 1, 2, 3, 4), np_utils.to_categorical(y_train, 4)) #----------train on multiple gpus---------------------------------------# model1 = c3d_model() model1.compile(loss='categorical_crossentropy', optimizer='rmsprop', accuracy=["accuracy"]) # Replicates `model` on 8 GPUs. # This assumes that your machine has 8 available GPUs. #model = multi_gpu_model(model1, gpus=2) #----------------------------Strat_Training-------------------------------------------# H = model1.fit_generator( generate_data(train_frame_list, class_list_train, batch_size), steps_per_epoch=floor(len(train_frame_list) / (batch_size * no_frame)), epochs=1, validation_data=generate_data(test_frame_list, class_list_test, batch_size),
def train_c3d():
    """Train the C3D network with SGD, logging summaries and periodic checkpoints.

    Builds the loss (cross entropy plus L2 on selected parameters) and an
    exponentially decayed learning rate, optionally loads pre-trained
    parameters, then trains for ``n_epoch`` x ``iteration`` steps.  Every
    ``print_seq`` global steps the parameters are saved and accuracy is
    evaluated on one batch of training and one batch of validation data.
    """
    sess = tf.InteractiveSession()
    x, y = placeholder_input(batch_size)
    networks = c3d_model(x, n_classes)

    # get the output
    y_ = networks.outputs
    y_op = tf.argmax(tf.nn.softmax(y_), 1)

    # cross entropy cost (the op name must not contain spaces)
    cross_entropy = tl.cost.cross_entropy(y_, y, name="cross_entropy")
    tf.summary.scalar("cross_entropy", cross_entropy)

    # accuracy
    correct_pred = tf.equal(tf.cast(y_op, tf.int32), y)
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))
    tf.summary.scalar("accuracy", accuracy)

    # l2 loss over the selected entries of networks.all_params
    regularizer = tf.contrib.layers.l2_regularizer(weight_decay)
    l2_param_indices = (0, 2, 4, 5, 7, 8, 11, 13, 14)
    l2_cost = regularizer(networks.all_params[l2_param_indices[0]])
    for idx in l2_param_indices[1:]:
        l2_cost = l2_cost + regularizer(networks.all_params[idx])
    tf.summary.scalar("l2_loss", l2_cost)

    # total loss (the original logged cross_entropy under this tag)
    total_loss = l2_cost + cross_entropy
    tf.summary.scalar("total_loss", total_loss)

    global_steps = tf.Variable(0, trainable=False)
    lr = tf.train.exponential_decay(learning_rate, global_steps,
                                    decay_steps, decay_rate, staircase=True)
    tf.summary.scalar("learning_rate", lr)

    # get the network parameters
    train_parms = networks.all_params
    # optimizer
    train_op = tf.train.GradientDescentOptimizer(lr).minimize(
        total_loss, var_list=train_parms, global_step=global_steps)

    # Initialise first, then load: running the initializer after assigning
    # the pre-trained parameters (as the original did) overwrites them.
    sess.run(tf.global_variables_initializer())

    # load the pre-trained model
    # NOTE(review): existence is checked on `models` but the file is loaded
    # from the hard-coded 'models/model-1000.npz' -- confirm these agree.
    if os.path.exists(models):
        load_params = tl.files.load_npz(path='models/', name='model-1000.npz')
        tl.files.assign_params(sess, load_params, networks)

    # show the networks' information; done after initialisation in case
    # print_params evaluates the parameter values
    networks.print_params()
    networks.print_layers()

    # merge the summaries
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter("logs/train_log", sess.graph, max_queue=5)
    test_writer = tf.summary.FileWriter("logs/test_log", sess.graph, max_queue=5)

    # train
    duration = 0
    for epoch in range(n_epoch):
        for i in range(iteration):
            # get the data from the queue
            x_data = data.get()
            y_label = labels.get()
            # open the dropout layer with training
            feed_dict = {x: x_data, y: y_label}
            feed_dict.update(networks.all_drop)

            start_time = time.time()
            sess.run(train_op, feed_dict=feed_dict)
            duration += time.time() - start_time

            # global_steps is a tf.Variable; its Python value is needed for
            # the modulo test, the checkpoint name and the summary step.
            step = int(sess.run(global_steps))

            # Save a checkpoint and evaluate the model periodically.
            if step % print_seq == 0:
                print("step %d, %.1f seconds spent in training so far"
                      % (step, duration))
                tl.files.save_npz(networks.all_params,
                                  "ufc-101_model_%d.npz" % step, sess)

                # Evaluation fetches only accuracy and summaries; running
                # train_op here would update the weights on evaluation data.
                print('Training Data Eval:')
                train_images, train_labels = read_data(train_path, True)
                feed_dict = {x: train_images, y: train_labels}
                feed_dict.update(networks.all_drop)
                acc, summary = sess.run([accuracy, merged], feed_dict=feed_dict)
                print("accuracy %.5f" % acc)
                train_writer.add_summary(summary, step)

                # test the model on validation dataset
                print('Validation Data Eval:')
                val_images, val_labels = read_data(test_path, True)
                # close the dropout layer
                dp_dict = tl.utils.dict_to_one(networks.all_drop)
                # labels are fed to the placeholder y, not to the logits y_
                feed_dict = {x: val_images, y: val_labels}
                feed_dict.update(dp_dict)
                acc, summary = sess.run([accuracy, merged], feed_dict=feed_dict)
                print("accuracy: " + "{:.5f}".format(acc))
                test_writer.add_summary(summary, step)