def main():
    """Fine-tune a pre-trained UCF101 3D-CNN on ChaLearn color clips.

    Loads cached .npz datasets (color and depth streams for train/valid)
    when present, otherwise builds them with `loaddata`; then strips the
    classifier head off the saved UCF101 model, attaches a new head for
    `--nclass` classes, and trains on the color stream.
    """

    def _str2bool(value):
        # BUG FIX: argparse's type=bool maps every non-empty string
        # (even "False") to True; parse the usual false spellings instead.
        return str(value).lower() not in ('false', '0', 'no', 'n', '')

    parser = argparse.ArgumentParser(
        description='simple 3D convolution for action recognition')
    parser.add_argument('--batch', type=int, default=128)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--train', type=str, default='train.txt')
    parser.add_argument('--valid', type=str, default='valid.txt')
    parser.add_argument('--nclass', type=int, default=249)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--skip', type=_str2bool, default=True)
    parser.add_argument('--depth', type=int, default=16)
    parser.add_argument('--nmodel', type=int, default=3)
    args = parser.parse_args()

    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    img_rows, img_cols, frames = 32, 32, args.depth
    channel_c = 3  # RGB channels
    channel_d = 1  # single depth channel
    vid3d = videoto3d1.Videoto3D(img_rows, img_cols, frames)
    nb_classes = args.nclass

    # npz cache files keyed by class count, clip depth and frame skipping.
    fname_npz_train_c = 'dataset_trainc_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)
    fname_npz_train_d = 'dataset_traind_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)
    fname_npz_valid_c = 'dataset_validc_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)
    fname_npz_valid_d = 'dataset_validd_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)

    # --- validation / color stream ---
    if os.path.exists(fname_npz_valid_c):
        loadeddata = np.load(fname_npz_valid_c)
        Xvc, Yvc = loadeddata["X"], loadeddata["Y"]
    else:
        # BUG FIX: honor --skip instead of hard-coding skip=True.
        xvc, yvc = loaddata(args.valid, vid3d, color=True, skip=args.skip)
        Yvc = np_utils.to_categorical(yvc, nb_classes)
        Xvc = xvc.reshape(
            (xvc.shape[0], img_rows, img_cols, frames, channel_c))
        Xvc = Xvc.astype('float32')
        np.savez(fname_npz_valid_c, X=Xvc, Y=Yvc)
        print('Saved valid color dataset to {}.'.format(fname_npz_valid_c))

    # --- validation / depth stream ---
    if os.path.exists(fname_npz_valid_d):
        loadeddata = np.load(fname_npz_valid_d)
        Xvd, Yvd = loadeddata["X"], loadeddata["Y"]
    else:
        # BUG FIX: honor --skip instead of hard-coding skip=True.
        xvd, yvd = loaddata(args.valid, vid3d, color=False, skip=args.skip)
        # Depth clips reuse the color labels (same samples, same order).
        Yvd = Yvc
        Xvd = xvd.reshape(
            (xvd.shape[0], img_rows, img_cols, frames, channel_d))
        Xvd = Xvd.astype('float32')
        np.savez(fname_npz_valid_d, X=Xvd, Y=Yvd)
        print('Saved valid depth dataset to {}.'.format(fname_npz_valid_d))

    # --- training / color stream ---
    if os.path.exists(fname_npz_train_c):
        loadeddata = np.load(fname_npz_train_c)
        Xtc, Ytc = loadeddata["X"], loadeddata["Y"]
    else:
        # BUG FIX: honor --skip instead of hard-coding skip=True.
        xtc, ytc = loaddata(args.train, vid3d, color=True, skip=args.skip)
        Ytc = np_utils.to_categorical(ytc, nb_classes)
        Xtc = xtc.reshape(
            (xtc.shape[0], img_rows, img_cols, frames, channel_c))
        Xtc = Xtc.astype('float32')
        np.savez(fname_npz_train_c, X=Xtc, Y=Ytc)
        print('Saved train color dataset to {}.'.format(fname_npz_train_c))

    # --- training / depth stream ---
    if os.path.exists(fname_npz_train_d):
        loadeddata = np.load(fname_npz_train_d)
        Xtd, Ytd = loadeddata["X"], loadeddata["Y"]
    else:
        # BUG FIX: the original omitted the skip argument here; pass --skip
        # for consistency with the other three dataset builds.
        xtd, ytd = loaddata(args.train, vid3d, color=False, skip=args.skip)
        # Depth clips reuse the color labels (same samples, same order).
        Ytd = Ytc
        Xtd = xtd.reshape(
            (xtd.shape[0], img_rows, img_cols, frames, channel_d))
        Xtd = Xtd.astype('float32')
        np.savez(fname_npz_train_d, X=Xtd, Y=Ytd)
        print('Saved train depth dataset to {}.'.format(fname_npz_train_d))

    # Train on the predefined train/valid split (no random re-split).
    X_train_c, X_test_c, Y_train_c, Y_test_c = Xtc, Xvc, Ytc, Yvc
    X_train_d, X_test_d, Y_train_d, Y_test_d = Xtd, Xvd, Ytd, Yvd

    sgd = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.005, nesterov=True)
    adam = optimizers.Adam(lr=0.001, decay=0.0001, amsgrad=False)

    input_color = Input(shape=X_train_c.shape[1:], dtype='float32',
                        name='input_color')
    # input_depth = Input(shape=X_train_d.shape[1:], dtype='float32', name='input_depth')

    # Load the pre-trained UCF101 network and strip its classifier head.
    model1 = model_from_json(
        open('3dcnnresult/ucf101_3dcnnmodel.json', 'r').read())
    model1.load_weights('3dcnnresult/ucf101_3dcnnmodel.hd5')
    # Rename layers so they cannot clash with the new head's layer names.
    for layer in model1.layers:
        layer.name = layer.name + str("_2")
    model1.layers.pop()
    model1.layers[-1].outbound_nodes = []
    model1.outputs = [model1.layers[-1].output]
    # Take features from layer index 11 of the pre-trained network.
    output = model1.get_layer(index=11).output
    # Freeze the earliest convolutional layers.
    for layer in model1.layers[0:5]:
        layer.trainable = False
    #output = Flatten()(output)
    new_model = Model(model1.input, output)
    new_model.summary()

    # New classifier head for the ChaLearn classes.
    x = Flatten()(output)
    x = Dense(512, activation='relu')(x)
    x = Dropout(0.5)(x)
    x = Dense(nb_classes, activation='softmax', name='output')(x)
    model = Model(inputs=model1.input, outputs=x)
    model.summary()
    model.compile(loss='categorical_crossentropy',
                  optimizer=adam,
                  metrics=['accuracy'])

    callbacks_list = [XTensorBoard('logs/{}'.format(time()))]
    history = model.fit(X_train_c, Y_train_c,
                        validation_data=(X_test_c, Y_test_c),
                        batch_size=args.batch, epochs=args.epoch,
                        verbose=1, shuffle=True, callbacks=callbacks_list)
    #model.fit([X_train_c, X_train_d], Y_train_c, validation_data=([X_test_c,X_test_d],Y_test_c), batch_size=args.batch,nb_epoch=args.epoch, verbose=1, shuffle=True , callbacks=callbacks_list)

    # Persist architecture and weights.
    model_json = model.to_json()
    with open(os.path.join(args.output,
                           'Chalearn_3dcnnmodel_finetune.json'),
              'w') as json_file:
        json_file.write(model_json)
    model.save_weights(
        os.path.join(args.output, 'Chalearn_3dcnnmodel_finetune.hd5'))
def main():
    """Train a simple 3D CNN on UCF101-style video clips.

    Builds (or loads a cached) clip dataset, trains a three-stage Conv3D
    network, and writes the model JSON/weights plus training history into
    the --output directory.
    """

    def _str2bool(value):
        # BUG FIX: argparse's type=bool maps every non-empty string
        # (even "False") to True; parse the usual false spellings instead.
        return str(value).lower() not in ('false', '0', 'no', 'n', '')

    parser = argparse.ArgumentParser(
        description='simple 3D convolution for action recognition')
    parser.add_argument('--batch', type=int, default=128)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--videos', type=str, default='UCF101',
                        help='directory where videos are stored')
    parser.add_argument('--nclass', type=int, default=101)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--color', type=_str2bool, default=False)
    parser.add_argument('--skip', type=_str2bool, default=True)
    parser.add_argument('--depth', type=int, default=10)
    args = parser.parse_args()

    # BUG FIX: create the output directory up front — plot_model writes
    # model.png into it before training (the original only created it
    # after model.fit, so a fresh run crashed here).
    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    img_rows, img_cols, frames = 32, 32, args.depth
    channel = 3 if args.color else 1
    fname_npz = 'dataset_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)

    vid3d = videoto3d1.Videoto3D(img_rows, img_cols, frames)
    nb_classes = args.nclass

    # Reuse the cached dataset when available.
    if os.path.exists(fname_npz):
        loadeddata = np.load(fname_npz)
        X, Y = loadeddata["X"], loadeddata["Y"]
    else:
        x, y = loaddata(args.videos, vid3d, args.nclass,
                        args.output, args.color, args.skip)
        X = x.reshape((x.shape[0], img_rows, img_cols, frames, channel))
        Y = np_utils.to_categorical(y, nb_classes)
        X = X.astype('float32')
        #np.savez(fname_npz, X=X, Y=Y)
        #print('Saved dataset to dataset.npz.')
    print('X_shape:{}\nY_shape:{}'.format(X.shape, Y.shape))

    # Define model: three Conv3D stages, each followed by pooling + dropout.
    model = Sequential()
    model.add(Conv3D(32, kernel_size=(3, 3, 3),
                     input_shape=(X.shape[1:]), padding="same"))
    model.add(LeakyReLU())
    model.add(Conv3D(32, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.25))
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.25))
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, activation='softmax'))

    model.compile(loss=categorical_crossentropy,
                  optimizer='rmsprop', metrics=['accuracy'])
    model.summary()
    plot_model(model, show_shapes=True,
               to_file=os.path.join(args.output, 'model.png'))

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=43)

    history = model.fit(X_train, Y_train,
                        validation_data=(X_test, Y_test),
                        batch_size=args.batch, epochs=args.epoch,
                        verbose=1, shuffle=True)

    # Persist architecture and weights.
    model_json = model.to_json()
    with open(os.path.join(args.output, 'ucf101_3dcnnmodel.json'),
              'w') as json_file:
        json_file.write(model_json)
    model.save_weights(os.path.join(args.output, 'ucf101_3dcnnmodel.hd5'))

    # BUG FIX: evaluate once and keep the result (the original evaluated
    # twice and discarded the first result).
    loss, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
    plot_history(history, args.output)
    save_history(history, args.output)
def main():
    """Train a small 2D CNN on single frames extracted from videos.

    Uses the Keras 1 API found in the original code (Convolution2D,
    border_mode, nb_epoch); kept as-is to match the installed Keras
    version this script was written against.
    """
    parser = argparse.ArgumentParser(description='2D convolution')
    parser.add_argument('--batch', type=int, default=128)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--videos', type=str, default='videos',
                        help='directory where videos are stored')
    parser.add_argument('--nclass', type=int, default=101)
    parser.add_argument('--output', type=str, required=True)
    args = parser.parse_args()

    # BUG FIX: the original never created the output directory, yet
    # plot_model and the save calls below write into it.
    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    img_rows, img_cols = 32, 32

    # Depth of 1: each sample is a single grayscale frame.
    vid3d = videoto3d1.Videoto3D(img_rows, img_cols, 1)
    x, y = loaddata(args.videos, vid3d, args.nclass, args.output)
    X = x.reshape(x.shape[0], img_cols, img_rows, 1)
    # Class count is derived from the labels actually seen.
    nb_classes = max(y) + 1
    Y = np_utils.to_categorical(y, nb_classes)
    X = X.astype('float32')
    print('X shape:{}\nYshape:{}'.format(X.shape, Y.shape))

    # define model: two conv stages + dense classifier
    model = Sequential()
    model.add(
        Convolution2D(32, 3, 3, border_mode='same', input_shape=X.shape[1:]))
    model.add(Activation('relu'))
    model.add(Convolution2D(32, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Convolution2D(64, 3, 3, border_mode='same'))
    model.add(Activation('relu'))
    model.add(Convolution2D(64, 3, 3))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))
    model.add(Flatten())
    model.add(Dense(512))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes))
    model.add(Activation('softmax'))
    model.compile(loss='categorical_crossentropy',
                  optimizer='adam', metrics=['accuracy'])
    model.summary()
    plot_model(model, show_shapes=True,
               to_file=os.path.join(args.output, 'model.png'))

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=4)

    history = model.fit(X_train, Y_train, batch_size=args.batch,
                        nb_epoch=args.epoch,
                        validation_data=(X_test, Y_test), shuffle=True)

    # Persist architecture and weights.
    model_json = model.to_json()
    with open(os.path.join(args.output, 'ucf101cnnmodel.json'),
              'w') as json_file:
        json_file.write(model_json)
    model.save_weights(os.path.join(args.output, 'ucf101cnnmodel.hd5'))

    loss, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
    plot_history(history, args.output)
    save_history(history, args.output)
def main():
    """Train a 3D CNN on pre-cached ChaLearn depth-stream npz datasets.

    Expects dataset_traind_*.npz / dataset_validd_*.npz to already exist
    (built by the companion preprocessing script); trains a three-stage
    Conv3D network and saves model JSON/weights plus history to --output.
    """

    def _str2bool(value):
        # BUG FIX: argparse's type=bool maps every non-empty string
        # (even "False") to True; parse the usual false spellings instead.
        return str(value).lower() not in ('false', '0', 'no', 'n', '')

    parser = argparse.ArgumentParser(
        description='simple 3D convolution for action recognition')
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--train', type=str, default='train.txt')
    parser.add_argument('--valid', type=str, default='valid.txt')
    parser.add_argument('--nclass', type=int, default=249)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--skip', type=_str2bool, default=True)
    parser.add_argument('--depth', type=int, default=16)
    args = parser.parse_args()

    # BUG FIX: create the output directory up front — plot_model writes
    # model.png into it before training (the original only created it
    # after model.fit, so a fresh run crashed here).
    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    # Initializing the dimensions of the frames.
    img_rows, img_cols, frames = 32, 32, args.depth
    nb_classes = args.nclass

    # Only the depth-stream (*_d) caches are used by this script; the
    # unused color-cache names and the unused Videoto3D/channel locals
    # from the original were removed as dead code.
    fname_npz_train_d = 'dataset_traind_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)
    fname_npz_valid_d = 'dataset_validd_{}_{}_{}.npz'.format(
        args.nclass, args.depth, args.skip)

    loadeddata = np.load(fname_npz_valid_d)
    Xv, Yv = loadeddata["X"], loadeddata["Y"]
    loadeddata = np.load(fname_npz_train_d)
    Xt, Yt = loadeddata["X"], loadeddata["Y"]

    print('Xt_shape:{}\nYt_shape:{}'.format(Xt.shape, Yt.shape))
    print('Xv_shape:{}\nYv_shape:{}'.format(Xv.shape, Yv.shape))

    # Train on the predefined train/valid split (no random re-split).
    X_train, X_test, Y_train, Y_test = Xt, Xv, Yt, Yv

    # Define model: three Conv3D stages, each followed by pooling + dropout.
    model = Sequential()
    model.add(
        Conv3D(32, kernel_size=(3, 3, 3), input_shape=(X_train.shape[1:]),
               padding="same"))
    model.add(LeakyReLU())
    model.add(Conv3D(32, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.25))
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.25))
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(Conv3D(64, padding="same", kernel_size=(3, 3, 3)))
    model.add(LeakyReLU())
    model.add(MaxPooling3D(pool_size=(3, 3, 3), padding="same"))
    model.add(Dropout(0.5))
    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))
    model.add(Dense(nb_classes, activation='softmax'))
    model.summary()
    plot_model(model, show_shapes=True,
               to_file=os.path.join(args.output, 'model.png'))

    # List of optimizers we experimented with; adam is the one in use.
    adam = optimizers.Adam(lr=0.01, decay=0.0001, amsgrad=False)
    sgd = optimizers.SGD(lr=0.01, momentum=0.9, decay=0.005, nesterov=True)
    ada = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    nadam = optimizers.Nadam(lr=0.01, beta_1=0.9, beta_2=0.999,
                             epsilon=None, schedule_decay=0.004)

    # Compiling and fitting the model.
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])
    callbacks_list = [XTensorBoard('logs/{}'.format(time()))]
    #tensorboard = TensorBoard(log_dir="logs/{}".format(time()))

    # BUG FIX: the original called builtin eval() on the optimizer's lr
    # tensor, which raises TypeError (K.eval was presumably intended);
    # report the configured hyper-parameters instead.
    print(model.optimizer.get_config())

    history = model.fit(X_train, Y_train,
                        validation_data=(X_test, Y_test),
                        batch_size=args.batch, epochs=args.epoch,
                        verbose=1, shuffle=True, callbacks=callbacks_list)

    # Saving the model (architecture + weights).
    model_json = model.to_json()
    with open(
            os.path.join(
                args.output,
                '3dcnn_{}_{}_depth.json'.format(args.epoch, args.batch)),
            'w') as json_file:
        json_file.write(model_json)
    model.save_weights(
        os.path.join(args.output,
                     '3dcnn_{}_{}_depth.h5'.format(args.epoch, args.batch)))

    # Evaluation on the held-out validation split.
    loss, acc = model.evaluate(X_test, Y_test, verbose=1)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
    plot_history(history, args.output)
    save_history(history, args.output)
def main():
    """Evaluate a previously trained 3D CNN on the test split.

    Loads (or builds and caches) the test dataset, restores the model
    architecture and weights from 3dcnnresult/, and reports test loss
    and accuracy.
    """

    def _str2bool(value):
        # BUG FIX: argparse's type=bool maps every non-empty string
        # (even "False") to True; parse the usual false spellings instead.
        return str(value).lower() not in ('false', '0', 'no', 'n', '')

    parser = argparse.ArgumentParser(
        description='simple 3D convolution for action recognition')
    parser.add_argument('--batch', type=int, default=32)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--videos', type=str, default='UCF101',
                        help='directory where videos are stored')
    parser.add_argument('--nclass', type=int, default=249)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--skip', type=_str2bool, default=True)
    parser.add_argument('--depth', type=int, default=16)
    args = parser.parse_args()

    # Initializing the dimensions of the frames; RGB input.
    img_rows, img_cols, frames = 32, 32, args.depth
    channel = 3
    nb_classes = args.nclass
    fname_npz = 'dataset_test_{}_{}_{}.npz'.format(args.nclass, args.depth,
                                                   args.skip)

    vid3d = videoto3d1.Videoto3D(img_rows, img_cols, frames)

    # If the dataset is already stored in an npz file, reuse it:
    if os.path.exists(fname_npz):
        loadeddata = np.load(fname_npz)
        X, Y = loadeddata["X"], loadeddata["Y"]
    else:
        # Otherwise load the data with the helper and cache it for reuse.
        x, y = loaddata(args.videos, vid3d, args.nclass, args.output,
                        args.skip)
        Y = np_utils.to_categorical(y, nb_classes)
        X = x.reshape((x.shape[0], img_rows, img_cols, frames, channel))
        X = X.astype('float32')
        np.savez(fname_npz, X=X, Y=Y)
        print('Saved test dataset to {}.'.format(fname_npz))
    print('X_shape:{}\nY_shape:{}'.format(X.shape, Y.shape))

    # Restore the trained model (architecture + weights) from disk.
    model = model_from_json(
        open('3dcnnresult/3dcnn_500_32_adam2.json', 'r').read())
    model.load_weights('3dcnnresult/3dcnn_500_32_adam2.h5')
    model.summary()
    print("Loaded model from disk")

    # List of optimizers we experimented with; adam is the one in use.
    adam = optimizers.Adam(lr=0.01, decay=0.0001, amsgrad=False)
    sgd = optimizers.SGD(lr=0.001, momentum=0.9, decay=0.001, nesterov=True)
    ada = optimizers.Adagrad(lr=0.01, epsilon=None, decay=0.0)
    nadam = optimizers.Nadam(lr=0.01, beta_1=0.9, beta_2=0.999,
                             epsilon=None, schedule_decay=0.004)

    # Compiling the model (required before evaluate).
    model.compile(loss='categorical_crossentropy', optimizer=adam,
                  metrics=['accuracy'])

    # Evaluating the model on the test set.
    loss, acc = model.evaluate(X, Y, verbose=1)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
def main():
    """Fine-tune a Sports-1M pre-trained C3D network on UCF101 clips.

    Builds 112x112 clips, loads the sports1M weights into the
    `get_model` architecture, freezes all but the last five layers,
    and trains/evaluates on a random 80/20 split.
    """

    def _str2bool(value):
        # BUG FIX: argparse's type=bool maps every non-empty string
        # (even "False") to True; parse the usual false spellings instead.
        return str(value).lower() not in ('false', '0', 'no', 'n', '')

    parser = argparse.ArgumentParser(
        description='simple 3D convolution for action recognition')
    parser.add_argument('--batch', type=int, default=128)
    parser.add_argument('--epoch', type=int, default=100)
    parser.add_argument('--videos', type=str, default='UCF101',
                        help='directory where videos are stored')
    parser.add_argument('--nclass', type=int, default=101)
    parser.add_argument('--output', type=str, required=True)
    parser.add_argument('--skip', type=_str2bool, default=True)
    parser.add_argument('--depth', type=int, default=10)
    args = parser.parse_args()

    # BUG FIX: create the output directory up front — plot_model writes
    # model.png into it before training (the original only created it
    # after model.fit, so a fresh run crashed here).
    if not os.path.isdir(args.output):
        os.makedirs(args.output)

    # C3D/sports1M-sized crops.
    img_rows, img_cols, frames = 112, 112, args.depth
    channel = 3
    fname_npz = 'dataset_{}_{}_{}.npz'.format(args.nclass, args.depth,
                                              args.skip)
    vid3d = videoto3d1.Videoto3D(img_rows, img_cols, frames)
    nb_classes = args.nclass

    x, y = loaddata(args.videos, vid3d, args.nclass, args.output, args.skip)
    X = x.reshape((x.shape[0], img_rows, img_cols, frames, channel))
    Y = np_utils.to_categorical(y, nb_classes)
    X = X.astype('float32')
    print('X_shape:{}\nY_shape:{}'.format(X.shape, Y.shape))

    # BUG FIX: the original first built a model with model_from_json and
    # then immediately discarded it by reassigning `model = get_model(...)`;
    # the dead load was removed.
    model = get_model(X, nb_classes, summary=True)
    model.load_weights('caffe_weights/sports1M_weights.h5')
    #model.save_weights('sports1M_weights.h5', overwrite=True)
    #json_string = model.to_json()
    #with open('sports1M_model.json', 'w') as f:
    #    f.write(json_string)

    # Freeze the layers except the last 5 layers.
    for layer in model.layers[:-5]:
        layer.trainable = False
    # Check the trainable status of the individual layers.
    for layer in model.layers:
        print(layer, layer.trainable)
    model.summary()

    model.compile(loss=categorical_crossentropy,
                  optimizer=Adam(), metrics=['accuracy'])
    plot_model(model, show_shapes=True,
               to_file=os.path.join(args.output, 'model.png'))

    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=0.2, random_state=43)

    history = model.fit(X_train, Y_train,
                        validation_data=(X_test, Y_test),
                        batch_size=args.batch, epochs=args.epoch,
                        verbose=1, shuffle=True)

    # Persist architecture and weights.
    model_json = model.to_json()
    with open(os.path.join(args.output, 'ucf101_3dcnnmodel.json'),
              'w') as json_file:
        json_file.write(model_json)
    model.save_weights(os.path.join(args.output, 'ucf101_3dcnnmodel.hd5'))

    # BUG FIX: evaluate once and keep the result (the original evaluated
    # twice and discarded the first result).
    loss, acc = model.evaluate(X_test, Y_test, verbose=0)
    print('Test loss:', loss)
    print('Test accuracy:', acc)
    plot_history(history, args.output)
    save_history(history, args.output)