def classify(video_file, seq_length=20, saved_model='./cnn_lstm_VGGFace10.h5'):
    capture = cv2.VideoCapture(video_file)
    width = capture.get(cv2.CAP_PROP_FRAME_WIDTH)    # float
    height = capture.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
    print('#########', video_file, '#########')

    # Get the dataset.
    data = DataSet(seq_length=seq_length, class_limit=2,
                   image_shape=(224, 224, 3))

    # Get the feature extractor and the saved LSTM model.
    extract_model = Extractor(image_shape=(height, width, 3))
    rm = ResearchModels(len(data.classes), 'lstm', seq_length, saved_model,
                        features_length=2622)
    saved_LSTM_model = rm.lstm()
    saved_LSTM_model.load_weights(saved_model)

    frames = []
    frame_count = 0
    while True:
        ret, frame = capture.read()
        # Bail out when the video file ends.
        if not ret:
            break

        # Save each frame of the video to a list.
        frame_count += 1
        frames.append(frame)

        if frame_count < seq_length:
            continue  # capture frames until we have a full sequence
        frame_count = 0

        # For each frame, extract features and prepare them for classification.
        sequence = []
        for image in frames:
            image = cv2.resize(image, (224, 224))
            features = extract_model.extract_image(image)
            sequence.append(features)

        # Classify the sequence.
        prediction = saved_LSTM_model.predict(np.expand_dims(sequence, axis=0))
        print(prediction)
        data.print_class_from_prediction(np.squeeze(prediction, axis=0))
        frames = []

    print(np.argmax(prediction))
    return np.argmax(prediction)
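# A minimal usage sketch for classify() above; the video path is a placeholder
# and the weights default to the file named in the signature:
#
#     pred_idx = classify('video.mp4', seq_length=20)
#     print('Predicted class index:', pred_idx)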
def train(ImageLoader, data_type, seq_length, saved_model=None,
          class_limit=None, image_shape=None, load_to_memory=False,
          batch_size=32, nb_epoch=100):
    # Helper callbacks: checkpointing, TensorBoard, and early stopping.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints',
                              data_type + '.{epoch:03d}-{acc:.3f}.hdf5'),
        verbose=1,
        save_best_only=False)
    tb = TensorBoard(log_dir=os.path.join('data', 'logs'))
    early_stopper = EarlyStopping(patience=5)
    timestamp = time.time()

    X, y = ImageLoader.load()
    rm = ResearchModels(class_limit, seq_length, saved_model)
    rm.model.fit(X, y,
                 batch_size=batch_size,
                 verbose=1,
                 callbacks=[tb, early_stopper, checkpointer],
                 epochs=nb_epoch)
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 8

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)
    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate! (Keras 2 renamed `val_samples` to `steps`; 3200 samples at
    # this batch size is 3200 // batch_size steps.)
    results = rm.model.evaluate_generator(generator=val_generator,
                                          steps=3200 // batch_size)
    print(results)
    print(rm.model.metrics_names)
def predict(data_type, model, seq_length=80, saved_model=None, concat=False,
            class_limit=None, image_shape=None):
    batch_size = 48
    correct = 0

    # Get the data and process it.
    data = Predict_DataSet(seq_length=seq_length, class_limit=class_limit)
    total = len(data.data)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    for each_data in range(len(data.data)):
        val_generator = data.frame_generator(batch_size, each_data,
                                             data_type, concat)

        # Predict!
        results = rm.model.predict_generator(val_generator, steps=1)
        with open('data_file_170825_test.csv', 'w', newline='') as fout:
            writer = csv.writer(fout)
            writer.writerows(results)

        # Pick the most probable class for every sequence in the batch.
        predict_list = []
        for row in results:
            max_idx = 0
            for i in range(len(data.classes)):
                if row[i] > row[max_idx]:
                    max_idx = i
            predict_list.append(max_idx)

        # A majority vote across sequences decides the clip's class.
        predict_result = 0
        for i in range(len(data.classes)):
            if predict_list.count(i) > predict_list.count(predict_result):
                predict_result = i
        predict_class = data.classes[predict_result]

        if data.data[each_data][0].find(predict_class) != -1:
            correct += 1
            o_or_x = 'o'
        else:
            o_or_x = 'x'
        print(data.data[each_data][1] + ' -> ' + predict_class + ' : ' + o_or_x)

    print('correct: ' + str(correct) + '/' + str(total) +
          ' (' + str(100 * correct / total) + '%)')
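# The nested loops in predict() re-implement an argmax and a majority vote by
# hand. A minimal numpy sketch of the same aggregation (assuming `results` is
# the (n_sequences, n_classes) array returned by predict_generator):
import numpy as np

def majority_vote(results):
    """Return the class index chosen most often across sequences."""
    per_sequence = np.argmax(results, axis=1)  # best class per sequence
    counts = np.bincount(per_sequence, minlength=results.shape[1])
    return int(np.argmax(counts))              # most frequent class wins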
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    # Creating train generator with 8596 samples.
    # Creating test generator with 3418 samples.
    # Total 12014 samples.
    test_data_num = 3418
    batch_size = 32

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)
    test_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    results = rm.model.evaluate_generator(generator=test_generator,
                                          steps=test_data_num // batch_size)
    print(results)
    print(rm.model.metrics_names)
def train(model, data_path, sequence_length=30, batch_size=32, nb_epoch=100,
          split_strat=1, split=0.3):
    create_log_dirs(["logs/", "logs/csv/", "logs/tensorboard",
                     "logs/checkpoints"], data_path)
    model_name = "{}-{}".format(model, time.time())

    # Helper: Save the model.
    checkpoint = ModelCheckpoint(
        filepath=os.path.join(data_path, 'logs', 'checkpoints',
                              model_name + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir="{}logs/tensorboard/{}".format(data_path, model_name))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save data in csv.
    csv_logger = CSVLogger(os.path.join(data_path, 'logs', 'csv',
                                        model_name + '-training-' +
                                        str(time.time()) + '.csv'))

    # Training data: the LSTM consumes pre-extracted features; everything else
    # consumes raw frames.
    data_loader = DataLoader(data_path, "frames", split_strat=split_strat, split=split)
    if model in ["lstm"]:
        data_loader = DataLoader(data_path, "features", split_strat=split_strat, split=split)
    X, y, X_test, y_test, n_classes = data_loader.load_data()

    rm = ResearchModels(n_classes, model, sequence_length)
    print(X.shape)
    rm.model.fit(X, y,
                 batch_size=batch_size,
                 validation_data=(X_test, y_test),
                 verbose=1,
                 callbacks=[tb, early_stopper, csv_logger, checkpoint],
                 epochs=nb_epoch)
def validate(data_type, model, seq_length=40, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 463

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)
    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Report full classification metrics instead of evaluate_generator().
    print('Classification metrics for the testing phase\n')
    metric_calculation(val_generator, rm.model, 0)
def validate(data_type, model, seq_length=125, saved_model=None, concat=False,
             class_limit=None, image_shape=None):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)
    val_generator = data.frame_generator(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Predict! (Keras 2 renamed `val_samples` to `steps`; set it to the
    # number of test files.)
    prediction = rm.model.predict_generator(generator=val_generator, steps=4)
    prediction = prediction.tolist()
    print(prediction)
    print("===========================")
    pd.DataFrame(prediction).to_csv('prediction.csv')
def validate(model, saved_model, npoints=80, datafile='rect_same_period',
             pad=True, resized=False, **kargs):
    now = datetime.now()
    date = now.strftime("%d:%m:%Y-%H:%M")

    data = DataSet(npoints=npoints, datafile=datafile, **kargs)
    rm = ResearchModels(model, npoints=npoints, saved_model=saved_model)

    indices, X, y = data.get_all_sequences_in_memory('test', with_indices=True,
                                                     pad=pad, resized=resized)
    scores = rm.model.evaluate(X, y)  # renamed from `eval`, which shadows the builtin
    pred = rm.model.predict(X)
    print(scores)

    suffix = '%s-%s-%s' % (model, datafile, os.path.basename(saved_model))
    np.save('.tmp/indices-' + suffix, indices)
    np.save('.tmp/prediction-' + suffix, pred)
    np.save('.tmp/true-' + suffix, y)
def validate(data_type, model, seq_length=50, saved_model=None,
             class_limit=None, image_shape=None, train_test='test'):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Get the model, then pop the final layer so the network outputs the
    # penultimate layer's activations (features) instead of class scores.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
    rm.model.layers.pop()
    rm.model.outputs = [rm.model.layers[-2].output]
    rm.model.output_layers = [rm.model.layers[-2]]
    rm.model.layers[-2].outbound_nodes = []

    X, y = data.get_data_train_test(data_type, train_test)
    size = len(X)

    # Predict!
    results = rm.model.predict(X, verbose=1)
    print(results.shape)
    return (results, y)
def validate(data_type, seq_length=50, saved_model=None, class_limit=None,
             image_shape=None):
    sequenceLoader = ImageLoader(param['testSet'], param['testLabels'],
                                 seq_length, image_shape)
    X, y = sequenceLoader.load()
    rm = ResearchModels(class_limit, seq_length, saved_model)
    results = rm.model.evaluate(X, y)
    print(results)
    print(rm.model.metrics_names)
def validate(model, saved_model, npoints=20, **kargs):
    data = DataSet(npoints=npoints, **kargs)
    rm = ResearchModels(model, npoints=npoints, saved_model=saved_model)
    X, y = data.get_all_sequences_in_memory('test')
    scores = rm.model.evaluate(X, y)  # renamed from `eval`, which shadows the builtin
    pred = rm.model.predict(X)
    print(scores)
    np.save('prediction', pred)
    np.save('true', y)
def train(model, load_to_memory=True, batch_size=None, nb_epoch=100,
          npoints=40, **kargs):
    # Helper: Save the model.
    if not os.path.isdir(os.path.join(data_dir, 'checkpoints', model)):
        os.mkdir(os.path.join(data_dir, 'checkpoints', model))
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(data_dir, 'checkpoints', model, 'saved_9_25.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join(data_dir, 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    t = time.localtime(time.time())
    timestamp = str(t.tm_mon) + '-' + str(t.tm_mday) + ':' + \
                str(t.tm_hour) + '-' + str(t.tm_min)
    csv_logger = CSVLogger(os.path.join(data_dir, 'logs',
                                        model + '-training-' + str(timestamp) + '.log'))

    data = DataSet(npoints=npoints, **kargs)
    rm = ResearchModels(model, npoints=npoints)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train')
        X_val, y_val = data.get_all_sequences_in_memory('val')
    else:
        # Get generators (batch_size must not be None on this path).
        steps_per_epoch = len(data.train) // batch_size
        generator = data.frame_generator(batch_size, 'train')
        val_generator = data.frame_generator(batch_size, 'val')

    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_val, y_val),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpointer],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40)
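# The generator path above hands fit_generator an endless stream of batches.
# A minimal sketch of the contract it expects (illustrative only -- not the
# actual DataSet.frame_generator implementation):
import random
import numpy as np

def endless_batches(samples, batch_size):
    """Yield (X, y) batch tuples forever, reshuffling on each pass."""
    while True:
        random.shuffle(samples)
        for i in range(0, len(samples) - batch_size + 1, batch_size):
            batch = samples[i:i + batch_size]
            X = np.array([s[0] for s in batch])
            y = np.array([s[1] for s in batch])
            yield X, y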
def load_model():
    # NOTE: assumes `data` (a DataSet) and `seq_length` are defined at module
    # level; as written they are otherwise undefined here.
    model = 'lstm'
    saved_model = 'data\\checkpoints\\lstm-features.546-0.195.hdf5'
    data_type = 'features'
    image_shape = None
    concat = False
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)
    return rm
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type +
                              '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard (referenced in the callbacks below but missing
    # from the original).
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning (also missing from the original).
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs',
                                        model + '-training-' + str(timestamp) + '.log'))

    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Multiply by 0.7 to approximate the train split's share of data.data.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    if load_to_memory:
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type +
                              '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs',
                                        model + '-training-' + str(timestamp) + '.log'))

    train_data = Dao('./train_dataset_desc', seq_length=seq_length,
                     image_shape=image_shape)
    validation_data = Dao('./validation_dataset_desc', seq_length=seq_length,
                          image_shape=image_shape)

    steps_per_epoch = train_data.size() // batch_size
    train_gen = train_data.frame_generator(batch_size)
    val_generator = validation_data.frame_generator(batch_size)

    # Get the model.
    rm = ResearchModels(train_data.num_of_classes(), model, seq_length, saved_model)

    rm.model.fit_generator(
        generator=train_gen,
        steps_per_epoch=steps_per_epoch,
        epochs=nb_epoch,
        verbose=1,
        callbacks=[tb, early_stopper, csv_logger, checkpointer],
        validation_data=val_generator,
        validation_steps=40,
        workers=4)
def __init__(self, epoch=25, val_loss=1.174, model_type='lstm',
             seq_length=40, data_type='features'):
    # model_type can be one of: lstm, lrcn, mlp, conv_3d, c3d.
    self.model_type = model_type
    self.seq_length = seq_length
    self.data_type = data_type
    filepath = os.path.join(
        '..', 'data', 'checkpoints',
        model_type + '-' + data_type + '.{:03d}-{:.3f}.hdf5'.format(epoch, val_loss))

    # Get the data and process it.
    self.data = DataSet(seq_length=seq_length, class_limit=None)

    # Get the model.
    print("Model Type:", model_type)
    self.rm = ResearchModels(len(self.data.classes), self.model_type,
                             self.seq_length, filepath)

    # Read the video IDs.
    self.all_video_ids = sorted([
        os.path.basename(name).split('.webm')[0]
        for name in glob.glob('../*/videos_safe_viewing/*.webm')
    ])
    assert len(self.all_video_ids) != 0

    remove_list = read_remove_list()  # no longer used to filter the ID list

    self.showing_ids = []
    print('Ready to accept ReST calls!')
def predict(data_type, model, seq_length=80, saved_model=None, concat=False,
            class_limit=None, image_shape=None):
    batch_size = 48

    # Get the data and process it.
    data = Predict_DataSet(seq_length=seq_length, class_limit=class_limit)
    val_generator = data.frame_generator(batch_size, 'predict', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Predict!
    results = rm.model.predict_generator(val_generator, steps=1)
    with open('data_file_170924_test.csv', 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(results)

    # Same per-sequence argmax and majority vote as in predict() above.
    predict_list = []
    for row in results:
        max_idx = 0
        for i in range(len(data.classes)):
            if row[i] > row[max_idx]:
                max_idx = i
        predict_list.append(max_idx)

    predict_result = 0
    for i in range(len(data.classes)):
        if predict_list.count(i) > predict_list.count(predict_result):
            predict_result = i

    predict_class = data.classes[predict_result]
    print(predict_result)
    print(predict_class)
    return predict_class
def classify_video(lstm_weights, frames_features):
    """Run the frame features through the LSTM to classify the video.

    Returns the class-probability vector; `frames_nb` and `features_length`
    are module-level constants.
    """
    print("Load LSTM network ...")
    lstm = ResearchModels(model="lstm", saved_model=lstm_weights,
                          nb_classes=101, seq_length=frames_nb)

    # Resize features for input into the LSTM model:
    # (1, frames_nb, features_length).
    X = np.array(frames_features)
    X.resize(1, frames_nb, features_length)

    print("Predict video category ...")
    timer_start()
    category = lstm.model.predict(X)
    timer_stop()
    print("Most probable 3 categories:",
          category.argsort(axis=1)[:, -3:][:, ::-1])
    return category
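# The argsort expression above pulls out the top-3 classes. A standalone
# numpy sketch of the same trick with made-up scores:
import numpy as np

scores = np.array([[0.10, 0.60, 0.05, 0.25]])
top3 = scores.argsort(axis=1)[:, -3:][:, ::-1]  # indices of the 3 largest, best first
print(top3)  # -> [[1 3 0]]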
def validate(data_type, model, seq_length=80, saved_model=None,
             class_limit=None, image_shape=None):
    batch_size = 1

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)
    val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Evaluate!
    results = rm.model.evaluate_generator(generator=val_generator, steps=4)
    print(results)
    print(rm.model.metrics_names)

    x, y = data.get_all_sequences_in_memory('test', data_type)
    print("CLASS:", y)

    # predict_classes is only available on Sequential models.
    pred_result = rm.model.predict_classes(x, batch_size=1)
    print("PREDICTED RESULT 1:", pred_result)

    # Predict! (x is already batched, so no expand_dims is needed here.)
    prediction = rm.model.predict(x)
    print("PREDICTED RESULT 2:", prediction)
    data.print_class_from_prediction(prediction[0])
def train(data_type, seq_length, model, learning_rate, learning_decay,
          saved_model=None, class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    # `training_num` and `main_folder` are module-level settings.
    print('training_num is', training_num)

    if model == 'lstm_regression':
        regression = 1
        sequence_len = 20
        monitor_par = 'val_loss'
    else:
        regression = 0
        sequence_len = seq_length
        monitor_par = 'val_acc'

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(main_folder, 'checkpoints', model + '2',
                              model + '-{epoch:03d}.hdf5'),
        monitor=monitor_par,
        verbose=1,
        save_best_only=True)

    # Helper: Save results.
    csv_logger = CSVLogger(os.path.join(main_folder, 'logs',
                                        model + '2-training-log.csv'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Each epoch walks over all the training data.
    steps_per_epoch = training_num // batch_size
    print('step is: %d' % steps_per_epoch)

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type, regression)
        val_generator = data.frame_generator(batch_size, 'test', data_type, regression)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, sequence_len,
                        learning_rate, learning_decay, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        hist = rm.model.fit(X, y,
                            batch_size=batch_size,
                            validation_data=(X_test, y_test),
                            verbose=1,
                            callbacks=[csv_logger],
                            epochs=nb_epoch)
    else:
        # Use fit generator. If the GPU idles waiting for batches, try
        # increasing the number of workers.
        hist = rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
    return hist
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints', model + '-' + data_type +
                              '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs',
                                        model + '-training-' + str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Multiply by 0.7 to approximate the train split's share of data.data.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
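# A minimal driver for the train() above, sketched with the same defaults the
# function declares (the 'lstm'/'features' pairing mirrors checkpoint names
# used elsewhere in this file and is an assumption, not a requirement):
if __name__ == '__main__':
    train('features', 40, 'lstm',
          saved_model=None, class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100)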
def train(data_type, seq_length, model, saved_model=None, concat=False,
          class_limit=None, image_shape=None, load_to_memory=False):
    # Set variables.
    nb_epoch = 1000
    batch_size = 16

    # Helper: Save the model (a weights-only variant is kept for
    # Inception-style models).
    checkpointer = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type +
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True)
    incepcheck = ModelCheckpoint(
        filepath='./data/checkpoints/' + model + '-' + data_type +
                 '.{epoch:03d}-{val_loss:.3f}.hdf5',
        verbose=1,
        save_best_only=True,
        save_weights_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir='./data/logs')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=10)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger('./data/logs/' + model + '-training-' +
                           str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Multiply by 0.7 to approximate the train split's share of data.data.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(batch_size, 'train', data_type, concat)
        X_test, y_test = data.get_all_sequences_in_memory(batch_size, 'test', data_type, concat)
    elif model == 'div_crnn':
        generator = data.frame_generator2(batch_size, 'train', data_type, concat)
        val_generator = data.frame_generator2(batch_size, 'test', data_type, concat)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type, concat)
        val_generator = data.frame_generator(batch_size, 'test', data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[checkpointer, tb, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(generator=generator,
                               steps_per_epoch=steps_per_epoch,
                               epochs=nb_epoch,
                               verbose=1,
                               callbacks=[checkpointer, tb, csv_logger],
                               validation_data=val_generator,
                               validation_steps=10)
def train(data_type, seq_length, model, class_path, saved_model=None,
          class_limit=None, image_shape=None, features=False,
          batch_size=32, nb_epoch=50, num_classes=10):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(class_path, 'CNN', 'JESTER', 'scripts',
                              'multiple_frames', 'checkpoints',
                              model + '-' + data_type + '-4_class_50_epochs_normal'
                              '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(
        log_dir=os.path.join(class_path, 'CNN', 'JESTER', 'scripts',
                             'multiple_frames', 'tf_logs',
                             model + '_4_class_50_epochs_normal'))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(
        os.path.join(class_path, 'CNN', 'JESTER', 'scripts', 'multiple_frames',
                     'result_logs',
                     model + '-training-4_class_50_epochs_normal' +
                     str(timestamp) + '.log'))

    dataset_class_path = '{0}/CNN/JESTER/data'.format(class_path)
    data = Dataset(path=dataset_class_path)

    if features:
        # Get sequence feature data (post-InceptionV3, ImageNet weights).
        start_time = dt.datetime.now()
        print('Start sequence data import {}'.format(start_time))
        X_train, y_train = data.load_JESTER_sequences('train', categorical=True)
        X_test, y_test = data.load_JESTER_sequences('test', categorical=True)
        end_time = dt.datetime.now()
        print('Stop load sequence data time {}'.format(end_time))
        print('Elapsed load sequence data time {}'.format(end_time - start_time))
    elif features is None:
        start_time = dt.datetime.now()
        print('Start sequence data import {}'.format(start_time))
        X_train, y_train = data.load_JESTER('train', categorical=True)
        X_test, y_test = data.load_JESTER('test', categorical=True)
        end_time = dt.datetime.now()
        print('Stop load sequence data time {}'.format(end_time))
        print('Elapsed load sequence data time {}'.format(end_time - start_time))
    else:  # features is False: stream batches from disk
        start_time = dt.datetime.now()
        print('Start data import {}'.format(start_time))
        generator = data.load_generator('train', batch_size=batch_size,
                                        num_classes=num_classes, regeneration=True)
        test_generator = data.load_generator('test', batch_size=batch_size,
                                             num_classes=num_classes, regeneration=True)
        end_time = dt.datetime.now()
        print('Stop load data time {}'.format(end_time))
        print('Elapsed load data time {}'.format(end_time - start_time))

    # Get the model.
    rm = ResearchModels(num_classes, model, seq_length, saved_model)

    # Fit!
    if features or features is None:
        # Used for the LSTM (features loaded after InceptionV3).
        start_time = dt.datetime.now()
        print('Start sequence train data fit {}'.format(start_time))
        rm.model.fit(X_train, y_train,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpointer],
                     epochs=nb_epoch)
        end_time = dt.datetime.now()
        print('Stop sequence train data fit {}'.format(end_time))
        print('Elapsed sequence train data fitting time {}'.format(end_time - start_time))
    else:
        # Use fit_generator for all other research models.
        start_time = dt.datetime.now()
        print('Start train data fit {}'.format(start_time))
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=1035,   # roughly train-set size // batch_size
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=test_generator,
            validation_steps=125,   # roughly test-set size // batch_size
            workers=4)
        end_time = dt.datetime.now()
        print('Stop train data fit {}'.format(end_time))
        print('Elapsed train data fitting time {}'.format(end_time - start_time))
def train(inDir, dataDir, seqName, seq_length, model, batch_size, nb_epoch,
          featureLength, SVDFeatLen, modNumber):
    modelNameInt = dataDir + seqName + '_' + model
    data = DataSet(seqName, seq_length, inDir, dataDir, SVDFeatLen, modNumber)
    if SVDFeatLen == -1:
        X_train, Y_train, X_test, Y_test = data.get_all_sequences_in_memory_prop(0.2)
    else:
        X_train, Y_train, X_test, Y_test = data.get_all_sequences_in_memory_svd(0.2)

    # Non-Keras models (Random Forest: 'RF', xgboost: 'xgb', SVM: 'svm') are
    # treated separately here. None currently outperforms the Keras models.
    if model == 'RF':
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)

        # Instantiate and fit the model on the training data.
        rf = RandomForestClassifier(n_estimators=1000, criterion='entropy',
                                    max_depth=14, max_features='auto',
                                    random_state=42)
        rf.fit(fX_train, Y_trainI[:, 1])

        # Score it on the testing data.
        rfScore = rf.score(fX_test, Y_testI[:, 1])
        np.savetxt('rfImports.txt', rf.feature_importances_)
        print("RF Score = %f ." % rfScore)

        # Recursive feature elimination on top of the forest.
        rfe = RFE(rf, n_features_to_select=1000, verbose=3)
        rfe.fit(fX_train, Y_trainI[:, 1])
        rfeScore = rfe.score(fX_test, Y_testI[:, 1])
        np.savetxt('rfe.txt', rfe.ranking_)
        print("RFE Score = %f ." % rfeScore)
    elif model == 'xgb':
        # Train xgboost.
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)
        dtrain = xgb.DMatrix(fX_train, Y_trainI)
        dtest = xgb.DMatrix(fX_test, Y_testI)
        param = {'max_depth': 3, 'eta': 0.1,
                 'objective': 'binary:logistic', 'seed': 42}
        num_round = 50
        bst = xgb.train(param, dtrain, num_round,
                        [(dtest, 'test'), (dtrain, 'train')])
        preds = bst.predict(dtest)
        preds[preds > 0.5] = 1
        preds[preds <= 0.5] = 0
        print("XGB score = %f ." % accuracy_score(preds, Y_testI))
    elif model == 'svm':
        # Currently, SVMs do not work for very large bottleneck features.
        Y_trainI = np.int64(Y_train)
        Y_testI = np.int64(Y_test)
        Cs = [0.01, 0.1]
        gammas = [0.01, 0.1]
        param_grid = {'C': Cs, 'gamma': gammas}
        clf = GridSearchCV(SVC(kernel='rbf'), param_grid, cv=2)
        fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
        # Fit the scaler on the training features (the original fit it on an
        # undefined `fX`), then apply it to both splits.
        scaling = MinMaxScaler(feature_range=(-1, 1)).fit(fX_train)
        fX_train = scaling.transform(fX_train)
        fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)
        fX_test = scaling.transform(fX_test)
        clf.fit(fX_train, Y_trainI[:, 1])
        svmScore = clf.score(fX_test, Y_testI[:, 1])
        print("SVM score = %f ." % svmScore)
    else:
        modelName = modelNameInt + '.h5'
        modelNameBest = modelNameInt + '_best.h5'
        checkpointer = ModelCheckpoint(
            filepath=os.path.join(dataDir, 'checkpoints',
                                  model + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
            verbose=1,
            save_best_only=True)

        # Helper: TensorBoard
        tb = TensorBoard(log_dir=os.path.join(dataDir, 'logs', model))

        # Helper: Stop when we stop learning.
        early_stopper = EarlyStopping(monitor='val_acc', patience=500, mode='auto')

        # Helper: Save results.
        timestamp = time.time()
        csv_logger = CSVLogger(os.path.join(dataDir, 'logs',
                                            model + '-training-' +
                                            str(timestamp) + '.log'))

        # Get the model.
        rm = ResearchModels(model, seq_length, None, features_length=featureLength)

        filepath = dataDir + "weightsbest.hdf5"
        checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1,
                                     save_best_only=True, mode='max')
        rm.model.fit(X_train, Y_train,
                     batch_size=batch_size,
                     validation_split=0.1,
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger, checkpoint],
                     epochs=nb_epoch)
        rm.model.save(modelName)
        rm.model.load_weights(filepath)
        rm.model.save(modelNameBest)
        scores = rm.model.evaluate(X_test, Y_test, verbose=1)
        print("%s: %.2f%%" % (rm.model.metrics_names[1], scores[1] * 100))
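# All three classical baselines above flatten each (seq_length, featureLength)
# sequence into a single vector before fitting. A standalone sketch of that
# reshape with illustrative shapes:
import numpy as np

X = np.zeros((8, 40, 2048))        # (samples, seq_length, featureLength)
flat = X.reshape(X.shape[0], -1)   # -> (8, 40 * 2048), one row per sample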
def train(istrain=True, model='visual_model', saved_model_path=None,
          task='arousal', batch_size=2, nb_epoch=200, learning_r=1e-3):
    """Train the model.

    :param model: 'visual_model', 'audio_model', 'word_model', 'trimodal_model'
    :param saved_model_path: path to a saved model
    :param task: 'arousal', 'valence', 'emotion'
    :param batch_size: e.g. 2
    :param nb_epoch: e.g. 200
    :return: None
    """
    timestamp = time.strftime('%Y-%m-%d-%H:%M:%S', time.localtime(time.time()))

    # Helper: Save the model.
    if not os.path.exists(os.path.join('checkpoints', model)):
        os.makedirs(os.path.join('checkpoints', model))
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('checkpoints', model,
                              task + '-' + str(timestamp) + '-best.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=20)

    # Helper: Save results.
    csv_logger = CSVLogger(os.path.join('logs', model,
                                        task + '-' + str(timestamp) + '.log'))

    # Get the data and process it. seq_length is the sentence length.
    seq_length = 20
    dataset = DataSet(istrain=istrain, model=model, task=task,
                      seq_length=seq_length)

    # Get the model.
    rm = ResearchModels(istrain=istrain, model=model, seq_length=seq_length,
                        saved_model_path=saved_model_path, task_type=task,
                        saved_audio_model=None, saved_visual_model=None,
                        saved_word_model=None, learning_r=learning_r)

    # Get training and validation data.
    x_train, y_train, train_name_list = dataset.get_all_sequences_in_memory('Train')
    x_valid, y_valid, valid_name_list = dataset.get_all_sequences_in_memory('Validation')

    # Fit! Use standard fit.
    rm.model.fit(x_train, y_train,
                 batch_size=batch_size,
                 validation_data=(x_valid, y_valid),
                 verbose=1,
                 callbacks=[tb, early_stopper, csv_logger, checkpointer],
                 epochs=nb_epoch)

    # Find the current best model and get its predictions on both sets.
    model_weights_path = os.path.join('checkpoints', model,
                                      task + '-' + str(timestamp) + '-best.hdf5')
    best_model = load_custom_model(model_weights_path)
    y_valid_pred = np.squeeze(best_model.predict(x_valid))
    y_train_pred = np.squeeze(best_model.predict(x_train))

    # Calculate the CCC and MSE.
    if task in ['arousal', 'valence']:
        print("The CCC in validation set is {}".format(ccc(y_valid, y_valid_pred)[0]))
        print("The mse in validation set is {}".format(mse(y_valid, y_valid_pred)))
        print("The CCC in train set is {}".format(ccc(y_train, y_train_pred)[0]))
        print("The mse in train set is {}".format(mse(y_train, y_train_pred)))
    elif task == "emotion":
        print("F1 score in validation set is {}".format(f1(y_valid, y_valid_pred)))

    # Display the prediction against the true label.
    log_path = os.path.join('logs', model, task + '-' + str(timestamp) + '.log')
    display_true_vs_pred([y_valid, y_train], [y_valid_pred, y_train_pred],
                         log_path, task, model)
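# ccc() above is the project's concordance correlation coefficient. A minimal
# numpy sketch of the standard definition (returned here as a bare float,
# whereas the project's ccc() appears to return a tuple):
import numpy as np

def ccc_sketch(y_true, y_pred):
    """Concordance correlation coefficient between two 1-D arrays."""
    mu_t, mu_p = y_true.mean(), y_pred.mean()
    var_t, var_p = y_true.var(), y_pred.var()
    cov = ((y_true - mu_t) * (y_pred - mu_p)).mean()
    return 2 * cov / (var_t + var_p + (mu_t - mu_p) ** 2)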
def train(inDir, dataDir, seqName, seq_length, model, batch_size, nb_epoch,
          featureLength, SVDFeatLen, modNumber):
    seed = 2
    modelNameInt = dataDir + seqName + '_' + model
    data = DataSet(seqName, seq_length, inDir, dataDir, SVDFeatLen, modNumber)
    X, Yhot = data.get_all_sequences_in_memory()
    Y = Yhot[:, 1]

    kfold = ShuffleSplit(n_splits=5, random_state=seed)
    cvAC = []
    cvF1 = []
    cvKappa = []

    # Loop through the train/test CV splits.
    for train_idx, test_idx in kfold.split(X, Y):
        X_train = X[train_idx]
        X_test = X[test_idx]
        Y_train = Yhot[train_idx]
        Y_test = Yhot[test_idx]

        # Non-Keras models (Random Forest: 'RF', xgboost: 'xgb', SVM: 'svm')
        # are treated separately here. None currently outperforms the Keras models.
        if model == 'RF':
            Y_trainI = np.int64(Y_train)
            Y_testI = np.int64(Y_test)
            fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
            fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)

            rf = RandomForestClassifier(n_estimators=1000, criterion='entropy',
                                        max_depth=14, max_features='auto',
                                        random_state=42)
            # Fit the model on the training data, then predict the test fold.
            rf.fit(fX_train, Y_trainI[:, 1])
            yhat1 = rf.predict(fX_test)
            rfScore = rf.score(fX_test, Y_testI[:, 1])
        elif model == 'xgb':
            # Train xgboost.
            Y_trainI = np.int64(Y_train)
            Y_testI = np.int64(Y_test)
            fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
            fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)
            dtrain = xgb.DMatrix(fX_train, Y_trainI)
            dtest = xgb.DMatrix(fX_test, Y_testI)

            # Optional hyper-parameter search. Its result is not used by the
            # final booster below; the original called clf.fit on the split
            # index arrays, which cannot work, so it is fit on the features here.
            clf_xgb = XGBClassifier(objective='binary:logistic')
            param_dist = {'n_estimators': stats.randint(150, 500),
                          'learning_rate': stats.uniform(0.01, 0.07),
                          'subsample': stats.uniform(0.3, 0.7),
                          'max_depth': [3, 4, 5, 6, 7, 8, 9],
                          'colsample_bytree': stats.uniform(0.5, 0.45),
                          'min_child_weight': [1, 2, 3]}
            clf = RandomizedSearchCV(clf_xgb, param_distributions=param_dist,
                                     n_iter=25, scoring='f1', error_score=0,
                                     verbose=3, n_jobs=-1)
            clf.fit(fX_train, Y_trainI[:, 1])

            param = {'max_depth': 3, 'eta': 0.1,
                     'objective': 'binary:logistic', 'seed': 42}
            num_round = 50
            bst = xgb.train(param, dtrain, num_round,
                            [(dtest, 'test'), (dtrain, 'train')])
            yhat1 = bst.predict(dtest)
            yhat1[yhat1 > 0.5] = 1
            yhat1[yhat1 <= 0.5] = 0
        elif model == 'svm':
            # Currently, SVMs do not work for very large bottleneck features.
            Y_trainI = np.int64(Y_train)
            Y_testI = np.int64(Y_test)
            fX_train = X_train.reshape(X_train.shape[0], seq_length * featureLength)
            fX_test = X_test.reshape(X_test.shape[0], seq_length * featureLength)
            clf = SVC(C=1.0, kernel='rbf')
            clf.fit(fX_train, Y_trainI[:, 1])
            yhat1 = clf.predict(fX_test)
        else:
            modelName = modelNameInt + '.h5'
            modelNameBest = modelNameInt + '_best.h5'
            checkpointer = ModelCheckpoint(
                filepath=os.path.join(dataDir, 'checkpoints',
                                      model + '.{epoch:03d}-{val_loss:.3f}.hdf5'),
                verbose=1,
                save_best_only=True)

            # Helper: TensorBoard
            tb = TensorBoard(log_dir=os.path.join(dataDir, 'logs', model))

            # Helper: Stop when we stop learning.
            early_stopper = EarlyStopping(monitor='val_accuracy', patience=40,
                                          mode='auto')

            # Helper: Save results.
            timestamp = time.time()
            csv_logger = CSVLogger(os.path.join(dataDir, 'logs',
                                                model + '-training-' +
                                                str(timestamp) + '.log'))

            # Get the model.
            rm = ResearchModels(model, seq_length, None,
                                features_length=featureLength)

            filepath = dataDir + "weightsbest.hdf5"
            checkpoint = ModelCheckpoint(filepath, monitor='val_accuracy',
                                         verbose=1, save_best_only=True,
                                         mode='max')
            history = rm.model.fit(X_train, Y_train,
                                   batch_size=batch_size,
                                   validation_split=0.1,
                                   verbose=1,
                                   callbacks=[tb, early_stopper, csv_logger, checkpoint],
                                   epochs=nb_epoch)
            rm.model.save(modelName)
            rm.model.load_weights(filepath)
            rm.model.save(modelNameBest)
            yhat = rm.model.predict(X_test)
            yhat1 = np.argmax(yhat, axis=1)

        Y_test1 = np.argmax(Y_test, axis=1)
        Ac = accuracy_score(Y_test1, yhat1)
        print("ac: %.2f%%" % Ac)
        F1 = f1_score(Y_test1, yhat1)
        print("f1: %.2f%%" % F1)
        Kappa = cohen_kappa_score(Y_test1, yhat1)
        print("kappa: %.2f%%" % Kappa)

        cvAC.append(Ac)
        cvF1.append(F1)
        cvKappa.append(Kappa)

        # Release the Keras model between folds (only exists on the else path).
        if model not in ('RF', 'xgb', 'svm'):
            del rm.model, history
            be.clear_session()
            resetKeras()

    cvACn = np.array(cvAC)
    cvF1n = np.array(cvF1)
    cvKappan = np.array(cvKappa)
    with open(seqName + '_' + model + ".txt", "w") as file1:  # write mode
        file1.write("Accuracy: %0.2f (+/- %0.2f)\n" % (cvACn.mean(), cvACn.std() * 2))
        file1.write("F1: %0.2f (+/- %0.2f)\n" % (cvF1n.mean(), cvF1n.std() * 2))
        file1.write("Kappa: %0.2f (+/- %0.2f)\n" % (cvKappan.mean(), cvKappan.std() * 2))
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, config=None):
    if config is not None:
        load_to_memory = config.videoLoadToMemory
        batch_size = config.videoBatchSize
        nb_epoch = config.videoEpochs
        repo_dir = config.repoDir
        feature_file_path = config.featureFileName
        work_dir = config.workDir
        lr = config.videoLearningRate
        decay = config.videoDecay
        classlist = config.classes
    else:
        load_to_memory = False
        batch_size = 32
        nb_epoch = 100
        repo_dir = ''
        feature_file_path = 'data/data_file.csv'
        work_dir = 'data'
        lr = 1e-5
        decay = 1e-6
        classlist = []

    # Helper: Save the model.
    checkpointpath = os.path.join(work_dir, 'checkpoints')
    if not os.path.exists(checkpointpath):
        print("Creating checkpoint folder [%s]" % checkpointpath)
        os.makedirs(checkpointpath)
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(work_dir, 'checkpoints', model + '-' + data_type +
                              '.{epoch:03d}-{val_loss:.3f}.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    logpath = os.path.join(work_dir, 'logs')
    if not os.path.exists(logpath):
        print("Creating log folder [%s]" % logpath)
        os.makedirs(logpath)
    tb = TensorBoard(log_dir=os.path.join(work_dir, 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(logpath,
                                        model + '-training-' + str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       repo_dir=repo_dir, feature_file_path=feature_file_path,
                       work_dir=work_dir, classlist=classlist)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape, repo_dir=repo_dir,
                       feature_file_path=feature_file_path, work_dir=work_dir,
                       classlist=classlist)

    # Check that there is enough data.
    if not data.check_data(batch_size):
        print("Insufficient data")
        sys.exit(0)

    # Multiply by 0.7 to approximate the train split's share of data.data.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model, lr, decay)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, early_stopper, csv_logger],
                     epochs=nb_epoch)
    else:
        # Use fit generator.
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
def train(data_type, seq_length, model, saved_model=None, class_limit=None,
          image_shape=None, load_to_memory=False, batch_size=32, nb_epoch=100):
    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join('data', 'checkpoints',
                              model + '-' + data_type + '.best2.hdf5'),
        verbose=1,
        save_best_only=True)

    # Helper: TensorBoard
    tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(os.path.join('data', 'logs',
                                        model + '-training-' + str(timestamp) + '.log'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    # Multiply by 0.7 to approximate the train split's share of data.data.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory('train', data_type)
        X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, 'train', data_type)
        val_generator = data.frame_generator(batch_size, 'test', data_type)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model)

    # Fit!
    if load_to_memory:
        # Balance the class weights. Labels are one-hot
        # [flashing, not_flashing, unknown]; Keras expects 0-indexed keys.
        print("setting weights!:")
        flashing = 0
        not_flashing = 0
        unknown = 0
        for label in y:
            if label[0]:
                flashing += 1
            elif label[1]:
                not_flashing += 1
            else:
                unknown += 1
        raw = [flashing, not_flashing, unknown]
        dist = [sum(raw) / float(i) for i in raw]
        class_weights = {0: dist[0], 1: dist[1], 2: dist[2]}
        print(class_weights)

        # Use a custom metrics callback because plain accuracy is misleading
        # on imbalanced data.
        print("setting metrics!")
        metrics = Metrics()

        # Use standard fit, passing the class weights.
        rm.model.fit(X, y,
                     batch_size=batch_size,
                     validation_data=(X_test, y_test),
                     verbose=1,
                     callbacks=[tb, metrics],
                     class_weight=class_weights,
                     epochs=nb_epoch)
    else:
        # Use fit generator (y is unavailable here, so no class weighting).
        rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[tb, early_stopper, csv_logger, checkpointer],
            validation_data=val_generator,
            validation_steps=40,
            workers=4)
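# The hand-rolled weighting above inverts raw class counts. scikit-learn's
# 'balanced' heuristic does the equivalent in one call (a sketch, assuming
# one-hot labels y as used in train() above):
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

def balanced_weights(y_onehot):
    """Map each class index to a weight inversely proportional to its frequency."""
    labels = np.argmax(y_onehot, axis=1)
    classes = np.unique(labels)
    weights = compute_class_weight('balanced', classes=classes, y=labels)
    return dict(zip(classes, weights))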
def validate(data_type, model, excel_save_path, learning_rate, learning_decay,
             seq_length=40, saved_model=None, class_limit=None, image_shape=None):
    # Get the data and process it.
    if image_shape is None:
        data = DataSet(seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(seq_length=seq_length, class_limit=class_limit,
                       image_shape=image_shape)

    if model == 'lstm_regression':
        regression = 1
        sequence_len = 2  # for ResearchModels
    else:
        regression = 0
        sequence_len = seq_length

    train, test = data.split_train_test()
    rm = ResearchModels(len(data.classes), model, sequence_len,
                        learning_rate, learning_decay, saved_model)

    final_result_list = []
    for sample in test:
        movie_id = sample[2]
        if movie_id.split('_')[2] == '277235':  # excluded from validation
            print(movie_id)
            continue
        p_generator = data.predict_generator(sample, data_type, regression)
        predict_output = rm.model.predict_generator(generator=p_generator, steps=1)

        if regression == 0:
            # Class encoding: mild = [1,0,0], normal = [0,1,0], severe = [0,0,1].
            if sample[1] == 'normal':
                truth = 0
            elif sample[1] == 'mild':
                truth = 1
            else:
                truth = 2
            if np.argmax(predict_output[0]) == 0:
                predict = 1
            elif np.argmax(predict_output[0]) == 1:
                predict = 0
            else:
                predict = 2
        else:
            truth = float(sample[2].split('_')[-2])
            predict = predict_output[0][0]

        final_result_list.append([movie_id, truth, predict])

    par = [('movie_ID', 1), ('truth', 1), ('predict', 1)]
    ff.xlsx_save(excel_save_path, final_result_list, par, list(range(0, 3)))