def main():
    model = 'lstm_regression'
    rank = ''
    saved_model = os.path.join(main_folder, 'checkpoints', model + rank, 'lstm_regression-101.hdf5')
    seq_len = 20
    learning_rate = 1e-4
    learning_decay = 1e-5

    # create folder to save the result as an excel file
    excel_folder = os.path.join(main_folder, 'validation_result')
    ff.make_folder([excel_folder])
    excel_save_path = os.path.join(excel_folder, model + rank + '_validation.xlsx')

    if model == 'conv_3d' or model == 'lrcn':
        data_type = 'images'
        image_shape = (80, 80, 3)
    else:
        data_type = 'features'
        image_shape = None

    validate(data_type, model, excel_save_path, learning_rate, learning_decay,
             seq_length=seq_len, saved_model=saved_model,
             image_shape=image_shape, class_limit=None)
def copy_files(main_path, file_groups):
    # make the main folders for the train and test splits
    train_folder = os.path.join(main_path, 'train')
    test_folder = os.path.join(main_path, 'test')
    ff.make_folder([train_folder, test_folder])

    # Do each of our groups.
    for group, videos in file_groups.items():
        # Do each of our videos.
        for video in videos:
            # Get the parts.
            parts = video.split(os.path.sep)
            filename = parts[-1]

            # the second-to-last underscore-separated token encodes the strain reduction
            strain_reduction = float(filename.split('_')[-2])
            if strain_reduction <= 0.2:
                classname = 'normal'
            elif strain_reduction >= 0.7:
                classname = 'severe'
            elif 0.4 <= strain_reduction <= 0.6:
                classname = 'mild'
            else:
                # skip videos whose strain value falls between the class ranges
                print('Error!')
                continue

            # Check if this class folder exists.
            if not os.path.exists(os.path.join(main_path, group, classname)):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(os.path.join(main_path, group, classname))

            # Skip the file if it already exists in the destination.
            destination = os.path.join(main_path, group, classname, filename)
            if os.path.exists(destination):
                print("  found %s in the destination. Skipping." % filename)
                continue

            # otherwise copy the file
            print("  %s not found in the destination. Copying it to %s." % (filename, classname))
            shutil.copyfile(video, destination)

    print('done copy')
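# Illustration of the filename convention copy_files() relies on (the example
# names below are hypothetical): the second-to-last underscore-separated token
# is parsed as the strain reduction and mapped to a class.
#
#   'patient012_view4_0.75_001.avi'.split('_')[-2]  ->  '0.75'  ->  'severe'
#   'patient012_view4_0.10_001.avi'.split('_')[-2]  ->  '0.10'  ->  'normal'
#
# Values falling between the class ranges (0.2-0.4 or 0.6-0.7) are skipped.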
def extract_timeframes(main_path, movie_path, excel_file):
    """After we have all of our videos split between train and test, and all
    nested within folders representing their classes, we need to make a data
    file that we can reference when training our RNN(s). This lets us keep
    track of image sequences and other parts of the training process.

    We'll first need to extract images from each of the videos, and record the
    following data in the file:

        [train|test] or batch, class, filename, nb frames

    Extraction could also be done with ffmpeg (`ffmpeg -i video.mpg image-%04d.jpg`),
    but here frames are read with OpenCV because ffmpeg did not extract exactly
    20 frames for some avis.
    """
    data = []

    # create the image folder
    image_folder = os.path.join(main_path, 'images')
    ff.make_folder([image_folder])

    # find all the movies
    excel_file = pd.read_excel(excel_file)
    movie_list = excel_file['video_name']

    # extract time frames from each movie
    for i in range(0, excel_file.shape[0]):
        case = excel_file.iloc[i]
        print(i, case['video_name'])

        # set the file name for the images: strip the .avi extension while
        # keeping any interior dots in the name
        file_name = case['video_name']
        file_name_sep = file_name.split('.')
        if len(file_name_sep) > 1:
            file_name_no_ext = '.'.join(file_name_sep[:-1])
        else:
            file_name_no_ext = file_name_sep[0]

        save_folder = os.path.join(image_folder, file_name_no_ext)
        ff.make_folder([save_folder])

        src = os.path.join(movie_path, file_name)
        if not os.path.isfile(src):
            raise ValueError('no movie file')

        # Only extract if we haven't done it yet.
        if not os.path.isfile(os.path.join(save_folder, file_name_no_ext + '-0001.jpg')):
            cap = cv2.VideoCapture(src)
            count = 1
            frameRate = 1
            while cap.isOpened():
                frameId = cap.get(1)  # current frame number
                ret, frame = cap.read()
                if not ret:
                    break
                if frameId % math.floor(frameRate) == 0:
                    number = '%04d' % count
                    dest = os.path.join(save_folder, file_name_no_ext + '-' + number + '.jpg')
                    cv2.imwrite(dest, frame)
                    count += 1
            cap.release()
            # call(["ffmpeg", "-i", src, dest])  # this caused errors (did not extract exactly 20 frames) for some avis

        # Now get how many frames it is.
        nb_frames = len(ff.find_all_target_files(['*.jpg'], save_folder))
        data.append([case['video_name'], file_name_no_ext, nb_frames])
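# Each entry appended to `data` has the form [video_name, name without
# extension, number of extracted frames]; an illustrative (hypothetical) row:
#
#   ['patient012_view4_0.75_001.avi', 'patient012_view4_0.75_001', 20]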
def train(data_type, batch, seq_length, model, learning_rate, learning_decay,
          saved_model=None, class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    if model == 'lstm_regression':
        regression = 1
        monitor_par = 'val_loss'
        sequence_len = 2
    else:
        regression = 0
        monitor_par = 'val_acc'
        sequence_len = seq_length

    # Helper: Save the model.
    save_folder = os.path.join(cg.nas_main_dir, 'models')
    model_save_folder = os.path.join(save_folder, model)
    model_save_folder2 = os.path.join(model_save_folder, 'batch_' + str(batch))
    log_save_folder = os.path.join(save_folder, 'logs')
    ff.make_folder([save_folder, model_save_folder, model_save_folder2, log_save_folder])

    checkpointer = ModelCheckpoint(
        filepath=os.path.join(model_save_folder2, model + '-batch' + str(batch) + '-{epoch:03d}.hdf5'),
        monitor=monitor_par,
        verbose=1,
        save_best_only=True)

    # # Helper: TensorBoard
    # tb = TensorBoard(log_dir=os.path.join('data', 'logs', model))

    # # Helper: Stop when we stop learning.
    # early_stopper = EarlyStopping(patience=5)

    # Helper: Save results.
    # timestamp = time.time()
    csv_logger = CSVLogger(os.path.join(log_save_folder, model + '-batch' + str(batch) + '-training-log' + '.csv'))

    # Get the data and process it.
    if image_shape is None:
        data = DataSet(validation_batch=batch, seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(validation_batch=batch, seq_length=seq_length, class_limit=class_limit, image_shape=image_shape)

    # Get samples per epoch.
    # steps_per_epoch = (training_data_num) // batch_size
    train_data, test_data = data.split_train_test()
    print('training num: ', len(train_data), 'testing num: ', len(test_data))
    steps_per_epoch_train = len(train_data) // batch_size
    print('steps in the training: %d' % steps_per_epoch_train)
    steps_per_epoch_test = len(test_data) // batch_size
    print('steps in the test: %d' % steps_per_epoch_test)

    # if load_to_memory:
    #     # Get data.
    #     X, y = data.get_all_sequences_in_memory('train', data_type)
    #     X_test, y_test = data.get_all_sequences_in_memory('test', data_type)
    # else:
    # Get generators.
    generator = data.frame_generator(batch_size, 'train', data_type, regression, True)
    val_generator = data.frame_generator(batch_size, 'test', data_type, regression, True)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate, learning_decay, saved_model)

    # Fit!
    # if load_to_memory:
    #     # Use standard fit.
    #     hist = rm.model.fit(
    #         X, y,
    #         batch_size=batch_size,
    #         validation_data=(X_test, y_test),
    #         verbose=1,
    #         callbacks=[csv_logger],
    #         epochs=nb_epoch)
    # else:
    # Use fit generator.
    hist = rm.model.fit_generator(
        generator=generator,
        steps_per_epoch=steps_per_epoch_train,  # in each epoch all the training data are evaluated
        epochs=nb_epoch,
        verbose=1,
        callbacks=[csv_logger, checkpointer],
        validation_data=val_generator,
        validation_steps=steps_per_epoch_test,
        workers=1)  # if the GPU is idling while waiting for batches, try increasing the number of workers

    return hist
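# A minimal sketch of how train() might be invoked for one cross-validation
# partition; the hyperparameter values mirror the defaults used elsewhere in
# this repo, and the choice of batch/checkpoint is an illustrative assumption.
if __name__ == '__main__':
    train(data_type='features',
          batch=0,                 # which partition to hold out for validation
          seq_length=20,
          model='lstm_regression',
          learning_rate=1e-4,
          learning_decay=1e-5,
          saved_model=None,        # or a .hdf5 checkpoint to resume from
          image_shape=None,        # only needed for conv_3d / lrcn
          batch_size=32,
          nb_epoch=100)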
# if the excel file does not exist yet, run pre_assign_classes.py first
csv_file = pd.read_excel(os.path.join(cg.nas_main_dir, 'movie_list_w_classes.xlsx'))

if 'batch' in csv_file.columns:
    print('already done partition')
else:
    # collect one [patient_class, patient_id] entry per patient
    patient_id_list = np.unique(csv_file['patient_id'])
    patient_list = []
    for p in patient_id_list:
        d = csv_file[csv_file['patient_id'] == p].iloc[0]
        patient_list.append([d['patient_class'], p])

    # shuffle the patients and split them into partitions (patient-level split)
    np.random.shuffle(patient_list)
    a = np.array_split(patient_list, cg.num_partitions)  # into 5 batches

    ff.make_folder([os.path.join(cg.nas_main_dir, 'partitions')])
    for i in range(0, cg.num_partitions):
        np.save(os.path.join(cg.nas_main_dir, 'partitions', 'batch_' + str(i) + '.npy'), a[i])

    # record the batch assignment of each video in the excel file
    batch_list = []
    for j in range(0, csv_file.shape[0]):
        case = csv_file.iloc[j]
        for batch in range(0, cg.num_partitions):
            if np.isin(case['patient_id'], a[batch]) == 1:
                batch_list.append([batch, case['video_name']])
    batch_df = pd.DataFrame(batch_list, columns=['batch', 'video_name'])
    # merge the two dataframes
main_path = cg.local_dir

# Set defaults.
seq_length = 20
class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.

# Get the dataset.
data = DataSet(validation_batch=0, seq_length=seq_length, class_limit=class_limit)

# Get the model.
model = Extractor()

# Get a folder for sequence saving.
ff.make_folder([os.path.join(main_path, 'sequences')])

# Loop through data.
pbar = tqdm(total=len(data.data))
for case in data.data:
    # strip the .avi extension, keeping any interior dots in the name
    file_name = case['video_name']
    file_name_sep = file_name.split('.')
    if len(file_name_sep) > 1:
        file_name_no_ext = '.'.join(file_name_sep[:-1])
    else:
        file_name_no_ext = file_name_sep[0]

    # Get the path to the sequence for this video.
    path = os.path.join(main_path, 'sequences', file_name_no_ext + '-' + str(seq_length) + \
'''this script transfers video files from the NAS drive to octomore'''
import glob
import os
import os.path
import numpy as np
import shutil
import settings
import function_list as ff

cg = settings.Experiment()

nas_movie_folder = os.path.join(cg.nas_main_dir, 'movie')
movie_list = ff.find_all_target_files(['*avi'], nas_movie_folder)
print(movie_list.shape)

# make the destination folder on octomore
local_folder = os.path.join(cg.local_dir, 'original_movie')
ff.make_folder([local_folder])

# copy to octomore
for m in movie_list:
    if os.path.exists(os.path.join(local_folder, os.path.basename(m))):
        print("  found %s in the destination. Skipping." % (os.path.basename(m)))
        continue
    else:
        shutil.copyfile(m, os.path.join(local_folder, os.path.basename(m)))

# check whether the transfer is complete
l = ff.find_all_target_files(['*.avi'], local_folder)
print(l.shape)
def extract_files(main_path):
    """After we have all of our videos split between train and test, and all
    nested within folders representing their classes, we need to make a data
    file that we can reference when training our RNN(s). This lets us keep
    track of image sequences and other parts of the training process.

    We'll first need to extract images from each of the videos, and record the
    following data in the file:

        [train|test], class, filename, nb frames

    Extraction could also be done with ffmpeg (`ffmpeg -i video.mpg image-%04d.jpg`),
    but here frames are read with OpenCV because ffmpeg did not extract exactly
    20 frames for some avis.
    """
    data_file = []
    folders = ['train', 'test']

    # create the image folders
    train_image_f = os.path.join(main_path, 'train_image')
    test_image_f = os.path.join(main_path, 'test_image')
    ff.make_folder([train_image_f, test_image_f])

    for folder in folders:
        class_folders = glob.glob(os.path.join(main_path, folder, '*'))

        for vid_class in class_folders:
            class_files = glob.glob(os.path.join(vid_class, '*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path, full_length=True)
                train_or_test, classname, filename_no_ext, filename = video_parts
                print(train_or_test, classname, filename_no_ext, filename)

                # check whether the folder to save images has been created
                if not os.path.exists(os.path.join(main_path, train_or_test + '_image', classname)):
                    print("Creating folder for %s/%s" % (train_or_test + '_image', classname))
                    os.makedirs(os.path.join(main_path, train_or_test + '_image', classname))

                # Only extract if we haven't done it yet. Otherwise, just get the info.
                if not check_already_extracted(video_parts, main_path):
                    # Now extract it.
                    print("%s does not exist, extracting" % filename)
                    src = os.path.join(main_path, train_or_test, classname, filename)
                    cap = cv2.VideoCapture(src)
                    count = 1
                    frameRate = 1
                    while cap.isOpened():
                        frameId = cap.get(1)  # current frame number
                        ret, frame = cap.read()
                        if not ret:
                            break
                        if frameId % math.floor(frameRate) == 0:
                            n = '%04d' % count
                            dest = os.path.join(main_path, train_or_test + '_image', classname,
                                                filename_no_ext + '-' + n + '.jpg')
                            cv2.imwrite(dest, frame)
                            count += 1
                    cap.release()
                    # call(["ffmpeg", "-i", src, dest])  # this caused errors (did not extract exactly 20 frames) for some avis

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts, main_path)
                data_file.append([train_or_test, classname, filename_no_ext, nb_frames])
                print("Generated %d frames for class %s, filename %s" % (nb_frames, classname, filename_no_ext))

    excel_file = os.path.join(main_path, 'data_file.csv')
    with open(excel_file, 'w') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))
def validate(data_type, batch, model, learning_rate, learning_decay, seq_length,
             saved_model=None, class_limit=None, image_shape=None):
    # Get the data and process it.
    if image_shape is None:
        data = DataSet(validation_batch=batch, seq_length=seq_length, class_limit=class_limit)
    else:
        data = DataSet(validation_batch=batch, seq_length=seq_length, class_limit=class_limit, image_shape=image_shape)

    if model == 'lstm_regression':
        regression = 1
        sequence_len = 2
    else:
        regression = 0
        sequence_len = seq_length

    _, test_data = data.split_train_test()

    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate, learning_decay, saved_model)

    final_result_list = []
    for sample in test_data:
        movie_id = sample['video_name']
        p_generator = data.predict_generator(sample, data_type, regression)
        predict_output = rm.model.predict_generator(generator=p_generator, steps=1)
        print(predict_output)

        if regression == 0:
            if sample['class'] == 'normal':
                truth = 0
            else:
                truth = 1
            # abnormal = [1, 0], normal = [0, 1]
            if np.argmax(predict_output[0]) == 1:
                predict = 0
            else:
                predict = 1
        else:
            truth = float(sample['EF'])
            predict = predict_output[0][0]

        final_result_list.append([movie_id, truth, predict])

    df = pd.DataFrame(final_result_list, columns=['video_name', 'truth', 'predict'])
    save_folder = os.path.join(cg.nas_main_dir, 'results')
    ff.make_folder([save_folder])
    df.to_excel(os.path.join(save_folder, model + '-batch' + str(batch) + '-validation.xlsx'), index=False)
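# A minimal sketch of running validate() over every partition; the checkpoint
# path pattern follows where train() saves its best models, but the epoch
# number (100) is an illustrative assumption.
if __name__ == '__main__':
    for b in range(cg.num_partitions):
        ckpt = os.path.join(cg.nas_main_dir, 'models', 'lstm_regression', 'batch_' + str(b),
                            'lstm_regression-batch' + str(b) + '-100.hdf5')  # assumed epoch
        validate(data_type='features',
                 batch=b,
                 model='lstm_regression',
                 learning_rate=1e-4,
                 learning_decay=1e-5,
                 seq_length=20,
                 saved_model=ckpt,
                 image_shape=None,
                 class_limit=None)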