def main():
    """Run validation for the saved ``lstm_regression`` checkpoint.

    Builds the checkpoint path and the Excel output path under
    ``main_folder``, picks the input representation that matches the model
    name, and hands everything to ``validate``.
    """
    model = 'lstm_regression'
    rank = ''
    run_name = model + rank

    # Hyper-parameters forwarded to ``validate``.
    seq_len = 20
    learning_rate = 1e-4
    learning_decay = 1e-5

    saved_model = os.path.join(main_folder, 'checkpoints', run_name,
                               'lstm_regression-101.hdf5')

    # Folder that receives the validation result as an Excel file.
    excel_folder = os.path.join(main_folder, 'validation_result')
    ff.make_folder([excel_folder])
    excel_save_path = os.path.join(excel_folder, run_name + '_validation.xlsx')

    # Only the image-based models get an image shape; every other model
    # works on pre-extracted features.
    if model in ('conv_3d', 'lrcn'):
        data_type, image_shape = 'images', (80, 80, 3)
    else:
        data_type, image_shape = 'features', None

    # NOTE(review): the positional order used here must match the
    # ``validate`` that is actually in scope for this script -- confirm.
    validate(data_type,
             model,
             excel_save_path,
             learning_rate,
             learning_decay,
             seq_length=seq_len,
             saved_model=saved_model,
             image_shape=image_shape,
             class_limit=None)
def copy_files(main_path, file_groups):
    """Copy videos into ``<main_path>/<group>/<class>/`` folders.

    Args:
        main_path: Root folder that receives the ``train`` and ``test``
            folders.
        file_groups: Mapping of group name to an iterable of video paths.

    The class is derived from the second-to-last ``_``-separated token of
    the file name, parsed as a float: ``<= 0.2`` is ``normal``, ``>= 0.7``
    is ``severe`` and ``0.4 .. 0.6`` is ``mild``. Any other value prints
    ``Error!`` and stops processing the remaining videos of that group.
    Files already present at the destination are skipped.
    """
    # Top-level train/test folders.
    ff.make_folder(
        [os.path.join(main_path, split) for split in ('train', 'test')])

    for group, videos in file_groups.items():
        for video in videos:
            # Last path component is the file name.
            filename = video.split(os.path.sep)[-1]
            strain_reduction = float(filename.split('_')[-2])

            if strain_reduction <= 0.2:
                classname = 'normal'
            elif strain_reduction >= 0.7:
                classname = 'severe'
            elif 0.4 <= strain_reduction <= 0.6:
                classname = 'mild'
            else:
                print('Error!')
                break

            # Create the class folder on first use.
            class_dir = os.path.join(main_path, group, classname)
            if not os.path.exists(class_dir):
                print("Creating folder for %s/%s" % (group, classname))
                os.makedirs(class_dir)

            destination = os.path.join(class_dir, filename)
            if os.path.exists(destination):
                # Already copied on an earlier run.
                print(" find %s in the destination. Skipping." % (filename))
                continue

            print(" can't find %s in the destination. copy it to %s." %
                  (filename, classname))
            shutil.copyfile(video, destination)
    print('done copy')
Example #3
0
def extract_timeframes(main_path, movie_path, excel_file):
    """Extract every frame of each listed movie as a JPEG image.

    For each row of the spreadsheet, the movie named in its ``video_name``
    column is read from ``movie_path`` and its frames are written to
    ``<main_path>/images/<name without extension>/<name>-NNNN.jpg``.
    Extraction is skipped when the ``-0001.jpg`` frame already exists.

    Frames are written with OpenCV rather than ffmpeg; according to the
    original author, ffmpeg did not always extract the exact number of
    frames for some ``.avi`` files.

    Args:
        main_path: Root folder; images go to ``<main_path>/images``.
        movie_path: Folder containing the movie files.
        excel_file: Path of the spreadsheet listing the movies.

    Returns:
        A list with one ``[video_name, name_without_extension, nb_frames]``
        entry per spreadsheet row.

    Raises:
        ValueError: If a listed movie file does not exist.
    """
    data = []

    # create image folder
    image_folder = os.path.join(main_path, 'images')
    ff.make_folder([image_folder])

    # find all the movies
    excel_file = pd.read_excel(excel_file)

    # extract time frames from each movie:
    for i in range(0, excel_file.shape[0]):
        case = excel_file.iloc[i]
        file_name = case['video_name']
        print(i, file_name)

        # Fail before creating any folder for a movie that is not there.
        # (The original built the ValueError but never raised it.)
        src = os.path.join(movie_path, file_name)
        if not os.path.isfile(src):
            raise ValueError('no movie file: %s' % src)

        # Drop only the last extension; earlier dots stay in the name.
        file_name_no_ext = file_name.rsplit('.', 1)[0]
        save_folder = os.path.join(image_folder, file_name_no_ext)
        ff.make_folder([save_folder])

        first_frame = os.path.join(save_folder,
                                   file_name_no_ext + '-0001.jpg')
        if not os.path.isfile(first_frame):
            cap = cv2.VideoCapture(src)
            try:
                count = 1
                while cap.isOpened():
                    ret, frame = cap.read()
                    if not ret:
                        break
                    # Four-digit zero padding keeps the names sortable and
                    # matches the documented ``%04d`` pattern; the previous
                    # manual padding produced '00100' for frame 100.
                    dest = os.path.join(
                        save_folder,
                        '%s-%04d.jpg' % (file_name_no_ext, count))
                    cv2.imwrite(dest, frame)
                    count += 1
            finally:
                # Release the capture even if reading or writing fails.
                cap.release()

        # Now get how many frames it is.
        nb_frames = len(ff.find_all_target_files(['*.jpg'], save_folder))
        data.append([file_name, file_name_no_ext, nb_frames])

    return data
def train(data_type, batch, seq_length, model, learning_rate, learning_decay,
          saved_model=None, class_limit=None, image_shape=None,
          load_to_memory=False, batch_size=32, nb_epoch=100):
    """Train ``model`` with ``batch`` held out as the validation partition.

    Checkpoints are written to
    ``<cg.nas_main_dir>/models/<model>/batch_<batch>/`` (best epochs only)
    and a CSV training log to ``<cg.nas_main_dir>/models/logs/``.

    Args:
        data_type: Forwarded to ``DataSet.frame_generator``.
        batch: Index of the partition used for validation.
        seq_length: Sequence length given to ``DataSet``.
        model: Model name; ``'lstm_regression'`` switches to regression.
        learning_rate: Forwarded to ``ResearchModels``.
        learning_decay: Forwarded to ``ResearchModels``.
        saved_model: Optional checkpoint forwarded to ``ResearchModels``.
        class_limit: Forwarded to ``DataSet``.
        image_shape: Forwarded to ``DataSet`` only when not ``None``.
        load_to_memory: Currently unused; data always comes from generators.
        batch_size: Samples per generator batch.
        nb_epoch: Number of training epochs.

    Returns:
        The history object returned by ``fit_generator``.
    """
    # Regression watches the loss and builds the network with a sequence
    # length of 2; classification watches accuracy and uses ``seq_length``.
    is_regression = model == 'lstm_regression'
    regression = 1 if is_regression else 0
    monitor_par = 'val_loss' if is_regression else 'val_acc'
    sequence_len = 2 if is_regression else seq_length

    # Output folders for checkpoints and logs.
    save_folder = os.path.join(cg.nas_main_dir, 'models')
    model_save_folder = os.path.join(save_folder, model)
    model_save_folder2 = os.path.join(model_save_folder,
                                      'batch_' + str(batch))
    log_save_folder = os.path.join(save_folder, 'logs')
    ff.make_folder([save_folder, model_save_folder, model_save_folder2,
                    log_save_folder])

    run_tag = model + '-batch' + str(batch)

    # Callback: keep only checkpoints that improve the monitored quantity.
    checkpointer = ModelCheckpoint(
        filepath=os.path.join(model_save_folder2,
                              run_tag + '-{epoch:03d}.hdf5'),
        monitor=monitor_par,
        verbose=1,
        save_best_only=True)

    # Callback: per-epoch metrics as CSV.
    csv_logger = CSVLogger(
        os.path.join(log_save_folder, run_tag + '-training-log' + '.csv'))

    # Build the dataset; ``image_shape`` is passed only when provided so
    # that ``DataSet`` otherwise falls back to its own default.
    dataset_kwargs = dict(validation_batch=batch,
                          seq_length=seq_length,
                          class_limit=class_limit)
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    # Steps per epoch: one pass over each split (partial batches dropped).
    train_data, test_data = data.split_train_test()
    print('training num: ', len(train_data), 'testing num: ', len(test_data))

    steps_per_epoch_train = len(train_data) // batch_size
    print('step in the training is: %d' % steps_per_epoch_train)
    steps_per_epoch_test = len(test_data) // batch_size
    print('step in the test is: %d' % steps_per_epoch_test)

    # Batch generators for both splits.
    generator = data.frame_generator(batch_size, 'train', data_type,
                                     regression, True)
    val_generator = data.frame_generator(batch_size, 'test', data_type,
                                         regression, True)

    # Build (or reload) the network.
    rm = ResearchModels(len(data.classes), model, sequence_len,
                        learning_rate, learning_decay, saved_model)

    # If the GPU idles while waiting for batches, try raising ``workers``.
    hist = rm.model.fit_generator(
        generator=generator,
        steps_per_epoch=steps_per_epoch_train,
        epochs=nb_epoch,
        verbose=1,
        callbacks=[csv_logger, checkpointer],
        validation_data=val_generator,
        validation_steps=steps_per_epoch_test,
        workers=1)
    return hist
Example #5
0
# Script: assign every patient (and therefore every video of that patient)
# to one of ``cg.num_partitions`` batches, unless the movie list already
# has a 'batch' column.
# if no csv_file, run pre_assign_classes.py
csv_file = pd.read_excel(
    os.path.join(cg.nas_main_dir, 'movie_list_w_classes.xlsx'))
if 'batch' in csv_file.columns:
    print('already done partition')
else:
    # One [patient_class, patient_id] entry per unique patient; the class
    # is taken from the first row found for that patient.
    patient_id_list = np.unique(csv_file['patient_id'])
    patient_list = []
    for p in patient_id_list:
        d = csv_file[csv_file['patient_id'] == p].iloc[0]
        patient_list.append([d['patient_class'], p])

    # Shuffle in place, then split into cg.num_partitions chunks.
    # NOTE(review): the shuffle is unseeded, so the partition differs
    # between runs -- confirm this is intended.
    np.random.shuffle(patient_list)
    a = np.array_split(patient_list, cg.num_partitions)  # one chunk per batch

    # Save each partition as partitions/batch_<i>.npy
    ff.make_folder([os.path.join(cg.nas_main_dir, 'partitions')])
    for i in range(0, cg.num_partitions):
        np.save(
            os.path.join(cg.nas_main_dir, 'partitions',
                         'batch_' + str(i) + '.npy'), a[i])

    # change the excel file
    # Build a (batch, video_name) table by looking up which partition
    # contains each video's patient id.
    # NOTE(review): np.isin tests against every element of a[batch], i.e.
    # both the class and the id column, after array_split has coerced the
    # mixed list to a single dtype -- verify ids still compare equal.
    batch_list = []
    for j in range(0, csv_file.shape[0]):
        case = csv_file.iloc[j]
        for batch in range(0, cg.num_partitions):
            if np.isin(case['patient_id'], a[batch]) == 1:
                batch_list.append([batch, case['video_name']])
    batch_df = pd.DataFrame(batch_list, columns=['batch', 'video_name'])

    # merge two dataframe
# Script setup: prepare the dataset, the feature extractor and the output
# folder before looping over every video in ``data.data``.
main_path = cg.local_dir

# Set defaults.
seq_length = 20
class_limit = None  # Number of classes to extract. Can be 1-101 or None for all.

# Get the dataset.
# validation_batch is fixed to 0 here; NOTE(review): presumably irrelevant
# because the loop below walks all of ``data.data`` -- confirm.
data = DataSet(validation_batch=0,
               seq_length=seq_length,
               class_limit=class_limit)

# get the model.
model = Extractor()

# get a folder for sequence save
ff.make_folder([os.path.join(main_path, 'sequences')])

# Loop through data.
# Progress bar sized to the number of entries in ``data.data``.
pbar = tqdm(total=len(data.data))
for case in data.data:

    file_name = case['video_name']
    file_name_sep = file_name.split('.')  # remove .avi
    file_name_no_ext = file_name_sep[0]
    if len(file_name_sep) > 1:
        for ii in range(1, len(file_name_sep) - 1):
            file_name_no_ext += '.'
            file_name_no_ext += file_name_sep[ii]

    # Get the path to the sequence for this video.
    path = os.path.join(main_path, 'sequences', file_name_no_ext + '-' + str(seq_length) + \
'''this script transfers video files from NAS drive to octomore'''

import glob
import os
import os.path
import numpy as np
import shutil
import settings
import function_list as ff
cg = settings.Experiment()

# Collect every .avi movie stored on the NAS. The pattern matches the one
# used for the completion check at the end, so the two printed shapes are
# directly comparable (the original source pattern was '*avi', without the
# dot).
nas_movie_folder = os.path.join(cg.nas_main_dir, 'movie')
movie_list = ff.find_all_target_files(['*.avi'], nas_movie_folder)
print(movie_list.shape)

# make folder in octomore
local_folder = os.path.join(cg.local_dir, 'original_movie')
ff.make_folder([local_folder])

# copy to octomore, skipping movies that are already there
for m in movie_list:
    movie_name = os.path.basename(m)
    destination = os.path.join(local_folder, movie_name)
    if os.path.exists(destination):
        print(" find %s in the destination. Skipping." % (movie_name))
        continue
    shutil.copyfile(m, destination)

# check whether the transfer is completed: this shape should match the one
# printed for the NAS folder above
l = ff.find_all_target_files(['*.avi'], local_folder)
print(l.shape)
Example #8
0
def extract_files(main_path):
    """Extract frames for every train/test video and write an index file.

    Walks ``<main_path>/{train,test}/<class>/*.avi``, writes each video's
    frames to ``<main_path>/{train,test}_image/<class>/<name>-NNNN.jpg``
    (unless ``check_already_extracted`` reports they already exist) and
    finally writes ``<main_path>/data_file.csv`` with one row per video:

        [train|test], class, filename without extension, nb frames

    Frames are written with OpenCV rather than ffmpeg; according to the
    original author, ffmpeg did not always extract the exact number of
    frames for some ``.avi`` files.

    Args:
        main_path: Root folder holding the ``train`` and ``test`` folders.
    """
    data_file = []
    folders = ['train', 'test']

    # create image folder
    train_image_f = os.path.join(main_path, 'train_image')
    test_image_f = os.path.join(main_path, 'test_image')
    ff.make_folder([train_image_f, test_image_f])

    for folder in folders:
        class_folders = glob.glob(os.path.join(main_path, folder, '*'))

        for vid_class in class_folders:
            class_files = glob.glob(os.path.join(vid_class, '*.avi'))

            for video_path in class_files:
                # Get the parts of the file.
                video_parts = get_video_parts(video_path, full_length=True)
                train_or_test, classname, filename_no_ext, filename = \
                    video_parts
                print(train_or_test, classname, filename_no_ext, filename)

                # check whether the folder to save images has been created
                image_dir = os.path.join(main_path,
                                         train_or_test + '_image', classname)
                if not os.path.exists(image_dir):
                    print("Creating folder for %s/%s" %
                          (train_or_test + '_image', classname))
                    os.makedirs(image_dir)

                # Only extract if we haven't done it yet. Otherwise, just
                # get the info.
                if not check_already_extracted(video_parts, main_path):
                    print("%s not exist, extract" % filename)
                    src = os.path.join(main_path, train_or_test, classname,
                                       filename)

                    cap = cv2.VideoCapture(src)
                    try:
                        count = 1
                        while cap.isOpened():
                            ret, frame = cap.read()
                            if not ret:
                                break
                            # Four-digit zero padding keeps names sortable
                            # and matches the documented ``%04d`` pattern;
                            # the previous manual padding produced '00100'
                            # for frame 100.
                            dest = os.path.join(
                                image_dir,
                                '%s-%04d.jpg' % (filename_no_ext, count))
                            cv2.imwrite(dest, frame)
                            count += 1
                    finally:
                        # Release the capture even if a read/write fails.
                        cap.release()

                # Now get how many frames it is.
                nb_frames = get_nb_frames_for_video(video_parts, main_path)

                data_file.append(
                    [train_or_test, classname, filename_no_ext, nb_frames])

                print("Generated %d frames for class %s, filename %s" %
                      (nb_frames, classname, filename_no_ext))

    excel_file = os.path.join(main_path, 'data_file.csv')
    # newline='' lets the csv module control line endings; without it,
    # every row is followed by a blank line on Windows.
    with open(excel_file, 'w', newline='') as fout:
        writer = csv.writer(fout)
        writer.writerows(data_file)

    print("Extracted and wrote %d video files." % (len(data_file)))
def validate(data_type,
             batch,
             model,
             learning_rate,
             learning_decay,
             seq_length,
             saved_model=None,
             class_limit=None,
             image_shape=None):
    """Predict every held-out sample of ``batch`` and save truth vs. output.

    The result table (``video_name``, ``truth``, ``predict``) is written to
    ``<cg.nas_main_dir>/results/<model>-batch<batch>-validation.xlsx``.

    Args:
        data_type: Forwarded to ``DataSet.predict_generator``.
        batch: Index of the partition used for validation.
        model: Model name; ``'lstm_regression'`` switches to regression.
        learning_rate: Forwarded to ``ResearchModels``.
        learning_decay: Forwarded to ``ResearchModels``.
        seq_length: Sequence length given to ``DataSet``.
        saved_model: Optional checkpoint forwarded to ``ResearchModels``.
        class_limit: Forwarded to ``DataSet``.
        image_shape: Forwarded to ``DataSet`` only when not ``None``.
    """
    # Build the dataset; ``image_shape`` is passed only when provided.
    dataset_kwargs = dict(validation_batch=batch,
                          seq_length=seq_length,
                          class_limit=class_limit)
    if image_shape is not None:
        dataset_kwargs['image_shape'] = image_shape
    data = DataSet(**dataset_kwargs)

    # Regression builds the network with a sequence length of 2.
    is_regression = model == 'lstm_regression'
    regression = 1 if is_regression else 0
    sequence_len = 2 if is_regression else seq_length

    _, test_data = data.split_train_test()
    rm = ResearchModels(len(data.classes), model, sequence_len, learning_rate,
                        learning_decay, saved_model)

    final_result_list = []
    for sample in test_data:
        movie_id = sample['video_name']

        # One prediction step per sample.
        p_generator = data.predict_generator(sample, data_type, regression)
        predict_output = rm.model.predict_generator(generator=p_generator,
                                                    steps=1)
        print(predict_output)

        if regression == 0:
            # Labels: normal -> 0, anything else -> 1.
            truth = 0 if sample['class'] == 'normal' else 1
            # abnormal = [1,0], normal = [0,1]
            predict = 0 if np.argmax(predict_output[0]) == 1 else 1
        else:
            truth = float(sample['EF'])
            predict = predict_output[0][0]

        final_result_list.append([movie_id, truth, predict])

    # Persist the per-video results.
    result_columns = ['video_name', 'truth', 'predict']
    df = pd.DataFrame(final_result_list, columns=result_columns)
    save_folder = os.path.join(cg.nas_main_dir, 'results')
    ff.make_folder([save_folder])
    result_name = model + '-batch' + str(batch) + '-validation.xlsx'
    df.to_excel(os.path.join(save_folder, result_name), index=False)