class ExtractLandmarksFixLength(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40):
        super(ExtractLandmarksFixLength, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Fixing Landmark Sequence Count'
        self.seq_length = seq_length
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the path to the fixed-length sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + '-' + str(self.seq_length) + SQ_LM_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # Load the landmark sub-samples.
        sub_samples = np.load(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                              os.path.basename(filename_no_ext) + LM_FILE_SUFFIX)

        # Now downsample to just the ones we need.
        sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            sequence.append(sub_sample)

        # Save the sequence.
        np.save(path, np.array(sequence))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists we know there are exactly seq_length sub-samples inside it.
        # If the file does not exist there are no sub-samples.
        if self.check_already_extracted(sample_tuple, self.target_dir):
            return self.seq_length
        return 0

    def check_already_extracted(self, sample_tuple, target_dir):
        """Check whether the fixed-length sequence file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(
            target_dir + '/' + input_dir + '/' + class_name + '/' +
            filename_no_ext + '-' + str(self.seq_length) + SQ_LM_FILE_SUFFIX))
class ExtractLandmarks(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 use_padding=False, nb_min_subsample=None):
        super(ExtractLandmarks, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Extracting Landmarks'
        self.seq_length = seq_length
        # Obtain detector and predictor.
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(PREDICTOR_PATH)
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)
        self.use_padding = use_padding
        self.nb_min_subsample = seq_length
        if use_padding:
            if nb_min_subsample:
                self.nb_min_subsample = nb_min_subsample
            else:
                self.nb_min_subsample = seq_length // 2

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.nb_min_subsample:
            return

        # Get the path to the landmark sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + LM_FILE_SUFFIX

        # Check if we already have it.
        if os.path.exists(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if self.seq_length:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            # Extract and normalize landmarks to build the sequence.
            landmarks = self.__detect_landmarks(sub_sample)
            if landmarks:
                landmarks = self.__normalize_landmarks(landmarks)
                sequence.append(landmarks)

        # Pad with zeros when the sequence is shorter than seq_length but long enough to keep.
        if self.use_padding and self.nb_min_subsample < len(sequence) < self.seq_length:
            nb_padding_needed = max(0, self.seq_length - len(sequence))
            for i in range(nb_padding_needed):
                sequence.append([0 for x in range(0, 23)])  # pad with zeros

        if len(sequence) == self.seq_length:
            # Save the sequence.
            np.save(path, np.array(sequence))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists, the saved array tells us how many sub-samples it holds.
        # If the file does not exist there are no sub-samples.
        if ExtractLandmarks.check_already_extracted(sample_tuple, self.target_dir):
            input_dir, class_name, filename_no_ext, _ = sample_tuple
            lm_arr = np.load(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                             filename_no_ext + LM_FILE_SUFFIX)
            return len(lm_arr)
        return 0

    def __detect_landmarks(self, image_path):
        image = Image.open(image_path)
        # Convert the image to a numpy array.
        img = np.asanyarray(image)
        img.flags.writeable = True
        # Output list.
        face_landmark_tuples = []
        # Obtain landmarks.
        dets = self.detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for k, rect in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                k, rect.left(), rect.top(), rect.right(), rect.bottom()))
            # Get the landmarks/parts for the face in box rect.
            shape = self.predictor(img, rect)
            face_landmark_tuples.append([shape.part(x) for x in range(68)])
        return face_landmark_tuples

    @staticmethod
    def check_already_extracted(sample_tuple, target_dir):
        """Check whether the landmark file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(target_dir + '/' + input_dir + '/' + class_name + '/' +
                                   filename_no_ext + LM_FILE_SUFFIX))

    def __normalize_landmarks(self, landmarks):
        # Use only the first detected face.
        landmarks = landmarks[0]
        top_point = landmarks[TOP_OF_FACE_INDEX]
        bottom_point = landmarks[BOTTOM_OF_FACE_INDEX]
        height = FACE_HEIGHT_MULTIPLIER * (bottom_point.y - top_point.y)
        center_y = landmarks[CENTER_OF_FACE_INDEX].y
        # Collect the indices of the landmarks we keep.
        lm_indices = []
        lm_indices.extend(LEFT_EYEBROW_INDICES)
        lm_indices.extend(LEFT_EYE_INDICES)
        lm_indices.extend(LEFT_LIPS_INDICES)
        lm_indices.extend(LEFT_MOUTH_INDICES)
        # Normalize each kept landmark's vertical offset from the face center by face height.
        normalized_landmarks = []
        for idx in lm_indices:
            lm_point_y = landmarks[idx].y
            normalized_landmarks.append(abs(lm_point_y - center_y) * 1.0 / height)
        return normalized_landmarks
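# A minimal usage sketch, not part of the source: driving ExtractLandmarks and
# ExtractLandmarksFixLength by hand for a single data.csv row. The directory paths
# and the sample tuple values below are made-up assumptions for illustration; in the
# project these rows come from the source data.csv file.
extract = ExtractLandmarks('/data/frames', '/data/landmarks',
                           seq_length=40, use_padding=True)
fix_length = ExtractLandmarksFixLength('/data/landmarks', '/data/landmarks-40',
                                       seq_length=40)

# Row structure: (input_dir, class_name, filename_no_ext, nb_sub_samples).
sample_row = ('train', 'happy', 'subject01_take03', '52')  # hypothetical values
extract.do_process(sample_row)      # writes the landmark .npy file for this sample
fix_length.do_process(sample_row)   # resamples that file to exactly 40 entries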
class DrawLandmarks(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 pretrained_model=None, layer_name=None):
        super(DrawLandmarks, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Drawing Landmarks'
        self.seq_length = seq_length
        # Obtain detector and predictor.
        self.detector = dlib.get_frontal_face_detector()
        self.predictor = dlib.shape_predictor(PREDICTOR_PATH)
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the target directory for this sample's annotated frames.
        target_class_path = self.target_dir + '/' + input_dir + '/' + class_name + '/'
        if not os.path.exists(target_class_path):
            os.makedirs(target_class_path)

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if self.seq_length:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        for sub_sample in sub_samples:
            sub_sample_img = Image.open(sub_sample)
            # Detect landmarks and draw them onto a copy of the frame.
            landmarks = self.__detect_landmarks(sub_sample_img)
            if landmarks:
                sub_sample_img_lm = self.draw_landmarks(sub_sample_img, landmarks)
                sub_sample_img_lm.save(
                    target_class_path +
                    os.path.splitext(os.path.basename(sub_sample))[0] + LM_SUFFIX)
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        input_dir, class_name, filename_no_ext, nb_sub_samples = sample_tuple
        sub_samples = glob.glob(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')
        return len(sub_samples)

    def __detect_landmarks(self, sub_sample_img):
        # Convert the image to a numpy array.
        img = np.asanyarray(sub_sample_img)
        img.flags.writeable = True
        # Output list.
        face_landmark_tuples = []
        # Obtain landmarks.
        dets = self.detector(img, 1)
        print("Number of faces detected: {}".format(len(dets)))
        for k, rect in enumerate(dets):
            print("Detection {}: Left: {} Top: {} Right: {} Bottom: {}".format(
                k, rect.left(), rect.top(), rect.right(), rect.bottom()))
            # Get the landmarks/parts for the face in box rect.
            shape = self.predictor(img, rect)
            face_landmark_tuples.append([shape.part(x) for x in range(68)])
        return face_landmark_tuples

    def draw_landmarks(self, image, parts):
        radius = 1
        # Copy the original image; do not touch it.
        out_image = image.copy()
        if out_image.mode != "RGB":
            out_image = out_image.convert("RGB")
        # Draw a circle for each landmark of the first detected face.
        draw = ImageDraw.Draw(out_image)
        for part in parts[0]:
            x = part.x
            y = part.y
            draw.ellipse([x - radius, y - radius, x + radius, y + radius], fill=(250, 0, 0))
        return out_image
class AdjustSubsampleCount(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 use_padding=False, nb_min_subsample=None):
        super(AdjustSubsampleCount, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Adjusting Subsample Count to ' + str(seq_length)
        self.seq_length = seq_length
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)
        self.use_padding = use_padding
        self.nb_min_subsample = seq_length
        if use_padding:
            if nb_min_subsample:
                self.nb_min_subsample = nb_min_subsample
            else:
                self.nb_min_subsample = seq_length // 2

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.nb_min_subsample:
            return

        # If the number of sub-samples is less than seq_length, find how many padding
        # sub-samples are required. If padding is not required (there are enough or more
        # than enough samples), the count is zero. If padding is disabled and would be
        # needed, the check above has already returned before reaching this point.
        nb_padding_needed = max(0, self.seq_length - int(nb_sub_samples))

        # Path used to check whether this sample has already been processed.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + FEATURE_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        if nb_padding_needed == 0:
            sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        # Write empty (black) padding frames so every sample ends up with seq_length frames.
        # self.dimension is expected to be a (width, height) tuple here.
        for i in range(nb_padding_needed):
            empty_image = Image.new('RGB', self.dimension)
            target_file = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
                os.path.basename(filename_no_ext) + '_' + '{:08d}'.format(0) + \
                '_' + '{:02d}'.format(i) + '.jpg'
            empty_image.save(target_file)

        for sub_sample in sub_samples:
            # Resize the frame if needed and copy it to the target directory.
            img = cv2.imread(sub_sample)
            height, width = img.shape[:2]
            if self.dimension[0] != height or self.dimension[1] != width:
                img = cv2.resize(img, self.dimension)
            target_file = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
                os.path.basename(sub_sample)
            cv2.imwrite(target_file, img)
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        sub_samples = glob.glob(self.target_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')
        return len(sub_samples)
def train(source_dir, work_root_dir, data_type, seq_length, model, saved_model=None,
          concat=False, image_shape=None, load_to_memory=False):
    if not os.path.exists(work_root_dir):
        os.makedirs(work_root_dir)
        os.makedirs(work_root_dir + '/checkpoints')
        os.makedirs(work_root_dir + '/logs')

    # Set variables.
    nb_epoch = 2000
    batch_size = 8
    data_file = source_dir + '/data.csv'

    # Helper: TensorBoard.
    tb = TensorBoard(log_dir=work_root_dir + '/logs')

    # Helper: Save the model.
    checkpointer = ModelCheckpoint(
        save_weights_only=False,
        filepath=work_root_dir + '/checkpoints/w.{epoch:03d}-{val_acc:.4f}-{val_loss:.2f}.hdf5',
        verbose=1,
        save_best_only=True,
        monitor='val_acc')

    # Helper: Stop when we stop learning.
    early_stopper = EarlyStopping(patience=20, monitor='val_acc')

    # Helper: Save results.
    timestamp = time.time()
    csv_logger = CSVLogger(work_root_dir + '/logs/' + model + '-' + 'training-' +
                           str(timestamp) + '.log')

    # Get the data and process it.
    if image_shape is None:
        features_length = determine_feature_count(source_dir)
        data = DataSet(data_file=data_file, sequence_dir=source_dir,
                       seq_length=seq_length, class_limit=None, given_classes=None)
    else:
        features_length = None
        data = DataSet(data_file=data_file, sequence_dir=source_dir,
                       seq_length=seq_length, class_limit=None,
                       image_shape=image_shape, given_classes=None)

    # Get samples per epoch.
    # Multiply by 0.7 to attempt to guess how much of data.data is the train set.
    steps_per_epoch = (len(data.data) * 0.7) // batch_size

    X = None
    y = None
    X_test = None
    y_test = None
    generator = None
    val_generator = None

    if load_to_memory:
        # Get data.
        X, y = data.get_all_sequences_in_memory(True, data_type, concat)
        X_test, y_test = data.get_all_sequences_in_memory(False, data_type, concat)
        print("Train samples %d, test samples %d" % (len(X), len(X_test)))
    else:
        # Get generators.
        generator = data.frame_generator(batch_size, True, data_type, concat)
        val_generator = data.frame_generator(1, False, data_type, concat)

    # Get the model.
    rm = ResearchModels(len(data.classes), model, seq_length, saved_model,
                        features_length=features_length, dimension=image_shape)

    # Fit!
    if load_to_memory:
        # Use standard fit.
        history = rm.model.fit(
            X, y,
            batch_size=batch_size,
            validation_data=(X_test, y_test),
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            epochs=nb_epoch)
    else:
        # Use fit_generator.
        history = rm.model.fit_generator(
            generator=generator,
            steps_per_epoch=steps_per_epoch,
            epochs=nb_epoch,
            verbose=1,
            callbacks=[checkpointer, tb, early_stopper, csv_logger],
            validation_data=val_generator,
            validation_steps=365)

    if val_generator:
        _, test = data.split_train_test()
        present_results_generator(work_root_dir, rm.model, history, val_generator,
                                  len(test), classes=data.classes)
    else:
        present_results(work_root_dir, rm.model, history, X_test=X_test,
                        Y_test=y_test, classes=data.classes)
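# A minimal invocation sketch, not from the source: calling train() on pre-extracted
# feature sequences. The paths, data_type value, and model name are assumptions for
# illustration; they must match what DataSet and ResearchModels actually accept.
train(source_dir='/data/features',      # directory holding data.csv and the sequences
      work_root_dir='/runs/lstm-40',    # checkpoints and logs are written here
      data_type='features',             # assumed flag understood by DataSet
      seq_length=40,
      model='lstm',                     # assumed to be a model key in ResearchModels
      load_to_memory=False)             # stream batches via frame_generator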
class ExtractFeatures(DataProcessBase):

    def __init__(self, source_dir, target_dir, data_file_index=0, dimension=224,
                 limit_input_dirs=None, generate_data_file_only=False, seq_length=40,
                 pretrained_model=None, layer_name=None):
        super(ExtractFeatures, self).__init__(
            source_dir, target_dir, data_file_index, dimension,
            limit_input_dirs, generate_data_file_only)
        self.process_description = 'Extracting Features'
        self.seq_length = seq_length
        # Get the feature extractor model.
        self.extractor = Extractor(pretrained_model, layer_name, (dimension, dimension))
        # Get the dataset.
        self.data = DataSet(source_dir + '/data.csv', target_dir,
                            seq_length=seq_length, class_limit=None)

    def do_process(self, source_row_tuple):
        # Unpack the row into variables.
        input_dir, class_name, filename_no_ext, nb_sub_samples = source_row_tuple

        if int(nb_sub_samples) < self.seq_length:
            return

        # Get the path to the feature sequence file for this sample.
        path = self.target_dir + '/' + input_dir + '/' + class_name + '/' + \
            os.path.basename(filename_no_ext) + FEATURE_FILE_SUFFIX

        # Check if we already have it.
        if os.path.isfile(path):
            return

        if not os.path.exists(os.path.dirname(path)):
            os.makedirs(os.path.dirname(path))

        # List the sub-samples (frames) for this sample.
        sub_samples = glob.glob(self.source_dir + '/' + input_dir + '/' + class_name + '/' +
                                filename_no_ext + '*.*')

        # Now downsample to just the ones we need.
        sub_samples = self.data.rescale_list(sub_samples, self.seq_length)

        sequence = []
        for sub_sample in sub_samples:
            # Extract features to build the sequence.
            features = self.extractor.extract(sub_sample)
            sequence.append(features)

        # Save the sequence.
        np.savetxt(path, np.array(sequence).reshape((self.seq_length, -1)))
        return

    def get_nb_sub_samples(self, sample_tuple):
        """
        Return the generated number of sub-samples for the sample.
        :param sample_tuple: has the structure input_dir, class_name, filename_no_ext, nb_sub_samples
        :return: number of sub-samples
        """
        # If the file exists we know there are exactly seq_length sub-samples inside it.
        # If the file does not exist there are no sub-samples.
        if ExtractFeatures.check_already_extracted(sample_tuple, self.target_dir):
            return self.seq_length
        return 0

    @staticmethod
    def check_already_extracted(sample_tuple, target_dir):
        """Check whether the feature file for this sample already exists."""
        input_dir, class_name, filename_no_ext, _ = sample_tuple
        return bool(os.path.exists(
            target_dir + '/' + input_dir + '/' + class_name + '/' +
            filename_no_ext + FEATURE_FILE_SUFFIX))
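# A minimal construction sketch, not from the source: building the feature extraction
# step that produces the sequences train() later consumes. The pretrained model name,
# layer name, paths, and sample row are hypothetical; whatever Extractor actually
# accepts determines the valid values.
extract_features = ExtractFeatures('/data/frames-224', '/data/features',
                                   dimension=224, seq_length=40,
                                   pretrained_model='vgg16',   # assumed model id
                                   layer_name='fc6')           # assumed layer name
# Row structure: (input_dir, class_name, filename_no_ext, nb_sub_samples).
extract_features.do_process(('train', 'happy', 'subject01_take03', '52'))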