Code example #1
File: generator.py  Project: lucasgris/lid-latinoware
 def _delete_random(self, n_files):
     files = random.sample(self.files, n_files)
     for file in files:
         logm(f'Removing file {file}',
              cur_frame=currentframe(),
              mtype='I',
              stdout=False)
         self.files.remove(file)
         os.remove(file)
Code example #2
File: generator.py  Project: lucasgris/lid-latinoware
 def __init__(self,
              save_fn=np.save,
              load_fn=np.load,
              heap_dir='tmp',
              file_format='npy',
              recover_state=True,
              max_size=None,
              use_item_as_key=False,
              depth=4):
     """
     Creates a heap.
     
     Args:
         save_fn (callable, optional): Function to save data.
             Must accept a file path and the item to save, in that order,
             e.g. np.save(path, item). Defaults to np.save.
         load_fn (callable, optional): Function to load data.
             Defaults to np.load.
         heap_dir (str, optional): Heap directory. Defaults to 'tmp'.
         file_format (str, optional): File extension used for saved files.
             Defaults to 'npy'.
         recover_state (bool, optional): Choose to recover a previous 
             state of the heap. This will force the heap to get all saved
             files in the provided directory. Defaults to True.
         max_size (int, optional): Max size of the heap. When the heap
             reaches the max size, it will start to delete random instances
             automatically. Defaults to None.
         use_item_as_key (bool, optional): Use str(item) as the key instead
             of the provided name. Use carefully. Defaults to False.
         depth (int, optional): Depth of the heap. More depth means more
             subdirectories. Defaults to 4.
     """
     logm(f'Creating heap at {heap_dir}',
          cur_frame=currentframe(),
          mtype='I')
     os.makedirs(heap_dir, exist_ok=True)
     self.save_fn = save_fn
     self.load_fn = load_fn
     self.files = set()
     self.depth = depth
     self.heap_dir = heap_dir
     self.file_format = file_format
     self.use_item_as_key = use_item_as_key
     self.max_size = max_size
     if recover_state:
         saved_files = glob.glob(self.heap_dir + '/**/*.' +
                                 self.file_format,
                                 recursive=True)
         self.files = set([os.path.basename(fp) for fp in saved_files])
         logm(
             f'Recovering state of heap at {heap_dir}:'
             f' Found {len(self.files)} files',
             cur_frame=currentframe(),
             mtype='I')
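A minimal construction sketch for the heap above, assuming Heap can be imported from the project's generator.py (the import path, directory name and max_size value are illustrative assumptions, not taken from the repository):

import numpy as np
# from generator import Heap  # assumed import path

# Back the heap with .npy files under tmp/; recover_state=True makes it pick
# up any files already saved there by a previous run, and max_size triggers
# the random deletion shown in code example #1 once the limit is exceeded.
heap = Heap(save_fn=np.save,
            load_fn=np.load,
            heap_dir='tmp',
            file_format='npy',
            recover_state=True,
            max_size=10000)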
Code example #3
File: callbacks.py  Project: lucasgris/lid-latinoware
    def on_epoch_end(self, epoch, logs={}):
        elapsed_time = time.time() - self.start_time
        self.time_logs.append(elapsed_time)

        avg_elapsed_time = (float(sum(self.time_logs)) /
                            max(len(self.time_logs), 1))

        logm(
            f'Average elapsed time: {avg_elapsed_time}; stopping threshold: '
            f'{self.seconds - self.safety_factor * avg_elapsed_time}',
            cur_frame=currentframe(),
            mtype='I')
        if elapsed_time > self.seconds - self.safety_factor * avg_elapsed_time:
            self.model.stop_training = True
            if self.verbose:
                logm('Stopping after %s seconds.' % self.seconds,
                     cur_frame=currentframe(),
                     mtype='I')
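A usage sketch for the callback above, attached to a toy Keras model. The 3600-second budget is an arbitrary example (in setup_clbks below it comes from conf.max_seconds_per_run), and importing TimedStopping from the project's callbacks.py is an assumption:

import numpy as np
from keras.models import Sequential
from keras.layers import Dense
# from callbacks import TimedStopping  # assumed import path

model = Sequential([Dense(2, activation='softmax', input_shape=(4,))])
model.compile(optimizer='adam', loss='categorical_crossentropy')
x = np.random.rand(32, 4)
y = np.eye(2)[np.random.randint(0, 2, size=32)]
# Training stops early once another epoch would exceed the remaining budget.
model.fit(x, y, epochs=100, callbacks=[TimedStopping(3600)], verbose=0)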
Code example #4
def test_space(spaces, remove_bad_topologies=True):
    pp = pprint.PrettyPrinter(indent=4)
    good_spaces = []
    for i, space in enumerate(spaces):
        logm(f'Testing space [{i+1} of {len(spaces)}]',
             cur_frame=currentframe(),
             mtype='I')
        pp.pprint(space)
        try:
            K.clear_session()
            model = build_model(conf,
                                space,
                                input_shape=(SPEC_SHAPE_HEIGTH,
                                             SPEC_SHAPE_WIDTH, CHANNELS))
        except ValueError as err:
            logm(f'Failed when building the model: {str(err)}',
                 cur_frame=currentframe(),
                 mtype='I')
            if remove_bad_topologies:
                # Skip appending so the bad space is dropped from the result
                # (the original `del space` only removed the local name).
                continue
        good_spaces.append(space)
    return good_spaces
Code example #5
def spec_load_and_rshp(path, expected_fmt='png', remove_bad_file=True):
    """
    Loads an image and reshapes it into the proper format.
    
    Args:
        path (str): Path to the image.
        expected_fmt (str, optional): Expected format of the data.
            Choices = [png, npy]. If png, loads as an image using Image.open;
            if npy, loads as a numpy array using np.load. Defaults to 'png'.
        remove_bad_file (bool, optional): If True, removes the file if it
            is not in the expected shape or format. Defaults to True.
    
    Raises:
        ValueError: If the file is not in the expected format.

    Returns:
        np.ndarray: An array of shape (SPEC_SHAPE_HEIGTH, SPEC_SHAPE_WIDTH,
            CHANNELS) with the loaded data, or zeros if an exception occurred
            while loading the data.
    """
    if path[-3:] != expected_fmt:
        err = ValueError(f'File must be in format {expected_fmt}. '
                         f'(When trying to load {path})')

        logm(f'Exception {str(err)}', cur_frame=currentframe(), mtype='E')
        raise err
    try:
        if expected_fmt == 'png':
            spc = np.array(Image.open(path)).reshape(SPEC_SHAPE_HEIGTH,
                                                     SPEC_SHAPE_WIDTH,
                                                     CHANNELS)
        elif expected_fmt == 'npy':
            spc = np.load(path).reshape(SPEC_SHAPE_HEIGTH, SPEC_SHAPE_WIDTH,
                                        CHANNELS)
        spc = normalize(spc)
    except Exception as ex:
        logm(f'Bad file: {str(ex)} (when trying to load {path})',
             cur_frame=currentframe(),
             mtype='E')
        if remove_bad_file:
            logm(f'Removing file {path}', cur_frame=currentframe(), mtype='I')
            if not os.path.isfile(path):
                logm(f'Removing file {path}: is not a file',
                     cur_frame=currentframe(),
                     mtype='W')
            else:
                os.remove(path)
        return np.zeros(shape=(SPEC_SHAPE_HEIGTH, SPEC_SHAPE_WIDTH, CHANNELS))
    return spc
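A short usage sketch for the loader above; the file path is a placeholder, and SPEC_SHAPE_HEIGTH, SPEC_SHAPE_WIDTH and CHANNELS are the module-level constants the function already references:

# Load a spectrogram image and check it came back in the expected shape.
spc = spec_load_and_rshp('spectrograms/sample_0001.png', expected_fmt='png')
assert spc.shape == (SPEC_SHAPE_HEIGTH, SPEC_SHAPE_WIDTH, CHANNELS)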
Code example #6
def setup_heap(conf: Config):
    logm('Setting up heap...', cur_frame=currentframe(), mtype='I')
    logm(f'Setting up heap: the heap directory is {conf.heap_dir}',
         cur_frame=currentframe(),
         mtype='I')
    if not conf.use_heap:
        logm('Setting up heap: heap is deactivated',
             cur_frame=currentframe(),
             mtype='W')
        return None
    heap = Heap(load_fn=spec_load_and_rshp,
                save_fn=spec_save,
                file_format='png',
                heap_dir=HEAP_DIR)
    logm('Setting up heap... Done', cur_frame=currentframe(), mtype='I')
    return heap
Code example #7
File: generator.py  Project: lucasgris/lid-latinoware
 def add(self, item, key, replace_if_exists=False):
     """
     Adds a new item.
     
     Args:
         item: The item to save.
         key (str): Key of the file. Usually its name.
         replace_if_exists (bool, optional): If True, will replace existing
             files with the same key. Defaults to False.
     """
     f_path = self._generate_file_path(
         item if self.use_item_as_key else key)
     os.makedirs(os.path.join(self.heap_dir, f_path), exist_ok=True)
     try:
         self.save_fn(os.path.join(self.heap_dir, f_path, key), item)
         self.files.add(f'{key}.{self.file_format}')
     except FileExistsError as fe:
         logm(f'File exists ({str(fe)})',
              cur_frame=currentframe(),
              mtype='W')
         if replace_if_exists:
             logm(f'Replacing file {key}',
                  cur_frame=currentframe(),
                  mtype='W')
             os.remove(
                 os.path.join(self.heap_dir, f_path,
                              f'{key}.{self.file_format}'))
             self.save_fn(os.path.join(self.heap_dir, f_path, key), item)
             self.files.add(f'{key}.{self.file_format}')
     if self.max_size:
         files = glob.glob(self.heap_dir + '/**/*.' + self.file_format,
                           recursive=True)
         if len(files) > self.max_size:
             logm(
                 f'Max heap size {self.max_size} '
                 'reached: removing 100 random files',
                 cur_frame=currentframe(),
                 mtype='W')
             threading.Thread(target=self._delete_random, args=(100,)).start()
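A hedged round-trip sketch for add() above. The membership test and get() call mirror how the generator in code example #8 reads items back; the exact key convention and the import path are assumptions, not confirmed by the repository:

import numpy as np
# from generator import Heap  # assumed import path

heap = Heap(save_fn=np.save, load_fn=np.load, heap_dir='tmp',
            file_format='npy')
item = np.random.rand(128, 128, 3)
# Save under an explicit key; replace_if_exists=True overwrites a stale copy.
heap.add(item, 'sample_0001', replace_if_exists=True)
# Assumed lookup convention: the stored file name (key plus extension).
if 'sample_0001.npy' in heap:
    restored = heap.get('sample_0001.npy')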
Code example #8
File: generator.py  Project: lucasgris/lid-latinoware
    def __getitem__(self, index) -> (np.ndarray, np.ndarray):
        paths = self._paths[(index * self._batch_size):((index + 1) *
                                                        self._batch_size)]
        labels = self._labels[(index * self._batch_size):((index + 1) *
                                                          self._batch_size)]
        paths_and_labels = list(zip(paths, labels))
        # Fill batches
        x = []
        y = []
        threshold = 0
        for path_label in paths_and_labels:
            if self._not_found_ok:
                try:
                    # TODO: check if is more optimal load from heap or not
                    # TODO: duplicated code
                    if self._heap and os.path.basename(path_label[0]) in \
                            self._heap:
                        x.append(
                            self._heap.get(os.path.basename(path_label[0])))
                    else:
                        data = self._loader(path_label[0], **self._loaderkw)
                        if self._heap:
                            self._heap.add(data,
                                           os.path.basename(path_label[0]))
                        x.append(data)
                    y.append(path_label[1])
                except FileNotFoundError as fnf:
                    logm(f'File {path_label[0]} not found ({str(fnf)})',
                         cur_frame=currentframe(),
                         mtype='E')
                    # If not found, append a new path to load
                    p, l = self._get_random_instance()
                    paths_and_labels.append((p, l))
                    # Increase a threshold value to avoid infinite loops
                    threshold += 1

                    if threshold == 10:
                        # (threshold can be any value)
                        raise RuntimeError(
                            'Threshold value reached. Error when '
                            'trying to read the files provided '
                            '(not able to fill the batch).')
                    continue
            else:  # Read data without handling the exception
                # TODO: duplicated code
                if (self._heap
                        and os.path.basename(path_label[0]) in self._heap):
                    x.append(self._heap.get(os.path.basename(path_label[0])))
                else:
                    data = self._loader(path_label[0], **self._loaderkw)
                    if self._heap:
                        self._heap.add(data, os.path.basename(path_label[0]))
                    x.append(data)
                y.append(path_label[1])

            if (self._expected_shape is not None
                    and x[-1].shape != self._expected_shape):
                logm(
                    f'Expected shape {self._expected_shape} when loading '
                    f'{path_label[0]}. But found shape of {x[-1].shape} '
                    'instead',
                    cur_frame=currentframe(),
                    mtype='W')
                # TODO: remove file
                # If the last read data is not in the expected shape
                p, l = self._get_random_instance()
                paths_and_labels.append((p, l))
                # Increase a threshold value to avoid infinite loops
                threshold += 1
                # Remove the last instance
                x.pop()
                y.pop()

                # If all data was tried to be read, raise an exception
                if threshold == self._batch_size:
                    err = RuntimeError('Threshold value reached. Error when '
                                       'trying to read the files provided '
                                       '(not able to fill the batch).')
                    logm(f'Exception {str(err)}',
                         cur_frame=currentframe(),
                         mtype='E')
                    raise err
                continue
        return np.asarray(x), np.asarray(y)
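The slicing at the top of __getitem__ is the usual Keras Sequence convention: batch index i covers items [i*batch_size, (i+1)*batch_size). A tiny standalone sketch with made-up paths shows the mapping:

paths = [f'clip_{i:02d}.png' for i in range(10)]
batch_size = 4
num_batches = -(-len(paths) // batch_size)  # ceil(10 / 4) == 3
for index in range(num_batches):
    print(index, paths[index * batch_size:(index + 1) * batch_size])
# 0 ['clip_00.png', 'clip_01.png', 'clip_02.png', 'clip_03.png']
# 1 ['clip_04.png', 'clip_05.png', 'clip_06.png', 'clip_07.png']
# 2 ['clip_08.png', 'clip_09.png']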
Code example #9
def setup_clbks(conf: Config,
                setup_tb=False,
                setup_mc=True,
                setup_ts=False,
                setup_es=False):
    logm('Setting up callbacks', cur_frame=currentframe(), mtype='I')
    callbacks = []
    if setup_tb:
        logm('Setting up tensorboard', cur_frame=currentframe(), mtype='I')
        if conf.use_tb_embeddings:
            paths, labels = parse_csv(conf.test_data_csv, conf.data_path)
            test_set = TestDataset(paths, labels)

            with open(os.path.join(conf.log_dir, 'metadata.tsv'), 'w') as f:
                np.savetxt(f, labels, delimiter=",", fmt='%s')

            logm(
                f'Loading test data ({len(paths)} samples) '
                'for tensorboard callback...',
                cur_frame=currentframe(),
                mtype='I')
            y_test = test_set()[1]
            x_test = np.asarray([conf.data_loader(x) for x in test_set()[0]])

            logm(
                f'Loading test data ({len(paths)} samples) for '
                'tensorboard callback... Done',
                cur_frame=currentframe(),
                mtype='I')

            print('x_test shape:', x_test.shape)
            tb = TensorBoard(log_dir=os.path.join(conf.log_dir, 'tensorboard'),
                             histogram_freq=1,
                             batch_size=conf.batch_size,
                             write_graph=True,
                             write_grads=True,
                             write_images=True,
                             embeddings_freq=5,
                             embeddings_layer_names=['features'],
                             embeddings_metadata='metadata.tsv',
                             embeddings_data=x_test)
        else:
            tb = TensorBoard(log_dir=conf.log_dir,
                             histogram_freq=1,
                             batch_size=conf.batch_size,
                             write_graph=True)
        callbacks.append(tb)
    if setup_ts:
        logm('Setting up TimedStopping', cur_frame=currentframe(), mtype='I')
        if conf.max_seconds_per_run:
            callbacks.append(TimedStopping(conf.max_seconds_per_run))
        else:
            logm(
                'Could not set up TimedStopping: '
                'conf.max_seconds_per_run is set as None',
                cur_frame=currentframe(),
                mtype='W')
    if setup_es:
        logm('Setting up EarlyStopping', cur_frame=currentframe(), mtype='I')
        callbacks.append(EarlyStopping(patience=5))
    if setup_mc:
        logm('Setting up ModelCheckpoint', cur_frame=currentframe(), mtype='I')
        callbacks.append(
            ModelCheckpoint(f'{conf.model_checkpoint_location}.h5',
                            period=1,
                            save_best_only=True))

    logm('Setting up callbacks... Done', cur_frame=currentframe(), mtype='I')
    return callbacks
Code example #10
def main(conf: Config):
    model = build_model(conf)
    conf.model_name = 'mit'
    logm(f'Current configuration is:\n{repr(conf)}',
         cur_frame=currentframe(),
         mtype='I')
    model.summary()

    setup_dirs(conf)
    if TRAIN:
        logm(f'Start train: {str(conf)}', cur_frame=currentframe(), mtype='I')
        with Timer() as t:
            train(model, conf, developing=DEVELOPING)

        logm(f'End train: total time taken: {str(t.interval)}',
             cur_frame=currentframe(),
             mtype='I')
    if SAVE_MODEL:
        logm(f'Saving model: {str(conf.model_name)} at {conf.model_location}',
             cur_frame=currentframe(),
             mtype='I')
        model.save(conf.model_location + '.h5')
    if EVALUATE:
        logm(f'Start evaluation: {str(conf)}',
             cur_frame=currentframe(),
             mtype='I')

        test_paths, test_labels = parse_csv(conf.test_data_csv, conf.data_path)
        if DEVELOPING:
            logm('Developing is set as true: limiting size of dataset',
                 cur_frame=currentframe(),
                 mtype='I')
            paths_labels = list(zip(test_paths, test_labels))
            random.shuffle(paths_labels)
            test_paths, test_labels = zip(*paths_labels)
            test_paths = test_paths[:10]
            test_labels = test_labels[:10]

        logm(f'Get test data: total of {len(test_labels)}',
             cur_frame=currentframe(),
             mtype='I')
        if not TRAIN:
            logm(f'Loading model from {conf.model_location}',
                 cur_frame=currentframe(),
                 mtype='I')
            if conf.model_location is None:
                logm(
                    'Model location is None: '
                    'Load the configuration file of a valid trained '
                    'model',
                    cur_frame=currentframe(),
                    mtype='E')
                raise ValueError('Not valid configuration file for evaluation')
            model = load_model(conf.model_location + '.h5')
        logm(f'Start evaluation of {str(conf)}',
             cur_frame=currentframe(),
             mtype='I')
        test_dataset = TestDataset(test_paths,
                                   test_labels,
                                   name=conf.dataset_name,
                                   num_classes=conf.num_classes)
        with Timer() as t:
            evaluate(model, conf, test_dataset, 'final')
        logm(f'End evaluation: total time taken: {str(t.interval)}',
             cur_frame=currentframe(),
             mtype='I')
        if EVALUATE_BEST_MODEL:
            logm('Evaluating best model', cur_frame=currentframe(), mtype='I')
            logm(f'Loading model from {conf.model_checkpoint_location}',
                 cur_frame=currentframe(),
                 mtype='I')
            model = load_model(conf.model_checkpoint_location + '.h5')
            logm(f'Start evaluation of {str(conf)} (best model)',
                 cur_frame=currentframe(),
                 mtype='I')
            with Timer() as t:
                evaluate(model, conf, test_dataset, 'best')
            logm(
                'End evaluation (best model): total time taken: '
                f'{str(t.interval)}',
                cur_frame=currentframe(),
                mtype='I')
Code example #11
def setup_dirs(conf: Config):
    logm('Setting up directories...', cur_frame=currentframe(), mtype='I')

    models_checkpoint_dir = os.path.join(MODELS_CHECKPOINT_DIR, str(conf))
    os.makedirs(models_checkpoint_dir, exist_ok=True)
    conf.models_checkpoint_dir = models_checkpoint_dir
    logm(
        'Setting up directories: the model checkpoints directory is '
        f'{conf.models_checkpoint_dir}',
        cur_frame=currentframe(),
        mtype='I')
    log_dir = os.path.join(LOG_DIR, str(conf))
    os.makedirs(log_dir, exist_ok=True)
    conf.log_dir = log_dir
    logm(f'Setting up directories: the log dir is {conf.log_dir}',
         cur_frame=currentframe(),
         mtype='I')

    models_dir = os.path.join(MODELS_DIR, str(conf))
    os.makedirs(models_dir, exist_ok=True)
    conf.models_dir = models_dir
    logm(
        'Setting up directories: the model directory is '
        f'{conf.models_dir}',
        cur_frame=currentframe(),
        mtype='I')

    heap_dir = HEAP_DIR
    os.makedirs(heap_dir, exist_ok=True)
    conf.heap_dir = heap_dir
    logm('Setting up directories: the heap directory is '
         f'{conf.heap_dir}',
         cur_frame=currentframe(),
         mtype='I')

    logm('Setting up directories... Done', cur_frame=currentframe(), mtype='I')
Code example #12
def train(model: keras.Model, conf: Config, developing=False):
    logm(f'Running train for {conf}', cur_frame=currentframe(), mtype='I')

    paths, labels = parse_csv(conf.train_data_csv, conf.data_path)
    if developing:
        logm('Developing is set as true: limiting size of dataset',
             cur_frame=currentframe(),
             mtype='I')
        paths_labels = list(zip(paths, labels))
        random.shuffle(paths_labels)
        paths, labels = zip(*paths_labels)
        paths = paths[:100]
        labels = labels[:100]
        epochs = 2
        conf.steps_per_epoch = 10

    dataset = Dataset(paths,
                      labels,
                      shuffle=True,
                      val_split=0.005,
                      name=conf.dataset_name)

    logm(f'Loading validation data...', cur_frame=currentframe(), mtype='I')
    X_val = np.asarray([conf.data_loader(x) for x in dataset.validation[0]])
    y_val = dataset.validation[1]
    logm(f'Loading validation data... Done',
         cur_frame=currentframe(),
         mtype='I')
    logm(f'Validation data shape is {X_val.shape}',
         cur_frame=currentframe(),
         mtype='I')

    if conf.use_generator:
        logm('Using generator', cur_frame=currentframe(), mtype='I')
        batch_size = (conf.batch_size
                      if conf.batch_size < len(dataset.train[0]) else len(
                          dataset.train[0]))
        train_gen = Generator(paths=dataset.train[0],
                              labels=dataset.train[1],
                              loader_fn=conf.data_loader,
                              batch_size=batch_size,
                              heap=setup_heap(conf),
                              expected_shape=(SPEC_SHAPE_HEIGTH,
                                              SPEC_SHAPE_WIDTH, 3))
        history = model.fit_generator(
            generator=train_gen,
            validation_data=(X_val, y_val),
            use_multiprocessing=True,
            max_queue_size=96,
            workers=12,
            steps_per_epoch=conf.steps_per_epoch,
            epochs=conf.epochs if not developing else 2,
            callbacks=setup_clbks(conf, conf.log_dir),
            verbose=1)
    else:
        X_train = np.asarray([conf.data_loader(x) for x in dataset.train[0]])
        y_train = dataset.train[1]
        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=conf.batch_size,
                            validation_data=(X_val, y_val),
                            epochs=conf.epochs if not developing else 2,
                            callbacks=setup_clbks(conf, conf.log_dir),
                            verbose=1)
    with open(os.path.join(conf.log_dir, f'history_{conf}.pkl'), 'wb') as f:
        pickle.dump(history, f, protocol=3)
    return history
Code example #13


if __name__ == '__main__':
    with open(LOG_FILE, 'a') as log_file:
        log_file.write('\n---------------------------------------------\n')
        log_file.write(f'LOG: {__file__}: {datetime.now()}\n')
    try:
        if (not TRAIN and EVALUATE) or LOAD_CONF:
            if CONF_LOCATION is None:
                raise Exception('Must set CONF_LOCATION')
            conf = Config.frompicke(CONF_LOCATION)
        else:
            conf = Config(conf_name=f'{__file__}_{SAMPLING_RATE}_'
                          f'{MAX_SECONDS_PER_RUN}_{STEPS_PER_EPOCH}',
                          data_loader=wav_to_specdata,
                          use_tb_embeddings=TB_EMBEDDINGS)
        main(conf)
    except Exception as err:
        logm(f'FATAL ERROR: {str(err)}', cur_frame=currentframe(), mtype='E')
        os.remove(str(conf) + '.pkl')
        raise err
Code example #14
def main(conf: Config):
    logm('>>> Started random hyperparameter search <<<')

    conf.model_name = f'random_search_{TIME_NOW}'
    logm(f'Current configuration is:\n{repr(conf)}',
         cur_frame=currentframe(),
         mtype='I')

    setup_dirs(conf)
    logm('Creating space', cur_frame=currentframe())
    space = create_space(RUNS)
    space = test_space(space, remove_bad_topologies=True)
    logm(f'Start random search: {str(conf)}', cur_frame=currentframe())
    with Timer() as t:
        random_search(conf, space)

    logm(f'End random search: total time taken: {str(t.interval)}',
         cur_frame=currentframe(),
         mtype='I')

    logm(
        'End evaluation (best model): total time taken: '
        f'{str(t.interval)}',
        cur_frame=currentframe(),
        mtype='I')
Code example #15
def random_search(conf: Config, search_space):
    logm(f'Running random search for {conf}',
         cur_frame=currentframe(),
         mtype='I')
    logm(f'Created report file at {conf.report_file}',
         cur_frame=currentframe(),
         mtype='I')

    with open(conf.report_file, 'w') as output:
        output.write(f'run,{params_keys_comma_separated()},validation_acc,'
                     'time_taken\n')

    for i, space in enumerate(search_space):
        logm(
            f'Running random search on space {space} - {i+1} of '
            f'{len(search_space)}',
            cur_frame=currentframe(),
            mtype='I')

        if conf.time_limit is not None and datetime.now() > conf.time_limit:
            logm('Time limit reached: end random search',
                 cur_frame=currentframe(),
                 mtype='I')
            return
        logm('Building model', cur_frame=currentframe(), mtype='I')
        try:
            K.clear_session()
            model = build_model(conf,
                                space,
                                input_shape=(SPEC_SHAPE_HEIGTH,
                                             SPEC_SHAPE_WIDTH, CHANNELS))
        except ValueError as err:
            logm(f'Error when building the model: {str(err)} ',
                 cur_frame=currentframe(),
                 mtype='E')
            continue
        model.summary()
        with Timer() as t:
            result, model = train(model,
                                  conf,
                                  space['batch_size'],
                                  developing=DEVELOPING)
        time_taken = t.interval

        validation_acc = np.amax(result.history['val_acc'])
        with open(conf.report_file, 'a') as output:
            output.write(f'{conf.run},{params_values_comma_separated(space)},'
                         f'{validation_acc},{time_taken}\n')
        conf.run += 1
        with open(os.path.join(conf.log_dir, f'hist_acc_{conf.run}.csv'),
                  'w') as output:
            output.write(f'epoch,val_acc\n')
            for itr, val_acc in enumerate(result.history['val_acc']):
                output.write(f'{itr+1},{val_acc}\n')
        if SAVE_MODEL:
            model_path = os.path.join(conf.models_dir,
                                      conf.model_name + f'_{conf.run}.h5')
            logm(f'Saving model at {model_path}',
                 cur_frame=currentframe(),
                 mtype='I')
            model.save(model_path)
Code example #16
def train(model: Model, conf: Config, batch_size, developing=False):
    logm(f'Running train for {conf}', cur_frame=currentframe(), mtype='I')
    train_paths, train_labels = parse_csv(conf.train_data_csv, conf.data_path)
    val_paths, val_labels = parse_csv(conf.eval_data_csv, conf.data_path)
    if developing:
        logm('Developing is set as true: limiting size of dataset',
             cur_frame=currentframe(),
             mtype='I')
        train_paths_labels = list(zip(train_paths, train_labels))
        random.shuffle(train_paths_labels)
        train_paths, train_labels = zip(*train_paths_labels)
        train_paths = train_paths[:100]
        train_labels = train_labels[:100]
        val_paths = val_paths[:10]
        val_labels = val_labels[:10]
        epochs = 2
        conf.steps_per_epoch = 10

    train_dataset = Dataset(train_paths,
                            train_labels,
                            name=conf.dataset_name + '[TRAIN]',
                            num_classes=NUM_CLASSES)
    val_dataset = Dataset(val_paths,
                          val_labels,
                          name=conf.dataset_name + '[VALIDATION]',
                          num_classes=NUM_CLASSES)

    epochs = int(len(train_dataset) // (batch_size * conf.steps_per_epoch)) + 1
    logm(
        f'Calculated number of epochs to process all data at least 1 time: {epochs}',
        cur_frame=currentframe())

    logm(f'Loading validation data...', cur_frame=currentframe(), mtype='I')
    X_val = np.asarray([conf.data_loader(x) for x in val_dataset()[0]])
    y_val = val_dataset()[1]
    logm(f'Loading validation data... Done',
         cur_frame=currentframe(),
         mtype='I')
    logm(f'Validation data shape is {X_val.shape}',
         cur_frame=currentframe(),
         mtype='I')
    if conf.use_generator:
        logm('Using generator', cur_frame=currentframe(), mtype='I')
        train_gen = Generator(paths=train_dataset()[0],
                              labels=train_dataset()[1],
                              loader_fn=conf.data_loader,
                              batch_size=int(batch_size),
                              heap=setup_heap(conf) if conf.use_heap else None,
                              expected_shape=(SPEC_SHAPE_HEIGTH,
                                              SPEC_SHAPE_WIDTH, CHANNELS))
        history = model.fit_generator(generator=train_gen,
                                      validation_data=(X_val, y_val),
                                      use_multiprocessing=True,
                                      max_queue_size=96,
                                      workers=12,
                                      steps_per_epoch=conf.steps_per_epoch,
                                      epochs=epochs,
                                      callbacks=setup_clbks(conf),
                                      verbose=1)
    else:
        X_train = np.asarray([conf.data_loader(x) for x in train_dataset()[0]])
        y_train = train_dataset()[1]
        history = model.fit(x=X_train,
                            y=y_train,
                            batch_size=int(batch_size),
                            validation_data=(X_val, y_val),
                            epochs=epochs,
                            callbacks=setup_clbks(conf),
                            verbose=1)
    return history, model
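As a worked example of the epoch calculation near the top of this train() (the numbers are illustrative, not taken from the project):

# len(train_dataset) = 10000 samples, batch_size = 32, steps_per_epoch = 100
num_samples, batch_size, steps_per_epoch = 10000, 32, 100
epochs = int(num_samples // (batch_size * steps_per_epoch)) + 1
print(epochs)  # 10000 // 3200 + 1 == 3 + 1 == 4
# Four epochs of 100 steps x 32 samples ~= 12800 draws, enough to cover the
# 10000 training samples at least once.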