def __data_preparation(self, path_input_dir, N):
        """Load picture data from ``path_input_dir``, split it into train/test
        sets, and optionally create noisy input copies for denoising training.

        Sets the following attributes on ``self``: ``dataset``,
        ``index_datapath_mapper``, ``y_train``, ``y_test``, ``N_test``,
        ``x_train``, ``x_test``, ``n_dimension``.

        :param path_input_dir: existing directory containing input pictures
        :param N: integer passed to data_loader.split_data_train_and_test;
            presumably the test-set size — TODO confirm against data_loader
        """
        assert os.path.exists(path_input_dir)
        assert isinstance(N, int)

        list_of_path = data_loader.make_path_pic_list(path_input_dir)
        index_datapath_mapper, dataset = data_loader.make_data_matrix(list_of_input_files=list_of_path)
        self.dataset = dataset
        self.index_datapath_mapper = index_datapath_mapper
        train_test_object = data_loader.split_data_train_and_test(self.dataset, N)

        # Targets are always the clean data, even when inputs get noise added.
        self.y_train = train_test_object['train']
        self.y_test = train_test_object['test']
        self.N_test = train_test_object['N_test']

        if self.is_add_noise:
            # Denoising setup: inputs are noisy copies, targets stay clean.
            x_train = self.add_noise(train_set=train_test_object['train'], noise_ratio=self.noise_rate)
            x_test = self.add_noise(train_set=train_test_object['test'], noise_ratio=self.noise_rate)
        else:
            x_train = train_test_object['train']
            x_test = train_test_object['test']

        assert isinstance(x_train, np.ndarray)
        assert isinstance(x_test, np.ndarray)
        assert len(x_train.shape) == 2
        assert len(x_test.shape) == 2

        self.x_train = x_train
        self.x_test = x_test
        # Feature dimensionality is identical in both branches above, so it
        # is computed once here instead of being duplicated per branch.
        self.n_dimension = x_train.shape[1]
def main(list_of_input_files, path_to_save_directory, project_name):
    """Build a FacePicDataSet from the input pictures and persist it to disk.

    Creates ``<path_to_save_directory>/<project_name>/`` if missing, then
    writes the pickled dataset (``<project_name>.pkl``), registers the design
    matrix location (``<project_name>.npy``) and saves the index-to-datapath
    mapping as UTF-8 JSON (``<project_name>_index_data.json``).

    :param list_of_input_files: list of paths to input picture files
    :param path_to_save_directory: existing directory to place outputs under
    :param project_name: name of the output sub-directory and file prefix
    """
    assert isinstance(list_of_input_files, list)
    assert os.path.exists(path_to_save_directory)
    # Python 2: accept both byte and unicode strings for the project name.
    assert isinstance(project_name, (str, unicode))

    # Compute the output directory once instead of re-joining it per file.
    project_dir = os.path.join(path_to_save_directory, project_name)
    if not os.path.exists(project_dir):
        os.mkdir(project_dir)

    index_datapath_mapper, data_matrix = data_loader.make_data_matrix(list_of_input_files=list_of_input_files)

    train = FacePicDataSet(data=data_matrix)
    # Tell the dataset object where its design matrix will live on disk.
    train.use_design_loc(os.path.join(project_dir, '{}.npy'.format(project_name)))

    train_pkl_path = os.path.join(project_dir, '{}.pkl'.format(project_name))
    # save in pickle
    serial.save(train_pkl_path, train)

    # save index_datasource_dict
    with codecs.open(os.path.join(project_dir, '{}_index_data.json'.format(project_name)),
                     'w', 'utf-8') as f:
        f.write(json.dumps(index_datapath_mapper, indent=4, ensure_ascii=False))