Beispiel #1
0
 def extract_dataset(option, file_path):
     """Run the layout check selected by ``option`` and report whether it passed.

     ``'option1'`` calls ``find_image_files_folder_per_class`` and
     ``'option2'`` calls ``find_image_files_from_file``; every other value
     (``'option3'`` included) performs no check at all.  ``file_path`` is
     accepted but not used: both helpers receive empty-string paths.

     Returns ``False`` if the selected helper raises ``AssertionError``,
     ``True`` otherwise.
     """
     data_dir, info_file = '', ''
     try:
         if option == 'option1':
             find_image_files_folder_per_class(data_dir)
         elif option == 'option2':
             find_image_files_from_file(data_dir, info_file)
     except AssertionError:
         # The helpers signal an invalid layout by failing an assertion.
         return False
     return True
Beispiel #2
0
    def test_upload(self, request):
        """Save an uploaded test set and tag it with a layout marker file.

        The upload is read from ``request.files['input_file']`` and stored in
        a folder named after the part of the filename before the first dot,
        under the dataset path with ``'train'`` replaced by ``'test'``.

        An empty marker file records which layout was recognised:

        * ``.option0`` -- a ``.npz`` numpy file
        * ``.option1`` -- an archive containing only images
        * ``.option2`` -- an archive with one folder per class
        * ``.option3`` -- an archive with a ``test.*`` / ``labels.*`` listing,
          which is renamed to ``labels.txt``

        Returns:
            ``'ok'`` on success, ``"The file contents already exists."`` when
            ``unzip`` reports failure, and ``"The file contents are not
            valid."`` when no layout matches (the upload folder is removed in
            that case).
        """
        test_file = request.files['input_file']
        # NOTE(review): the client-supplied filename is used to build paths
        # without sanitising it -- consider a secure_filename-style cleanup.
        filename = test_file.filename

        dataset_test_path = os.path.join(
            self._dataset.get_dataset_path().replace('train', 'test'),
            filename.split('.')[0])
        path_file = os.path.join(dataset_test_path, filename)
        os.makedirs(dataset_test_path, exist_ok=True)

        test_file.save(path_file)

        invalid_msg = "The file contents are not valid."

        def write_marker(name):
            # Create the empty marker and close the handle immediately; the
            # previous bare open() left the file descriptor dangling.
            with open(os.path.join(dataset_test_path, name), 'w'):
                pass

        try:
            if path_file.endswith('.npz'):
                try:
                    _, test_data = find_dataset_from_numpy(path_file,
                                                           requires_y=False,
                                                           only_test=True)
                    if test_data is None:
                        tree_remove(dataset_test_path)
                        return invalid_msg
                    # Rewrite the upload in the normalised x/y form.
                    np.savez(path_file, x=test_data[0], y=test_data[1])
                    write_marker('.option0')  # NUMPY FILE
                    return 'ok'
                except KeyError:
                    tree_remove(dataset_test_path)
                    return invalid_msg

            if not unzip(path_file, dataset_test_path):
                return "The file contents already exists."

            # The archive itself is no longer needed once extracted.
            os.remove(path_file)

            if find_images_test_file(dataset_test_path):
                write_marker('.option1')  # ONLY IMAGES
                return 'ok'

            try:
                _, _, classes = find_image_files_folder_per_class(
                    dataset_test_path, require_all=False)
                # Explicit raise instead of ``assert`` so the check survives
                # ``python -O`` while still reaching the handler below.
                if len(classes) != len(self.get_target_labels()):
                    raise AssertionError(
                        'class count does not match the target labels')
                write_marker('.option2')  # FOLDER PER CLASS
            except AssertionError:
                # Not a folder-per-class layout: fall back to a labels file.
                try:
                    info_file = [
                        f for f in os.listdir(dataset_test_path)
                        if f.startswith(('test.', 'labels.'))
                    ]
                    if len(info_file) != 1:
                        raise AssertionError(
                            'expected exactly one test/labels file')
                    info_path = os.path.join(dataset_test_path, info_file[0])
                    _, _, classes = find_image_files_from_file(
                        dataset_test_path, info_path, require_all=False)
                    if len(classes) != len(self.get_target_labels()):
                        raise AssertionError(
                            'class count does not match the target labels')
                    write_marker('.option3')  # LABELS.TXT
                    os.rename(
                        info_path,
                        os.path.join(dataset_test_path, 'labels.txt'))
                except AssertionError:
                    tree_remove(dataset_test_path)
                    return invalid_msg
        except ValueError:
            tree_remove(dataset_test_path)
            return invalid_msg
        return 'ok'
Beispiel #3
0
    def test_request(self, request):
        """Resolve the test inputs (and labels, if any) named by a JSON request.

        The request's JSON body must contain a ``'filename'`` key; the actual
        name is obtained through ``get_filename(request)``.  The special name
        ``'TEST FROM SPLIT'`` selects the dataset's own test split.  Any other
        name is looked up as a folder under the dataset path with ``'train'``
        replaced by ``'test'``, and the ``.option*`` marker file found there
        decides how the folder is read:

        * ``.option0`` -- ``<name>.npz`` with ``x`` and optionally ``y``
        * ``.option1`` -- unlabelled files directly in the folder
        * ``.option2`` -- one sub-folder per class; the folder name is the label
        * ``.option3`` -- a ``labels.*`` file parsed by
          ``find_image_files_from_file``

        Returns:
            ``False`` when the JSON body has no ``'filename'`` key.  Otherwise
            a 4-tuple ``(has_targets, test_filename, df_test, None)`` where
            ``df_test`` maps the first dataset target to the labels; for
            unlabelled data the tuple is ``(False, test_filename, None, None)``.

        Raises:
            AssertionError: if the ``y`` array of a numpy test set has three
                or more dimensions.
            IndexError: if the test folder holds no ``.option*`` marker.
        """
        if 'filename' not in request.get_json():
            # Kept as a bare False (not a tuple) for backward compatibility.
            return False

        requested = get_filename(request)

        if requested == 'TEST FROM SPLIT':
            test_filename = self._dataset.get_test_split_images()
            df_test = {
                self._dataset.get_targets()[0]:
                self._dataset.get_test_split_labels()
            }
            return True, test_filename, df_test, None

        test_path = os.path.join(
            self._dataset.get_dataset_path().replace('train', 'test'),
            requested)
        option = [
            f for f in os.listdir(test_path) if f.startswith('.option')
        ][0]

        test_filename = []
        labels = []

        if option == '.option0':
            npz_path = os.path.join(test_path, requested + '.npz')
            # NpzFile keeps the archive open until closed, so use it as a
            # context manager; the arrays are materialised on item access.
            with np.load(npz_path) as data:
                test_filename = data['x']
                if 'y' not in data:
                    return False, test_filename, None, None
                labels = data['y']
            # TODO: labels with three or more dimensions are not supported.
            if len(labels.shape) >= 3:
                raise AssertionError(
                    'labels with 3 or more dimensions are not supported')

        elif option == '.option1':
            test_filename = [
                os.path.join(test_path, t)
                for t in os.listdir(test_path) if not t.startswith('.')
            ]
            return False, test_filename, None, None

        elif option == '.option2':
            all_classes = [
                c for c in os.listdir(test_path) if not c.startswith('.')
            ]
            for cl in all_classes:
                class_dir = os.path.join(test_path, cl)
                list_files = [
                    os.path.join(class_dir, f)
                    for f in os.listdir(class_dir) if not f.startswith('.')
                ]
                test_filename.extend(list_files)
                labels.extend([cl] * len(list_files))

        elif option == '.option3':
            labels_file = [
                os.path.join(test_path, t)
                for t in os.listdir(test_path) if t.startswith('labels.')
            ]
            test_filename, labels, _ = find_image_files_from_file(
                test_path, labels_file[0], require_all=False)

        df_test = {self._dataset.get_targets()[0]: labels}
        return True, test_filename, df_test, None
Beispiel #4
0
def _touch_marker(path):
    """Create (or truncate) an empty marker file at ``path`` and close it."""
    with open(path, 'w'):
        pass


def new_image_dataset(USER_ROOT, username, option, file):
    """Create a new image dataset folder from an uploaded file.

    The dataset folder is obtained from ``check_dataset_path`` and receives a
    marker file named ``option_map[option]`` plus ``train`` and ``test``
    sub-folders.  The upload is then handled according to ``option``:

    * ``'option3'`` -- numpy file read by ``find_dataset_from_numpy``; train
      data is re-saved under ``train`` and, if present, test data under
      ``test/<dataset_name>`` with a ``.option0`` marker.
    * ``'option1'`` -- zip archive with one folder per class; an embedded
      ``test`` folder is moved to ``test/<dataset_name>`` (``.option2``).
    * ``'option2'`` -- zip archive with a ``labels.*`` / ``train.*`` listing;
      files named by an optional ``test.*`` listing are moved to
      ``test/<dataset_name>`` (``.option3``).

    Args:
        USER_ROOT: forwarded to ``check_dataset_path``.
        username: forwarded to ``check_dataset_path``.
        option: one of the option names above; also a key of ``option_map``.
        file: uploaded file object exposing ``filename`` and ``save``.

    Returns:
        ``False`` if ``file`` is a ``str``; otherwise the dataset name
        returned by ``check_dataset_path``.

    Raises:
        ValueError: if a non-numpy upload fails ``check_zip_file``.
        AssertionError: if the extracted archive does not match the expected
            layout.
        Exception: anything raised while processing a numpy upload.
        In each of these cases the dataset folder is removed first.
    """
    if isinstance(file, str):
        return False
    dataset_name = file.filename.split('.')[0]
    dataset_name, dataset_path = check_dataset_path(USER_ROOT, username, dataset_name)

    _touch_marker(os.path.join(dataset_path, option_map[option]))

    dataset_test_path = os.path.join(dataset_path, 'test')
    os.makedirs(dataset_test_path, exist_ok=True)

    train_path = os.path.join(dataset_path, 'train')
    os.makedirs(train_path, exist_ok=True)

    filename = secure_filename(file.filename)
    path_file = os.path.join(dataset_path, filename)
    file.save(path_file)

    if option == 'option3':
        try:
            train_data, test_data = find_dataset_from_numpy(path_file)
            np.savez(os.path.join(train_path, filename), x=train_data[0], y=train_data[1])
            # NOTE(review): truthiness test kept from the original; presumably
            # test_data is None or an (x, y) pair -- confirm with the helper.
            if test_data:
                numpy_test_path = os.path.join(dataset_test_path, dataset_name)
                os.makedirs(numpy_test_path, exist_ok=True)
                np.savez(os.path.join(numpy_test_path, dataset_name + '.npz'),
                         x=test_data[0], y=test_data[1])
                _touch_marker(os.path.join(numpy_test_path, '.option0'))  # NUMPY FILE
            return dataset_name
        except Exception:
            # Do not leave a half-built dataset behind.
            tree_remove(dataset_path)
            raise

    if not check_zip_file(path_file):
        tree_remove(dataset_path)
        raise ValueError('Invalid file.')

    unzip(path_file, train_path)
    try:
        if option == 'option1':
            # Archives may wrap the classes in a top-level 'train' folder.
            if 'train' in os.listdir(train_path):
                rename(os.path.join(train_path, 'train'), train_path)
            find_image_files_folder_per_class(train_path)
            if 'test' in os.listdir(train_path):
                dataset_test_path = os.path.join(dataset_test_path, dataset_name)
                os.makedirs(dataset_test_path, exist_ok=True)
                rename(os.path.join(train_path, 'test'), dataset_test_path)
                find_image_files_folder_per_class(dataset_test_path, require_all=False)
                _touch_marker(os.path.join(dataset_test_path, '.option2'))  # FOLDER PER CLASS

        elif option == 'option2':
            info_file = [f for f in os.listdir(train_path) if f.startswith(('labels.', 'train.'))]
            # Explicit raise instead of ``assert`` so the check survives
            # ``python -O`` and still triggers the cleanup below.
            if len(info_file) != 1:
                raise AssertionError('expected exactly one labels/train file')
            labels_path = os.path.join(train_path, 'labels.txt')
            os.rename(os.path.join(train_path, info_file[0]), labels_path)
            find_image_files_from_file(train_path, labels_path)

            info_test_file = [f for f in os.listdir(train_path) if f.startswith('test.')]
            if len(info_test_file) == 1:
                info_test_path = os.path.join(train_path, info_test_file[0])
                find_image_files_from_file(train_path, info_test_path, require_all=False)

                dataset_test_path = os.path.join(dataset_test_path, dataset_name)
                os.makedirs(dataset_test_path, exist_ok=True)

                args = {}
                if not has_header(info_test_path):
                    args['header'] = None

                df = pd.read_csv(info_test_path, sep=None, engine='python', **args)

                # The first column holds the file names; when they are not
                # directly resolvable they are taken relative to train_path.
                filenames = df[df.columns[0]].values
                if len(filenames) and not os.path.isfile(filenames[0]):
                    filenames = [os.path.join(train_path, f) for f in filenames]

                for f in filenames:
                    os.rename(f, os.path.join(dataset_test_path, os.path.basename(f)))
                os.rename(info_test_path, os.path.join(dataset_test_path, 'labels.txt'))
                _touch_marker(os.path.join(dataset_test_path, '.option3'))  # LABELS.TXT

    except AssertionError:
        tree_remove(dataset_path)
        raise
    return dataset_name