Exemple #1
0
def encode_param_command(args, **kwargs):
    """Load per-parameter text files and save them as one parameter set.

    Every regular file directly inside ``args.indir`` is handed to
    ``load_param_in_txt``. The name passed along is the file name without
    its extension, with each ``~`` replaced by ``/``. Afterwards
    ``save_parameters(args.param)`` is called.

    Args:
        args: Object providing the ``indir`` and ``param`` attributes.
        **kwargs: Accepted but not used.
    """
    source_dir = args.indir

    # Finish the directory listing before any parameter is loaded.
    file_names = []
    for entry in os.listdir(source_dir):
        if os.path.isfile(os.path.join(source_dir, entry)):
            file_names.append(entry)

    logger.log(99, 'Loading parameters...')
    for file_name in file_names:
        logger.log(99, file_name)
        stem, _ext = os.path.splitext(file_name)
        param_name = stem.replace('~', '/')
        load_param_in_txt(param_name, os.path.join(source_dir, file_name))

    logger.log(99, 'Saving parameters...')
    save_parameters(args.param)

    logger.log(99, 'Encode Parameter Completed.')
Exemple #2
0
def decode_param_command(args, **kwargs):
    """Write each loaded parameter to its own text file.

    Calls ``load_parameters(args.param)`` and then, for every entry returned
    by ``get_parameters(grad_only=False)``, passes the variable's ``d``
    attribute to ``save_param_in_txt``. The target file lives under
    ``args.outdir`` and is named after the parameter key with ``/`` replaced
    by ``~`` and ``.txt`` appended.

    Args:
        args: Object providing the ``outdir`` and ``param`` attributes.
        **kwargs: Accepted but not used.
    """
    out_dir = args.outdir
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

    logger.log(99, 'Loading parameters...')
    load_parameters(args.param)

    for name, var in get_parameters(grad_only=False).items():
        logger.log(99, name)
        target = out_dir + os.sep + name.replace('/', '~') + '.txt'
        parent = os.path.dirname(target)
        if not os.path.exists(parent):
            os.makedirs(parent)
        save_param_in_txt(var.d, target)

    logger.log(99, 'Decode Parameter Completed.')
def create_cache_file(args):
    """Convert a directory-per-class image tree into HDF5 cache files.

    Sub-directories of ``args.sourcedir`` are visited in sorted order. A
    directory receives the next class index as soon as it is found to contain
    a file that ``imghdr.what`` recognises. Each image is read as RGB, then
    cropped (trimming mode) or constant-padded (``args.mode == 'padding'``)
    to the target aspect ratio, resized to ``(height, width)`` and stored
    channel-first as ``uint8``.

    Images are written in groups of up to 100 to
    ``<outdir>/data{index:04d}_{count}.h5`` with datasets ``x`` (images) and
    ``y`` (``int16`` class indices). ``count`` is the number of entries
    planned for the file; entries whose source file has disappeared are
    reported with ``print`` and skipped.

    Args:
        args: Object with ``sourcedir``, ``outdir``, ``width``, ``height``,
            ``mode`` and ``shuffle`` attributes. ``shuffle`` enables
            shuffling only when it equals the string ``'true'``.

    Returns:
        None. Returns early, after logging, when the source and output
        directories are the same.
    """
    # NOTE(review): scipy.misc.imread/imresize are no longer available in
    # recent SciPy releases -- confirm the SciPy version this runs against.

    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    padding = args.mode == 'padding'
    shuffle = args.shuffle == 'true'

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return

    # create file list
    logger.log(99, "Creating file list...")
    class_dirs = sorted(
        d for d in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, d)))

    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, class_dir in enumerate(class_dirs):
        full_path = os.path.join(source_dir, class_dir)
        files = sorted(
            f for f in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, f)))
        found = False
        for i2, file_name in enumerate(files):
            if imghdr.what(os.path.join(full_path, file_name)) is not None:
                if not found:
                    # A directory only consumes a class index once it is
                    # known to hold at least one image.
                    label_index += 1
                    found = True
                csv_data.append(
                    [os.path.join('.', class_dir, file_name), label_index])
            # Overall progress in percent: finished directories plus the
            # fraction of the current directory already scanned.
            current = round(100 * (float(i) / len(class_dirs) + float(i2) /
                                   (len(class_dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    logger.log(99, "Creating cache files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    data_size = 100
    num_data_files = (len(csv_data) + data_size - 1) // data_size
    for i in tqdm.tqdm(range(num_data_files)):
        batch = csv_data[i * data_size:(i + 1) * data_size]
        num_image = len(batch)
        data = {'x': [], 'y': []}
        for relative_name, class_index in batch:
            # Entries look like './<class>/<file>'; dropping the leading '.'
            # leaves a separator-prefixed path to append to source_dir.
            image_file_name = source_dir + relative_name[1:]
            if not os.path.exists(image_file_name):
                print(image_file_name, ' is not found.')
                continue

            im = scipy.misc.imread(image_file_name, mode='RGB')
            h = im.shape[0]
            w = im.shape[1]
            if w != width or h != height:
                if not padding:
                    # trimming mode: crop the longer side around the centre
                    if float(h) / w > float(height) / width:
                        target_h = int(float(w) / width * height)
                        im = im[(h - target_h) // 2:h -
                                (h - target_h) // 2, ::]
                    else:
                        target_w = int(float(h) / height * width)
                        im = im[::, (w - target_w) // 2:w -
                                (w - target_w) // 2]
                else:
                    # padding mode: extend the shorter side symmetrically
                    if float(h) / w < float(height) / width:
                        target_h = int(float(height) / width * w)
                        pad = (((target_h - h) // 2,
                                target_h - (target_h - h) // 2 - h), (0, 0))
                    else:
                        target_w = int(float(width) / height * h)
                        pad = ((0, 0),
                               ((target_w - w) // 2,
                                target_w - (target_w - w) // 2 - w))
                    # The channel axis is never padded.
                    pad = pad + ((0, 0), )
                    im = np.pad(im, pad, 'constant')
                im = scipy.misc.imresize(arr=im,
                                         size=(height, width),
                                         interp='lanczos')

            # Convert every image, including those that already had the
            # requested size, from (H, W, C) to (C, H, W).
            x = np.array(im, dtype=np.uint8).transpose((2, 0, 1))
            data['x'].append(x)
            data['y'].append(np.array([int(class_index)], dtype=np.int16))

        out_file_name = dest_dir + '/data{:04d}_{}.h5'.format(i, num_image)
        # The context manager guarantees the file is flushed and closed.
        with h5py.File(out_file_name, 'w') as h5:
            h5.create_dataset('y', data=data['y'])
            h5.create_dataset('x', data=data['x'])
def create_image_classification_dataset_command(args):
    """Create a converted image dataset plus CSV index files.

    Sub-directories of ``args.sourcedir`` are visited in sorted order. A
    directory receives the next label index as soon as it is found to contain
    a file with a known image extension. Each listed image is passed to
    ``convert_image`` through a process pool. Afterwards only entries whose
    ``.png`` counterpart exists under ``args.outdir`` are kept and written to
    the CSV file(s) with the header ``x:image,y:label``.

    When ``args.file2`` is given, ``args.ratio2`` percent of the entries go
    into the second CSV file and the remainder into the first.

    Args:
        args: Object with ``sourcedir``, ``outdir``, ``width``, ``height``,
            ``mode``, ``channel``, ``shuffle``, ``file1``, ``file2`` and
            ``ratio2`` attributes. ``shuffle`` enables shuffling only when it
            equals the string ``'true'``.

    Returns:
        bool: ``False`` when the source and output directories are the same,
        ``True`` otherwise.
    """
    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    mode = args.mode
    ch = int(args.channel)
    shuffle = args.shuffle == 'true'

    dest_csv_file_name = [os.path.join(dest_dir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(dest_dir, args.file2))
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return False

    # create file list
    logger.log(99, "Creating file list...")
    class_dirs = sorted(
        d for d in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, d)))

    # Raw string avoids the invalid '\.' escape; '$' makes sure the whole
    # extension matches (so '.pngx' is not mistaken for an image).
    image_ext = re.compile(r'\.(bmp|jpg|jpeg|png|gif|tif|tiff)$',
                           re.IGNORECASE)

    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, class_dir in enumerate(class_dirs):
        full_path = os.path.join(source_dir, class_dir)
        files = sorted(
            f for f in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, f)))
        found = False
        for i2, file_name in enumerate(files):
            if image_ext.search(os.path.splitext(file_name)[1]):
                if not found:
                    # A directory only consumes a label index once it is
                    # known to hold at least one image.
                    label_index += 1
                    found = True
                csv_data.append(
                    [os.path.join('.', class_dir, file_name), label_index])
            # Overall progress in percent: finished directories plus the
            # fraction of the current directory already scanned.
            current = round(100 * (float(i) / len(class_dirs) + float(i2) /
                                   (len(class_dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    # create output data
    logger.log(99, "Creating output images...")
    process_args = [(data[0], source_dir, dest_dir, width, height, mode, ch)
                    for data in csv_data]
    p = mp.Pool(mp.cpu_count())
    pbar = tqdm.tqdm(total=len(process_args))
    try:
        for _ in p.imap_unordered(convert_image, process_args):
            pbar.update()
    finally:
        pbar.close()
        # On success every result has been consumed, so this only reaps idle
        # workers; on error it stops the outstanding work.
        p.terminate()
        p.join()

    # Keep only the entries whose converted PNG actually exists.
    converted = []
    for data in csv_data:
        png_name = os.path.splitext(data[0])[0] + ".png"
        if os.path.exists(os.path.join(dest_dir, png_name)):
            data[0] = png_name
            converted.append(data)
    csv_data = converted

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    # First file gets (100 - ratio2)% of the rows, second file the rest.
    csv_data_num = [(len(csv_data) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(csv_data) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            rows = csv_data[data_head:data_head + data_num]
            data_head += data_num

            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(['x:image', 'y:label'])
                writer.writerows(rows)

    logger.log(99, "Dataset was successfully created.")
    return True
Exemple #5
0
def create_image_classification_dataset_command(args):
    """Create a resized PNG image dataset plus CSV index files.

    Sub-directories of ``args.sourcedir`` are visited in sorted order. A
    directory receives the next label index as soon as it is found to contain
    a file that ``imghdr.what`` recognises. Each image is read, cropped
    (trimming mode) or constant-padded (``args.mode == 'padding'``) to the
    target aspect ratio, resized to ``(height, width)``, converted to the
    requested channel count and saved as ``.png`` under ``args.outdir``.

    Images that are neither 2-D nor 3-channel 3-D arrays are logged and
    left out of the CSV output. The CSV file(s) start with the header
    ``x:image,y:label``. When ``args.file2`` is given, ``args.ratio2``
    percent of the entries go into the second file and the rest into the
    first.

    Args:
        args: Object with ``sourcedir``, ``outdir``, ``width``, ``height``,
            ``mode``, ``channel``, ``shuffle``, ``file1``, ``file2`` and
            ``ratio2`` attributes. ``shuffle`` enables shuffling only when it
            equals the string ``'true'``.

    Returns:
        None. Returns early, after logging, when the source and output
        directories are the same.
    """
    # settings
    source_dir = args.sourcedir
    dest_csv_file_name = [os.path.join(args.outdir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(args.outdir, args.file2))
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    padding = args.mode == 'padding'
    ch = int(args.channel)
    shuffle = args.shuffle == 'true'
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return

    # create file list
    logger.log(99, "Creating file list...")
    class_dirs = sorted(
        d for d in os.listdir(source_dir)
        if os.path.isdir(os.path.join(source_dir, d)))

    label_index = -1
    csv_data = []
    pbar = tqdm.tqdm(total=100, unit='%')
    last = 0
    for i, class_dir in enumerate(class_dirs):
        full_path = os.path.join(source_dir, class_dir)
        files = sorted(
            f for f in os.listdir(full_path)
            if os.path.isfile(os.path.join(full_path, f)))
        found = False
        for i2, file_name in enumerate(files):
            if imghdr.what(os.path.join(full_path, file_name)) is not None:
                if not found:
                    # A directory only consumes a label index once it is
                    # known to hold at least one image.
                    label_index += 1
                    found = True
                csv_data.append(
                    [os.path.join('.', class_dir, file_name), label_index])
            # Overall progress in percent: finished directories plus the
            # fraction of the current directory already scanned.
            current = round(100 * (float(i) / len(class_dirs) + float(i2) /
                                   (len(class_dirs) * len(files))))
            if last < current:
                pbar.update(current - last)
                last = current
    pbar.close()

    # create output data
    logger.log(99, "Creating output images...")
    # Accepted entries are collected in a new list: removing rejected ones
    # from csv_data while iterating it would skip the following image.
    converted = []
    for data in tqdm.tqdm(csv_data, unit='images'):
        src_file_name = os.path.join(source_dir, data[0])
        png_name = os.path.splitext(data[0])[0] + ".png"
        dest_file_name = os.path.join(dest_dir, png_name)
        dest_path = os.path.dirname(dest_file_name)

        # open source image
        im = scipy.misc.imread(src_file_name)
        if len(im.shape) < 2 or len(im.shape) > 3:
            logger.warning("Illigal image file format %s.", src_file_name)
            continue
        elif len(im.shape) == 3:
            # RGB image
            if im.shape[2] != 3:
                logger.warning("The image must be RGB or monochrome %s.",
                               src_file_name)
                continue

        # resize
        h = im.shape[0]
        w = im.shape[1]
        if w != width or h != height:
            if not padding:
                # trimming mode: crop the longer side around the centre
                if float(h) / w > float(height) / width:
                    target_h = int(float(w) / width * height)
                    im = im[(h - target_h) // 2:h - (h - target_h) // 2, ::]
                else:
                    target_w = int(float(h) / height * width)
                    im = im[::, (w - target_w) // 2:w - (w - target_w) // 2]
            else:
                # padding mode: extend the shorter side symmetrically
                if float(h) / w < float(height) / width:
                    target_h = int(float(height) / width * w)
                    pad = (((target_h - h) // 2,
                            target_h - (target_h - h) // 2 - h), (0, 0))
                else:
                    target_w = int(float(width) / height * h)
                    pad = ((0, 0), ((target_w - w) // 2,
                                    target_w - (target_w - w) // 2 - w))
                if len(im.shape) == 3:
                    # The channel axis is never padded.
                    pad = pad + ((0, 0), )
                im = np.pad(im, pad, 'constant')
            im = scipy.misc.imresize(arr=im,
                                     size=(height, width),
                                     interp='lanczos')

        # change color ch
        if len(im.shape) == 2 and ch == 3:
            # Monochrome to RGB
            im = np.array([im, im, im]).transpose((1, 2, 0))
        elif len(im.shape) == 3 and ch == 1:
            # RGB to monochrome
            im = np.dot(im[..., :3], [0.299, 0.587, 0.114])

        # output
        if not os.path.exists(dest_path):
            os.makedirs(dest_path)
        scipy.misc.imsave(dest_file_name, im)

        # The CSV refers to the converted file, not the original.
        data[0] = png_name
        converted.append(data)
    csv_data = converted

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(csv_data)

    # First file gets (100 - ratio2)% of the rows, second file the rest.
    csv_data_num = [(len(csv_data) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(csv_data) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            rows = csv_data[data_head:data_head + data_num]
            data_head += data_num

            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(['x:image', 'y:label'])
                writer.writerows(rows)
Exemple #6
0
def create_object_detection_dataset_command(args):
    """Create an object-detection dataset plus CSV index files.

    Image files are collected recursively from ``args.sourcedir``. Anchors
    are obtained from ``get_anchors`` and every image is passed to
    ``convert_image`` through a process pool. Only images whose ``.png``
    counterpart exists under ``<outdir>/data`` afterwards are listed in the
    CSV file(s). Each row holds the image path together with the matching
    ``_label.csv`` and ``_region.csv`` paths, under the header
    ``x:image,y:label,r:region``.

    When ``args.file2`` is given, ``args.ratio2`` percent of the entries go
    into the second CSV file and the remainder into the first.

    Args:
        args: Object with ``sourcedir``, ``outdir``, ``width``, ``height``,
            ``mode``, ``channel``, ``num_class``, ``grid_size``,
            ``num_anchor``, ``shuffle``, ``file1``, ``file2`` and ``ratio2``
            attributes. ``shuffle`` enables shuffling only when it equals the
            string ``'true'``.

    Returns:
        bool: ``True`` on success. ``False`` when width or height is not a
        multiple of ``grid_size``, when the source and output directories are
        the same, when no image is found, or when no output was created.
    """
    # settings
    source_dir = args.sourcedir
    dest_dir = args.outdir
    width = int(args.width)
    height = int(args.height)
    mode = args.mode
    ch = int(args.channel)
    num_class = int(args.num_class)
    grid_size = int(args.grid_size)
    shuffle = args.shuffle == 'true'
    num_anchor = int(args.num_anchor)

    # Validation failures return False like every other failure path.
    if width % grid_size != 0:
        logger.log(99, 'width must be divisible by grid_size.')
        return False
    if height % grid_size != 0:
        logger.log(99, 'height must be divisible by grid_size.')
        return False

    dest_csv_file_name = [os.path.join(dest_dir, args.file1)]
    if args.file2:
        dest_csv_file_name.append(os.path.join(dest_dir, args.file2))
    test_data_ratio = int(args.ratio2) if args.ratio2 else 0

    if source_dir == dest_dir:
        logger.critical("Input directory and output directory are same.")
        return False

    # create file list
    logger.log(99, "Creating file list...")

    # Raw string avoids the invalid '\.' escape; '$' makes sure the whole
    # extension matches (so '.pngx' is not mistaken for an image).
    image_ext = re.compile(r'\.(bmp|jpg|jpeg|png|gif|tif|tiff)$',
                           re.IGNORECASE)

    def create_file_list(sub_dir=""):
        # Recursively collect image paths relative to source_dir.
        result = []
        for item in os.listdir(os.path.join(source_dir, sub_dir)):
            if os.path.isdir(os.path.join(source_dir, sub_dir, item)):
                result.extend(create_file_list(os.path.join(sub_dir, item)))
            elif image_ext.search(os.path.splitext(item)[1]):
                result.append(os.path.join(sub_dir, item))
        return result

    file_list = create_file_list()

    if len(file_list) == 0:
        logger.critical(
            "No image file found in the subdirectory of the input directory.")
        return False

    # calc anchor
    logger.log(99, "Calculating anchors...")
    anchors = get_anchors(source_dir, file_list, num_anchor)

    # create output data
    logger.log(99, "Creating output images...")
    process_args = [(data, source_dir, dest_dir, width, height, mode, ch,
                     num_class, grid_size, anchors) for data in file_list]
    p = mp.Pool(mp.cpu_count())
    pbar = tqdm.tqdm(total=len(process_args))
    try:
        for _ in p.imap_unordered(convert_image, process_args):
            pbar.update()
    finally:
        pbar.close()
        # On success every result has been consumed, so this only reaps idle
        # workers; on error it stops the outstanding work.
        p.terminate()
        p.join()

    # Keep only the images whose converted PNG exists under <outdir>/data.
    file_list = [os.path.join('.', 'data', file) for file in file_list]
    file_list = [file for file in file_list if os.path.exists(
        os.path.join(dest_dir, os.path.splitext(file)[0] + '.png'))]
    if len(file_list) == 0:
        logger.critical("No image and label file created correctly.")
        return False

    logger.log(99, "Creating CSV files...")
    if shuffle:
        import random
        random.shuffle(file_list)

    # First file gets (100 - ratio2)% of the rows, second file the rest.
    csv_data_num = [(len(file_list) * (100 - test_data_ratio)) // 100]
    csv_data_num.append(len(file_list) - csv_data_num[0])
    data_head = 0
    for csv_file_name, data_num in zip(dest_csv_file_name, csv_data_num):
        if data_num:
            file_list_2 = file_list[data_head:data_head + data_num]
            data_head += data_num

            with open(csv_file_name, 'w') as f:
                writer = csv.writer(f, lineterminator='\n')
                writer.writerow(['x:image', 'y:label', 'r:region'])
                for file in file_list_2:
                    base_file_name = os.path.splitext(file)[0]
                    writer.writerow([file,
                                     base_file_name + '_label.csv',
                                     base_file_name + '_region.csv'])

    logger.log(99, "Dataset was successfully created.")
    return True