Python DbReader Examples

Programming Language: Python

Namespace/Package Name: digits.utils.lmdbreader

Class/Type: DbReader

Examples at hotexamples.com: 11

Python DbReader - 11 examples found. These are the top rated real world Python examples of digits.utils.lmdbreader.DbReader extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

DbReader(7)

entries(7)

entry(1)

Example #1

Show file

def explore():
    """
    Returns a gallery consisting of the images of one of the dbs
    """
    job = job_from_request()
    # Get LMDB
    db = job.path(flask.request.args.get('db'))
    db_path = job.path(db)
    labels = []

    if COLOR_PALETTE_ATTRIBUTE in job.extension_userdata:
        # assume single-channel 8-bit palette
        palette = job.extension_userdata[COLOR_PALETTE_ATTRIBUTE]
        palette = np.array(palette).reshape((len(palette)/3,3)) / 255.
        # normalize input pixels to [0,1]
        norm = mpl.colors.Normalize(vmin=0,vmax=255)
        # create map
        cmap = mpl.pyplot.cm.ScalarMappable(norm=norm,
                                            cmap=mpl.colors.ListedColormap(palette))
    else:
        cmap = None

    page = int(flask.request.args.get('page', 0))
    size = int(flask.request.args.get('size', 25))

    reader = DbReader(db_path)
    count = 0
    imgs = []

    min_page = max(0, page - 5)
    total_entries = reader.total_entries

    max_page = min((total_entries-1) / size, page + 5)
    pages = range(min_page, max_page + 1)
    for key, value in reader.entries():
        if count >= page*size:
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if not datum.encoded:
                raise RuntimeError("Expected encoded database")
            s = StringIO()
            s.write(datum.data)
            s.seek(0)
            img = PIL.Image.open(s)
            if cmap and img.mode in ['L', '1']:
                data = np.array(img)
                data = cmap.to_rgba(data)*255
                data = data.astype('uint8')
                # keep RGB values only, remove alpha channel
                data = data[:, :, 0:3]
                img = PIL.Image.fromarray(data)
            imgs.append({"label": None, "b64": utils.image.embed_image_html(img)})
        count += 1
        if len(imgs) >= size:
            break

    return flask.render_template('datasets/images/explore.html', page=page, size=size, job=job, imgs=imgs, labels=None, pages=pages, label=None, total_entries=total_entries, db=db)

Example #2

Show file

File: views.py Project: smichalowski/DIGITS

def explore():
    """
    Returns a gallery consisting of the images of one of the dbs
    """
    job = job_from_request()
    # Get LMDB
    db = job.path(flask.request.args.get('db'))
    db_path = job.path(db)
    labels = []

    page = int(flask.request.args.get('page', 0))
    size = int(flask.request.args.get('size', 25))

    reader = DbReader(db_path)
    count = 0
    imgs = []

    min_page = max(0, page - 5)
    total_entries = reader.total_entries

    max_page = min((total_entries - 1) / size, page + 5)
    pages = range(min_page, max_page + 1)
    for key, value in reader.entries():
        if count >= page * size:
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if not datum.encoded:
                raise RuntimeError("Expected encoded database")
            s = StringIO()
            s.write(datum.data)
            s.seek(0)
            img = PIL.Image.open(s)
            imgs.append({
                "label": None,
                "b64": utils.image.embed_image_html(img)
            })
        count += 1
        if len(imgs) >= size:
            break

    return flask.render_template('datasets/images/explore.html',
                                 page=page,
                                 size=size,
                                 job=job,
                                 imgs=imgs,
                                 labels=None,
                                 pages=pages,
                                 label=None,
                                 total_entries=total_entries,
                                 db=db)

Example #3

Show file

File: views.py Project: SevenGong/DIGITS

def explore():
    """
    Returns a gallery consisting of the images of one of the dbs
    """
    job = job_from_request()
    # Get LMDB
    db = job.path(flask.request.args.get('db'))
    db_path = job.path(db)
    labels = []

    page = int(flask.request.args.get('page', 0))
    size = int(flask.request.args.get('size', 25))

    reader = DbReader(db_path)
    count = 0
    imgs = []

    min_page = max(0, page - 5)
    total_entries = reader.total_entries

    max_page = min((total_entries-1) / size, page + 5)
    pages = range(min_page, max_page + 1)
    for key, value in reader.entries():
        if count >= page*size:
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if not datum.encoded:
                raise RuntimeError("Expected encoded database")
            s = StringIO()
            s.write(datum.data)
            s.seek(0)
            img = PIL.Image.open(s)
            imgs.append({"label": None, "b64": utils.image.embed_image_html(img)})
        count += 1
        if len(imgs) >= size:
            break

    return flask.render_template('datasets/images/explore.html', page=page, size=size, job=job, imgs=imgs, labels=None, pages=pages, label=None, total_entries=total_entries, db=db)

Example #4

Show file

def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size,
          layers, gpu, input_is_db, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset,
                                                 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []  # indices of samples within file list
    input_data = []  # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(image,
                                                     height,
                                                     width,
                                                     channels=channels,
                                                     resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image,
                                                       channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" %
                             repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(
            input_data[0],
            snapshot_epoch=epoch,
            layers=layers,
            gpu=gpu,
            resize=resize)
    else:
        if layers != 'none':
            raise InferenceError(
                "Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(input_data,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=resize)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)

Example #5

Show file

def explore():
    """
    Returns a gallery consisting of the images of one of the dbs
    """
    job = job_from_request()
    # Get LMDB
    db = flask.request.args.get('db', 'train')
    if 'train' in db.lower():
        task = job.train_db_task()
    elif 'val' in db.lower():
        task = job.val_db_task()
    elif 'test' in db.lower():
        task = job.test_db_task()
    if task is None:
        raise ValueError('No create_db task for {0}'.format(db))
    if task.status != 'D':
        raise ValueError(
            "This create_db task's status should be 'D' but is '{0}'".format(
                task.status))
    if task.backend != 'lmdb':
        raise ValueError(
            "Backend is {0} while expected backend is lmdb".format(
                task.backend))
    db_path = job.path(task.db_name)
    labels = task.get_labels()

    page = int(flask.request.args.get('page', 0))
    size = int(flask.request.args.get('size', 25))
    label = flask.request.args.get('label', None)

    if label is not None:
        try:
            label = int(label)
        except ValueError:
            label = None

    reader = DbReader(db_path)
    count = 0
    imgs = []

    min_page = max(0, page - 5)
    if label is None:
        total_entries = reader.total_entries
    else:
        # After PR#1500, task.distribution[str(label)] is a dictionary
        # with keys = 'count' and 'error_count'
        label_entries = task.distribution[str(label)]
        if isinstance(label_entries, dict):
            total_entries = label_entries['count']
        else:
            total_entries = label_entries

    max_page = min((total_entries - 1) // size, page + 5)
    pages = range(min_page, max_page + 1)
    for key, value in reader.entries():
        if count >= page * size:
            datum = dataset_pb2.Datum()
            datum.ParseFromString(value)
            if label is None or datum.label == label:
                if datum.encoded:
                    s = BytesIO()
                    s.write(datum.data)
                    s.seek(0)
                    img = PIL.Image.open(s)
                else:
                    arr = datum_to_array(datum)
                    # CHW -> HWC
                    arr = arr.transpose((1, 2, 0))
                    if arr.shape[2] == 1:
                        # HWC -> HW
                        arr = arr[:, :, 0]
                    elif arr.shape[2] == 3:
                        # BGR -> RGB
                        # XXX see issue #59
                        arr = arr[:, :, [2, 1, 0]]
                    img = PIL.Image.fromarray(arr)
                imgs.append({
                    "label": labels[datum.label],
                    "b64": utils.image.embed_image_html(img)
                })
        if label is None:
            count += 1
        else:
            datum = dataset_pb2.Datum()
            datum.ParseFromString(value)
            if datum.label == int(label):
                count += 1
        if len(imgs) >= size:
            break

    return flask.render_template('datasets/images/explore.html',
                                 page=page,
                                 size=size,
                                 job=job,
                                 imgs=imgs,
                                 labels=labels,
                                 pages=pages,
                                 label=label,
                                 total_entries=total_entries,
                                 db=db)

Example #6

Show file

File: views.py Project: Alandougherty/DIGITS

def explore():
    """
    Returns a gallery consisting of the images of one of the dbs
    """
    job = job_from_request()
    # Get LMDB
    db = flask.request.args.get('db', 'train')
    if 'train' in db.lower():
        task = job.train_db_task()
    elif 'val' in db.lower():
        task = job.val_db_task()
    elif 'test' in db.lower():
        task = job.test_db_task()
    if task is None:
        raise ValueError('No create_db task for {0}'.format(db))
    if task.status != 'D':
        raise ValueError("This create_db task's status should be 'D' but is '{0}'".format(task.status))
    if task.backend != 'lmdb':
        raise ValueError("Backend is {0} while expected backend is lmdb".format(task.backend))
    db_path = job.path(task.db_name)
    labels = task.get_labels()

    page = int(flask.request.args.get('page', 0))
    size = int(flask.request.args.get('size', 25))
    label = flask.request.args.get('label', None)

    if label is not None:
        try:
            label = int(label)
            label_str = labels[label]
        except ValueError:
            label = None

    reader = DbReader(db_path)
    count = 0
    imgs = []

    min_page = max(0, page - 5)
    if label is None:
        total_entries = reader.total_entries
    else:
        total_entries = task.distribution[str(label)]

    max_page = min((total_entries-1) / size, page + 5)
    pages = range(min_page, max_page + 1)
    for key, value in reader.entries():
        if count >= page*size:
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if label is None or datum.label == label:
                if datum.encoded:
                    s = StringIO()
                    s.write(datum.data)
                    s.seek(0)
                    img = PIL.Image.open(s)
                else:
                    import caffe.io
                    arr = caffe.io.datum_to_array(datum)
                    # CHW -> HWC
                    arr = arr.transpose((1,2,0))
                    if arr.shape[2] == 1:
                        # HWC -> HW
                        arr = arr[:,:,0]
                    elif arr.shape[2] == 3:
                        # BGR -> RGB
                        # XXX see issue #59
                        arr = arr[:,:,[2,1,0]]
                    img = PIL.Image.fromarray(arr)
                imgs.append({"label":labels[datum.label], "b64": utils.image.embed_image_html(img)})
        if label is None:
            count += 1
        else:
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.label == int(label):
                count += 1
        if len(imgs) >= size:
            break

    return flask.render_template('datasets/images/explore.html', page=page, size=size, job=job, imgs=imgs, labels=labels, pages=pages, label=label, total_entries=total_entries, db=db)

Example #7

Show file

def infer(jobs_dir, model_id, epoch, batch_size, gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    input_data = []  # sample data
    input_labels = []  # sample labels

    # load images from database
    feature_db_path = dataset.get_feature_db_path(utils.constants.TRAIN_DB)
    feature_reader = DbReader(feature_db_path)

    label_db_path = dataset.get_label_db_path(utils.constants.TRAIN_DB)
    label_reader = DbReader(label_db_path)

    embeddings = {'count': 0, 'images': None, 'zs': None}

    def aggregate(images, labels, attributes, embeddings):
        # perform inference
        outputs = model.train_task().infer_many(images,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=False)
        z_vectors = outputs['output'][:, :100]
        for image, label, z in zip(images, labels, z_vectors):
            if embeddings['images'] is None:
                embeddings['images'] = np.empty((N_EMBEDDINGS, ) + image.shape)
            if embeddings['zs'] is None:
                embeddings['zs'] = np.empty((N_EMBEDDINGS, ) + z.shape)
            if embeddings['count'] < N_EMBEDDINGS:
                embeddings['images'][embeddings['count']] = image
                embeddings['zs'][embeddings['count']] = z
                embeddings['count'] += 1
                if embeddings['count'] == N_EMBEDDINGS:
                    save_embeddings(embeddings)

            for attribute in range(attributes['n_attributes']):
                if label[attribute] > 0:
                    attributes['positive_attribute_z'][attribute] += z
                    attributes['positive_count'][attribute] += 1
                else:
                    attributes['negative_attribute_z'][attribute] += z
                    attributes['negative_count'][attribute] += 1
        # save
        save_attributes(attributes)

    n_input_samples = 0
    label_len = None
    z_dim = 100
    for key, value in feature_reader.entries():
        img = parse_datum(value)
        label = parse_datum(label_reader.entry(key))[0]
        if label_len is None:
            label_len = len(label)
            attributes = {
                'n_attributes': label_len,
                'negative_count': np.zeros(label_len),
                'positive_count': np.zeros(label_len),
                'negative_attribute_z': np.zeros((label_len, z_dim)),
                'positive_attribute_z': np.zeros((label_len, z_dim)),
            }
        elif label_len != len(label):
            raise ValueError("label len differs: %d vs %d" %
                             (label_len, len(label)))
        input_data.append(img)
        input_labels.append(label)
        n_input_samples = n_input_samples + 1
        if n_input_samples % batch_size == 0:
            aggregate(input_data, input_labels, attributes, embeddings)
            print("######## %d processed ########" % n_input_samples)
            input_data = []  # sample data
            input_labels = []  # sample labels

    if n_input_samples % batch_size != 0:
        aggregate(input_data, input_labels, attributes, embeddings)
        print("######## %d processed ########" % n_input_samples)

Example #8

Show file

File: inference.py Project: cepiross/synthetic_img_gen

def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size,
          layers, gpu, input_is_db, label_file, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(
        dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" %
                             repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset,
                                                 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []  # indices of samples within file list
    input_data = []  # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(image,
                                                     height,
                                                     width,
                                                     channels=channels,
                                                     resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image,
                                                       channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    labels = np.loadtxt(label_file, dtype='object')
    if fig is not None:
        # Plot original images to grid
        for row in range(NUM_ROWS):
            for col in range(NUM_COLS):
                idx = row * NUM_COLS + col
                pl.subplot(NUM_ROWS * 2, NUM_COLS,
                           row * 2 * NUM_COLS + col + 1)
                pl.xticks([])
                pl.yticks([])
                pl.imshow(input_data[idx], interpolation='nearest')

    # perform inference
    visualizations = None

    logger.info('Inference')
    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" %
                             repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        logger.info('Start')
        outputs, visualizations = model.train_task().infer_one(
            input_data[0],
            snapshot_epoch=epoch,
            layers=layers,
            gpu=gpu,
            resize=resize)
        logger.info('Done!')
    else:
        if layers != 'none':
            raise InferenceError(
                "Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(input_data,
                                                snapshot_epoch=epoch,
                                                gpu=gpu,
                                                resize=resize)

    logger.info('Now it\'s time to pass results to write')

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        if fig is not None:
            # Plot top-K inferences on grids
            for elem_id, elem_data in enumerate(output_data):
                row = elem_id // NUM_COLS
                col = elem_id % NUM_COLS
                img_labels = sorted(zip(elem_data, labels),
                                    key=lambda x: x[0])[-NUM_TOPK_CLASSES:]
                ax = pl.subplot(NUM_ROWS * 2,
                                NUM_COLS, (row * 2 + 1) * NUM_COLS + col + 1,
                                aspect='equal')
                ax.yaxis.set_label_position("right")
                ax.yaxis.set_label_coords(1.25, 0.5)
                pl.ylabel('Confidence score', rotation=-90, fontsize=16)

                height = 0.5
                ylocs = np.array(range(NUM_TOPK_CLASSES)) * height + 0.1
                width = max(ylocs)
                top_class = img_labels[-1][1]
                pl.barh(ylocs, [l[0]*width for l in img_labels], height=height, \
                        color=['r' if l[1] == top_class else 'b' for l in img_labels]) #color=['r' if l[1] == labels[true_label] else 'b' for l in img_labels])
                pl.yticks(ylocs + height / 2, [l[1] for l in img_labels],
                          fontsize=14)
                pl.xticks([0, width / 2.0, width], ['0%', '50%', '100%'])
                pl.ylim(0, ylocs[-1] + height + 0.1)
            pl.tight_layout()
            pl.show()
            fig.savefig('./test.pdf', dpi=300)
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)

Example #9

Show file

File: inference.py Project: Alandougherty/DIGITS

def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db):

    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1,2,0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:,:,0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:,:,[2,1,0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                image = utils.image.resize_image(image,
                            height, width,
                            channels    = channels,
                            resize_mode = resize_mode,
                            )
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        assert layers == 'none'
        outputs = model.train_task().infer_many(input_data, snapshot_epoch=epoch, gpu=gpu)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data = input_ids)
    db.create_dataset("input_data", data = input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations)>0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)

Example #10

Show file

File: inference_agneev.py Project: agneevguin/Scania_Thesis

def infer(input_list,
          output_dir,
          jobs_dir,
          model_id,
          epoch,
          batch_size,
          layers,
          gpu,
          input_is_db,
          resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))
    
    # Set color dataset
    kwargs = {'colormap': 'dataset'}
    vis = Visualization(dataset, **kwargs)
    
    # Delete existing png segmented images
    for filename in glob.glob("/home/scania/Scania/Agneev/Tmp/*"):
        os.remove(filename) 

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data
    input_filename = []

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        try:
            if input_list.endswith('.h264') or input_list.endswith('.raw'):
                logging.info('Reading video...')
                ## http://stackoverflow.com/questions/33650974/opencv-python-read-specific-frame-using-videocapture
                cap = cv2.VideoCapture(input_list) #'/home/scania/Scania/Glantan_Recordings/2017-03-24_DrivePX2/dw_20170324_115921_0.000000_0.000000/video_front.h264')
                print cap
                frame_no = 0
                while frame_no < sys.maxint:
                    cap.set(1,frame_no);
                    ret, cv2_im = cap.read()
                    #if not ret:
                    #    break
                    cv2_im = cv2.cvtColor(cv2_im,cv2.COLOR_BGR2RGB)
                    image = PIL.Image.fromarray(cv2_im)
                    # print image
                    if resize:
                        image = utils.image.resize_image(
                            image,
                            height,
                            width,
                            channels=channels,
                            resize_mode=resize_mode)
                    else:
                        image = utils.image.image_to_array(
                            image,
                            channels=channels)
                    # single image inference
                    outputs, visualizations = model.train_task().infer_one(
                        image,
                        snapshot_epoch=epoch,
                        layers=layers,
                        gpu=gpu,
                        resize=resize)

                    out = dict([outputs.items()][0])
                    out['score'] = out.items()[0][1][0]
                    vis.process_data(n_input_samples, image, out, 'Video_file')
                    n_input_samples = n_input_samples + 1
                    frame_no = frame_no + 30

            elif input_list.endswith('.txt'):
    
                logging.info('Reading images...')
                with open(input_list) as infile:
                    paths = infile.readlines()
                # load and resize images
                for idx, path in enumerate(paths):
                    path = path.strip()
                    try:
                        image = utils.image.load_image(path.strip())
                        if resize:
                            image = utils.image.resize_image(
                                image,
                                height,
                                width,
                                channels=channels,
                                resize_mode=resize_mode)
                        else:
                            image = utils.image.image_to_array(
                                image,
                                channels=channels)

                        # single image inference
                        outputs, visualizations = model.train_task().infer_one(
                            image,
                            snapshot_epoch=epoch,
                            layers=layers,
                            gpu=gpu,
                            resize=resize)

                        # Find filename
                        head, tail = os.path.split(path)
                        filename = tail.split('.')[0]
                        out = dict([outputs.items()][0])
                        out['score'] = out.items()[0][1][0]
                        vis.process_data(n_input_samples, image, out, filename)
                        n_input_samples = n_input_samples + 1

                    except utils.errors.LoadImageError as e:
                        print e
            else:
                print 'Cannot read image or video file. \nPlease provide .h264, .raw or .txt file only.'
        except cv2.error as e:
            print e

Example #11

Show file

File: gan_features.py Project: Dasona/DIGITS

def infer(jobs_dir,
          model_id,
          epoch,
          batch_size,
          gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    input_data = []      # sample data
    input_labels = []    # sample labels

    # load images from database
    feature_db_path = dataset.get_feature_db_path(utils.constants.TRAIN_DB)
    feature_reader = DbReader(feature_db_path)

    label_db_path = dataset.get_label_db_path(utils.constants.TRAIN_DB)
    label_reader = DbReader(label_db_path)

    embeddings = {'count': 0, 'images': None, 'zs': None}

    def aggregate(images, labels, attributes, embeddings):
        # perform inference
        outputs = model.train_task().infer_many(
            images,
            snapshot_epoch=epoch,
            gpu=gpu,
            resize=False)
        z_vectors = outputs['output'][:, :100]
        for image, label, z in zip(images, labels, z_vectors):
            if embeddings['images'] is None:
                embeddings['images'] = np.empty((N_EMBEDDINGS,) + image.shape)
            if embeddings['zs'] is None:
                embeddings['zs'] = np.empty((N_EMBEDDINGS,) + z.shape)
            if embeddings['count'] < N_EMBEDDINGS:
                embeddings['images'][embeddings['count']] = image
                embeddings['zs'][embeddings['count']] = z
                embeddings['count'] += 1
                if embeddings['count'] == N_EMBEDDINGS:
                    save_embeddings(embeddings)

            for attribute in range(attributes['n_attributes']):
                if label[attribute] > 0:
                    attributes['positive_attribute_z'][attribute] += z
                    attributes['positive_count'][attribute] += 1
                else:
                    attributes['negative_attribute_z'][attribute] += z
                    attributes['negative_count'][attribute] += 1
        # save
        save_attributes(attributes)

    n_input_samples = 0
    label_len = None
    z_dim = 100
    for key, value in feature_reader.entries():
        img = parse_datum(value)
        label = parse_datum(label_reader.entry(key))[0]
        if label_len is None:
            label_len = len(label)
            attributes = {
                'n_attributes': label_len,
                'negative_count': np.zeros(label_len),
                'positive_count': np.zeros(label_len),
                'negative_attribute_z': np.zeros((label_len, z_dim)),
                'positive_attribute_z': np.zeros((label_len, z_dim)),
            }
        elif label_len != len(label):
            raise ValueError("label len differs: %d vs %d" % (label_len, len(label)))
        input_data.append(img)
        input_labels.append(label)
        n_input_samples = n_input_samples + 1
        if n_input_samples % batch_size == 0:
            aggregate(input_data, input_labels, attributes, embeddings)
            print("######## %d processed ########" % n_input_samples)
            input_data = []      # sample data
            input_labels = []    # sample labels

    if n_input_samples % batch_size != 0:
        aggregate(input_data, input_labels, attributes, embeddings)
        print("######## %d processed ########" % n_input_samples)