def create_generic_db(jobs_dir, dataset_id, stage):
    """
    Create a generic DB
    """

    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, dataset_id)
    if not os.path.isdir(dataset_dir):
        raise IOError("Dataset dir %s does not exist" % dataset_dir)
    dataset = Job.load(dataset_dir)

    # create instance of extension
    extension_id = dataset.extension_id
    extension_class = extensions.data.get_extension(extension_id)
    extension = extension_class(**dataset.extension_userdata)

    # encoding
    feature_encoding = dataset.feature_encoding
    label_encoding = dataset.label_encoding

    batch_size = dataset.batch_size
    num_threads = dataset.num_threads

    force_same_shape = dataset.force_same_shape

    # create main DB creator object and execute main method
    db_creator = DbCreator()
    db_creator.create_db(
        extension,
        stage,
        dataset_dir,
        batch_size,
        num_threads,
        feature_encoding,
        label_encoding,
        force_same_shape)

    logger.info('Generic DB creation Done')
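# The original command-line wrapper for this tool is not included in this excerpt. Below is a
# minimal sketch of how create_generic_db() might be driven from argparse; the flag names and
# defaults are illustrative assumptions, not taken from the source.
def _parse_args_sketch():
    import argparse
    parser = argparse.ArgumentParser(description='Create a generic DB from a DIGITS dataset job')
    parser.add_argument('dataset_id', help='ID of the dataset job')
    parser.add_argument('--jobs_dir', default='none', help="jobs directory ('none' reads the DIGITS config)")
    parser.add_argument('--stage', default='train', help='stage to create the DB for (e.g. train or val)')
    return parser.parse_args()

# usage sketch:
#   args = _parse_args_sketch()
#   create_generic_db(args.jobs_dir, args.dataset_id, args.stage)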
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(
                        image, height, width, channels=channels, resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image, channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(
            input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu, resize=resize)
    else:
        if layers != 'none':
            raise InferenceError("Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(
            input_data, snapshot_epoch=epoch, gpu=gpu, resize=resize)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
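# The HDF5 layout written above can be read back with h5py: datasets under "outputs" use
# base64-encoded names and carry an 'id' attribute that preserves write order. The helper
# below is an assumed, minimal read-back sketch, not part of the original tool.
def read_outputs(db_path):
    """Return a list of (output_name, array) pairs from inference.hdf5, in write order."""
    import base64
    import h5py
    with h5py.File(db_path, 'r') as f:
        items = sorted(f['outputs'].items(), key=lambda kv: kv[1].attrs['id'])
        return [(base64.urlsafe_b64decode(str(name)), dset[...]) for name, dset in items]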
def infer(jobs_dir, model_id, epoch, batch_size, gpu):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    input_data = []    # sample data
    input_labels = []  # sample labels

    # load images from database
    feature_db_path = dataset.get_feature_db_path(utils.constants.TRAIN_DB)
    feature_reader = DbReader(feature_db_path)

    label_db_path = dataset.get_label_db_path(utils.constants.TRAIN_DB)
    label_reader = DbReader(label_db_path)

    embeddings = {'count': 0, 'images': None, 'zs': None}

    def aggregate(images, labels, attributes, embeddings):
        # perform inference
        outputs = model.train_task().infer_many(
            images, snapshot_epoch=epoch, gpu=gpu, resize=False)
        z_vectors = outputs['output'][:, :100]
        for image, label, z in zip(images, labels, z_vectors):
            if embeddings['images'] is None:
                embeddings['images'] = np.empty((N_EMBEDDINGS,) + image.shape)
            if embeddings['zs'] is None:
                embeddings['zs'] = np.empty((N_EMBEDDINGS,) + z.shape)
            if embeddings['count'] < N_EMBEDDINGS:
                embeddings['images'][embeddings['count']] = image
                embeddings['zs'][embeddings['count']] = z
                embeddings['count'] += 1
                if embeddings['count'] == N_EMBEDDINGS:
                    save_embeddings(embeddings)

            for attribute in range(attributes['n_attributes']):
                if label[attribute] > 0:
                    attributes['positive_attribute_z'][attribute] += z
                    attributes['positive_count'][attribute] += 1
                else:
                    attributes['negative_attribute_z'][attribute] += z
                    attributes['negative_count'][attribute] += 1

        # save
        save_attributes(attributes)

    n_input_samples = 0
    label_len = None
    z_dim = 100
    for key, value in feature_reader.entries():
        img = parse_datum(value)
        label = parse_datum(label_reader.entry(key))[0]
        if label_len is None:
            label_len = len(label)
            attributes = {
                'n_attributes': label_len,
                'negative_count': np.zeros(label_len),
                'positive_count': np.zeros(label_len),
                'negative_attribute_z': np.zeros((label_len, z_dim)),
                'positive_attribute_z': np.zeros((label_len, z_dim)),
            }
        elif label_len != len(label):
            raise ValueError("label len differs: %d vs %d" % (label_len, len(label)))

        input_data.append(img)
        input_labels.append(label)
        n_input_samples = n_input_samples + 1

        if n_input_samples % batch_size == 0:
            aggregate(input_data, input_labels, attributes, embeddings)
            print("######## %d processed ########" % n_input_samples)
            input_data = []    # sample data
            input_labels = []  # sample labels

    if n_input_samples % batch_size != 0:
        aggregate(input_data, input_labels, attributes, embeddings)
        print("######## %d processed ########" % n_input_samples)
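# save_embeddings() and save_attributes() are defined elsewhere in the original script and are
# not shown in this excerpt. A minimal sketch of what such helpers could look like, assuming
# they only persist the aggregated arrays to disk (the file names below are illustrative, not
# taken from the source):
import pickle

def save_attributes(attributes):
    """Persist per-attribute z-vector sums and counts for later averaging."""
    with open('attributes.pkl', 'wb') as f:
        pickle.dump(attributes, f)

def save_embeddings(embeddings):
    """Persist the first N_EMBEDDINGS (image, z) pairs collected during inference."""
    with open('embeddings.pkl', 'wb') as f:
        pickle.dump(embeddings, f)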
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu):
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        # note: fixed from repr(self.epoch); this is a plain function, so there is no self
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    if isinstance(dataset, ImageClassificationDatasetJob):
        height = dataset.image_dims[0]
        width = dataset.image_dims[1]
        channels = dataset.image_dims[2]
        resize_mode = dataset.resize_mode
    elif isinstance(dataset, GenericImageDatasetJob):
        db_task = dataset.analyze_db_tasks()[0]
        height = db_task.image_height
        width = db_task.image_width
        channels = db_task.image_channels
        resize_mode = 'squash'
    else:
        raise InferenceError("Unknown dataset type")

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()

    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())
            image = utils.image.resize_image(
                image, height, width, channels=channels, resize_mode=resize_mode)
            input_ids.append(idx)
            input_data.append(image)
            n_input_samples = n_input_samples + 1
        except utils.errors.LoadImageError as e:
            print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(
            input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        assert layers == 'none'
        outputs = model.train_task().infer_many(
            input_data, snapshot_epoch=epoch, gpu=gpu)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db, label_file, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                if resize:
                    image = utils.image.resize_image(
                        image, height, width, channels=channels, resize_mode=resize_mode)
                else:
                    image = utils.image.image_to_array(image, channels=channels)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    labels = np.loadtxt(label_file, dtype='object')

    if fig is not None:
        # Plot original images to grid
        for row in range(NUM_ROWS):
            for col in range(NUM_COLS):
                idx = row * NUM_COLS + col
                pl.subplot(NUM_ROWS * 2, NUM_COLS, row * 2 * NUM_COLS + col + 1)
                pl.xticks([])
                pl.yticks([])
                pl.imshow(input_data[idx], interpolation='nearest')

    # perform inference
    visualizations = None

    logger.info('Inference')
    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        logger.info('Start')
        outputs, visualizations = model.train_task().infer_one(
            input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu, resize=resize)
        logger.info('Done!')
    else:
        if layers != 'none':
            raise InferenceError("Layer visualization is not supported for multiple inference")
        outputs = model.train_task().infer_many(
            input_data, snapshot_epoch=epoch, gpu=gpu, resize=resize)

    logger.info('Now it\'s time to pass results to write')

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]

        if fig is not None:
            # Plot top-K inferences on grids
            for elem_id, elem_data in enumerate(output_data):
                row = elem_id // NUM_COLS
                col = elem_id % NUM_COLS
                img_labels = sorted(zip(elem_data, labels), key=lambda x: x[0])[-NUM_TOPK_CLASSES:]
                ax = pl.subplot(NUM_ROWS * 2, NUM_COLS,
                                (row * 2 + 1) * NUM_COLS + col + 1, aspect='equal')
                ax.yaxis.set_label_position("right")
                ax.yaxis.set_label_coords(1.25, 0.5)
                pl.ylabel('Confidence score', rotation=-90, fontsize=16)
                # note: height and width below shadow the image dimensions computed above
                height = 0.5
                ylocs = np.array(range(NUM_TOPK_CLASSES)) * height + 0.1
                width = max(ylocs)
                top_class = img_labels[-1][1]
                pl.barh(ylocs, [l[0] * width for l in img_labels], height=height,
                        color=['r' if l[1] == top_class else 'b' for l in img_labels])
                # color=['r' if l[1] == labels[true_label] else 'b' for l in img_labels])
                pl.yticks(ylocs + height / 2, [l[1] for l in img_labels], fontsize=14)
                pl.xticks([0, width / 2.0, width], ['0%', '50%', '100%'])
                pl.ylim(0, ylocs[-1] + height + 0.1)
            pl.tight_layout()
            pl.show()
            fig.savefig('./test.pdf', dpi=300)

        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
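# This variant references `fig`, `pl`, `NUM_ROWS`, `NUM_COLS` and `NUM_TOPK_CLASSES`, which are
# not defined in the excerpt and are presumably set up at module level. A minimal sketch of the
# assumed setup; all values are illustrative, not taken from the source.
import matplotlib.pylab as pl

NUM_ROWS = 2           # assumed grid height for the image/score panels
NUM_COLS = 4           # assumed grid width
NUM_TOPK_CLASSES = 5   # assumed number of top classes to show per image
fig = pl.figure(figsize=(NUM_COLS * 3, NUM_ROWS * 6))  # set fig = None to disable plotting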
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db):
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        with open(input_list) as infile:
            paths = infile.readlines()
        # load and resize images
        for idx, path in enumerate(paths):
            path = path.strip()
            try:
                image = utils.image.load_image(path.strip())
                image = utils.image.resize_image(
                    image, height, width, channels=channels, resize_mode=resize_mode)
                input_ids.append(idx)
                input_data.append(image)
                n_input_samples = n_input_samples + 1
            except utils.errors.LoadImageError as e:
                print e

    # perform inference
    visualizations = None
    predictions = []

    if n_input_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))
    elif n_input_samples == 1:
        # single image inference
        outputs, visualizations = model.train_task().infer_one(
            input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        assert layers == 'none'
        outputs = model.train_task().infer_many(
            input_data, snapshot_epoch=epoch, gpu=gpu)

    # write to hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')

    # write input paths and images to database
    db.create_dataset("input_ids", data=input_ids)
    db.create_dataset("input_data", data=input_data)

    # write outputs to database
    db_outputs = db.create_group("outputs")
    for output_id, output_name in enumerate(outputs.keys()):
        output_data = outputs[output_name]
        output_key = base64.urlsafe_b64encode(str(output_name))
        dset = db_outputs.create_dataset(output_key, data=output_data)
        # add ID attribute so outputs can be sorted in
        # the order they appear in here
        dset.attrs['id'] = output_id

    # write visualization data
    if visualizations is not None and len(visualizations) > 0:
        db_layers = db.create_group("layers")
        for idx, layer in enumerate(visualizations):
            vis = layer['vis'] if layer['vis'] is not None else np.empty(0)
            dset = db_layers.create_dataset(str(idx), data=vis)
            dset.attrs['name'] = layer['name']
            dset.attrs['vis_type'] = layer['vis_type']
            if 'param_count' in layer:
                dset.attrs['param_count'] = layer['param_count']
            if 'layer_type' in layer:
                dset.attrs['layer_type'] = layer['layer_type']
            dset.attrs['shape'] = layer['data_stats']['shape']
            dset.attrs['mean'] = layer['data_stats']['mean']
            dset.attrs['stddev'] = layer['data_stats']['stddev']
            dset.attrs['histogram_y'] = layer['data_stats']['histogram'][0]
            dset.attrs['histogram_x'] = layer['data_stats']['histogram'][1]
            dset.attrs['histogram_ticks'] = layer['data_stats']['histogram'][2]
    db.close()
    logger.info('Saved data to %s', db_path)
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu):
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        # note: fixed from repr(self.epoch); this is a plain function, so there is no self
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # retrieve image dimensions and resize mode
    if isinstance(dataset, ImageClassificationDatasetJob):
        height = dataset.image_dims[0]
        width = dataset.image_dims[1]
        channels = dataset.image_dims[2]
        resize_mode = dataset.resize_mode
    elif isinstance(dataset, GenericImageDatasetJob):
        db_task = dataset.analyze_db_tasks()[0]
        height = db_task.image_height
        width = db_task.image_width
        channels = db_task.image_channels
        resize_mode = 'squash'
    else:
        raise InferenceError("Unknown dataset type")

    # retrieve batch size (unless specified on command line)
    if batch_size is None:
        batch_size = task.get_test_batch_size()

    n_loaded_samples = 0  # number of samples we were able to load
    input_ids = []        # indices of samples within file list
    input_data = []       # sample data

    # create hdf5 file
    db_path = os.path.join(output_dir, 'inference.hdf5')
    db = h5py.File(db_path, 'w')
    db.create_group("outputs")

    # load paths from file
    paths = None
    with open(input_list) as infile:
        paths = infile.readlines()
    n_input_paths = len(paths)

    # load and resize images
    for idx, path in enumerate(paths):
        path = path.strip()
        try:
            image = utils.image.load_image(path.strip())
            image = utils.image.resize_image(
                image, height, width, channels=channels, resize_mode=resize_mode)
            input_ids.append(idx)
            input_data.append(image)
            n_loaded_samples = n_loaded_samples + 1
        except utils.errors.LoadImageError as e:
            print e

        # do we have a full batch, or have we reached the last item?
        if (not n_loaded_samples % batch_size) or (idx == n_input_paths - 1):
            # any item(s) left to save?
            if len(input_ids) > 0:
                # perform inference
                outputs, visualizations = infer_batch(model, input_data, epoch, layers, gpu)

                # save visualizations
                if visualizations is not None and len(visualizations) > 0:
                    save_visualizations(db, visualizations)

                # save other data
                save_data(db, n_input_paths, n_loaded_samples, input_ids, input_data, outputs)

                # empty input lists
                input_ids = []
                input_data = []

                logger.info('Processed %d/%d images', idx + 1, n_input_paths)

    if n_loaded_samples == 0:
        raise InferenceError("Unable to load any image from file '%s'" % repr(input_list))

    db.close()
    logger.info('Saved data to %s', db_path)
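# infer_batch(), save_visualizations() and save_data() are helpers defined elsewhere in this
# variant and are not shown in the excerpt. Based on the single-shot versions above,
# infer_batch() presumably dispatches to infer_one() for a single sample and infer_many()
# otherwise; the following is a minimal sketch under that assumption, not the original helper.
def infer_batch(model, input_data, epoch, layers, gpu):
    """Run inference on one batch and return (outputs, visualizations)."""
    visualizations = None
    if len(input_data) == 1:
        # single-image path: layer visualizations are only available here
        outputs, visualizations = model.train_task().infer_one(
            input_data[0], snapshot_epoch=epoch, layers=layers, gpu=gpu)
    else:
        outputs = model.train_task().infer_many(
            input_data, snapshot_epoch=epoch, gpu=gpu)
    return outputs, visualizations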
def infer(input_list, output_dir, jobs_dir, model_id, epoch, batch_size, layers, gpu, input_is_db, resize):
    """
    Perform inference on a list of images using the specified model
    """
    # job directory defaults to that defined in DIGITS config
    if jobs_dir == 'none':
        jobs_dir = digits.config.config_value('jobs_dir')

    # load model job
    model_dir = os.path.join(jobs_dir, model_id)
    assert os.path.isdir(model_dir), "Model dir %s does not exist" % model_dir
    model = Job.load(model_dir)

    # load dataset job
    dataset_dir = os.path.join(jobs_dir, model.dataset_id)
    assert os.path.isdir(dataset_dir), "Dataset dir %s does not exist" % dataset_dir
    dataset = Job.load(dataset_dir)
    for task in model.tasks:
        task.dataset = dataset

    # retrieve snapshot file
    task = model.train_task()
    snapshot_filename = None
    epoch = float(epoch)
    if epoch == -1 and len(task.snapshots):
        # use last epoch
        epoch = task.snapshots[-1][1]
        snapshot_filename = task.snapshots[-1][0]
    else:
        for f, e in task.snapshots:
            if e == epoch:
                snapshot_filename = f
                break
    if not snapshot_filename:
        raise InferenceError("Unable to find snapshot for epoch=%s" % repr(epoch))

    # Set color dataset
    kwargs = {'colormap': 'dataset'}
    vis = Visualization(dataset, **kwargs)

    # Delete existing png segmented images
    for filename in glob.glob("/home/scania/Scania/Agneev/Tmp/*"):
        os.remove(filename)

    # retrieve image dimensions and resize mode
    image_dims = dataset.get_feature_dims()
    height = image_dims[0]
    width = image_dims[1]
    channels = image_dims[2]
    resize_mode = dataset.resize_mode if hasattr(dataset, 'resize_mode') else 'squash'

    n_input_samples = 0  # number of samples we were able to load
    input_ids = []       # indices of samples within file list
    input_data = []      # sample data
    input_filename = []

    if input_is_db:
        # load images from database
        reader = DbReader(input_list)
        for key, value in reader.entries():
            datum = caffe_pb2.Datum()
            datum.ParseFromString(value)
            if datum.encoded:
                s = StringIO()
                s.write(datum.data)
                s.seek(0)
                img = PIL.Image.open(s)
                img = np.array(img)
            else:
                import caffe.io
                arr = caffe.io.datum_to_array(datum)
                # CHW -> HWC
                arr = arr.transpose((1, 2, 0))
                if arr.shape[2] == 1:
                    # HWC -> HW
                    arr = arr[:, :, 0]
                elif arr.shape[2] == 3:
                    # BGR -> RGB
                    # XXX see issue #59
                    arr = arr[:, :, [2, 1, 0]]
                img = arr
            input_ids.append(key)
            input_data.append(img)
            n_input_samples = n_input_samples + 1
    else:
        # load paths from file
        paths = None
        try:
            if input_list.endswith('.h264') or input_list.endswith('.raw'):
                logging.info('Reading video...')
                # http://stackoverflow.com/questions/33650974/opencv-python-read-specific-frame-using-videocapture
                cap = cv2.VideoCapture(input_list)
                # e.g. '/home/scania/Scania/Glantan_Recordings/2017-03-24_DrivePX2/dw_20170324_115921_0.000000_0.000000/video_front.h264'
                print cap
                frame_no = 0
                while frame_no < sys.maxint:
                    cap.set(1, frame_no)
                    ret, cv2_im = cap.read()
                    # if not ret:
                    #     break
                    cv2_im = cv2.cvtColor(cv2_im, cv2.COLOR_BGR2RGB)
                    image = PIL.Image.fromarray(cv2_im)
                    # print image
                    if resize:
                        image = utils.image.resize_image(
                            image, height, width, channels=channels, resize_mode=resize_mode)
                    else:
                        image = utils.image.image_to_array(image, channels=channels)

                    # single image inference
                    outputs, visualizations = model.train_task().infer_one(
                        image, snapshot_epoch=epoch, layers=layers, gpu=gpu, resize=resize)
                    out = dict([outputs.items()][0])
                    out['score'] = out.items()[0][1][0]
                    vis.process_data(n_input_samples, image, out, 'Video_file')

                    n_input_samples = n_input_samples + 1
                    frame_no = frame_no + 30
            elif input_list.endswith('.txt'):
                logging.info('Reading images...')
                with open(input_list) as infile:
                    paths = infile.readlines()
                # load and resize images
                for idx, path in enumerate(paths):
                    path = path.strip()
                    try:
                        image = utils.image.load_image(path.strip())
                        if resize:
                            image = utils.image.resize_image(
                                image, height, width, channels=channels, resize_mode=resize_mode)
                        else:
                            image = utils.image.image_to_array(image, channels=channels)

                        # single image inference
                        outputs, visualizations = model.train_task().infer_one(
                            image, snapshot_epoch=epoch, layers=layers, gpu=gpu, resize=resize)

                        # Find filename
                        head, tail = os.path.split(path)
                        filename = tail.split('.')[0]

                        out = dict([outputs.items()][0])
                        out['score'] = out.items()[0][1][0]
                        vis.process_data(n_input_samples, image, out, filename)

                        n_input_samples = n_input_samples + 1
                    except utils.errors.LoadImageError as e:
                        print e
            else:
                print 'Cannot read image or video file. \nPlease provide .h264, .raw or .txt file only.'
        except cv2.error as e:
            print e