def path_to_datum(self, path, label, image_sum=None):
    """
    Creates a Datum from a path and a label
    May also update image_sum, if computing mean

    Arguments:
    path -- path to the image (filesystem path or URL)
    label -- numeric label for this image's category

    Keyword arguments:
    image_sum -- numpy array that stores a running sum of added images

    Raises:
    ValueError -- if self.encoding is set to something other than
        'none', 'png' or 'jpg'
    Exception -- if the resized image is neither 2-D nor 3-D
    """
    # prepend path with image_folder, if appropriate
    if not utils.is_url(path) and self.image_folder and not os.path.isabs(path):
        path = os.path.join(self.image_folder, path)

    image = utils.image.load_image(path)
    image = utils.image.resize_image(
        image, self.height, self.width,
        channels=self.channels,
        resize_mode=self.resize_mode,
    )

    if self.compute_mean and image_sum is not None:
        image_sum += image

    if not self.encoding or self.encoding == 'none':
        # Transform to caffe's format requirements
        if image.ndim == 3:
            # Transpose to (channels, height, width)
            image = image.transpose((2, 0, 1))
            if image.shape[0] == 3:
                # channel swap
                # XXX see issue #59
                image = image[[2, 1, 0], ...]
        elif image.ndim == 2:
            # Add a channels axis
            image = image[np.newaxis, :, :]
        else:
            # image.shape is a tuple: wrap it so that %-formatting does not
            # try to spread its elements over several placeholders
            raise Exception('Image has unrecognized shape: "%s"' % (image.shape,))
        return caffe.io.array_to_datum(image, label)

    # Encoded datum: pick the PIL save options up front so that an
    # unsupported encoding fails loudly instead of producing a Datum that
    # claims to be encoded but carries no data
    if self.encoding == 'png':
        save_options = {'format': 'PNG'}
    elif self.encoding == 'jpg':
        save_options = {'format': 'JPEG', 'quality': 90}
    else:
        raise ValueError('Unsupported encoding: "%s"' % (self.encoding,))

    datum = caffe_pb2.Datum()
    if image.ndim == 3:
        datum.channels = image.shape[2]
    else:
        datum.channels = 1
    datum.height = image.shape[0]
    datum.width = image.shape[1]
    datum.label = label

    s = StringIO()
    PIL.Image.fromarray(image).save(s, **save_options)
    datum.data = s.getvalue()
    datum.encoded = True
    return datum
def validate_folder_path(form, field):
    """
    Form-field validator checking that field.data points at an existing folder

    An empty field is accepted without any check (returns None).
    A URL is probed with an HTTP GET (redirects are not followed) and is
    accepted when the status code is ok, moved or found.
    Anything else is treated as a filesystem path and must be a directory.

    Arguments:
    form -- the form being validated (unused)
    field -- the field being validated; only field.data is read

    Returns True when a non-empty value passes validation

    Raises:
    validators.ValidationError -- when the URL or folder cannot be found
    """
    location = field.data
    if not location:
        return None

    if utils.is_url(location):
        # make sure the URL exists
        try:
            r = requests.get(location,
                             allow_redirects=False,
                             timeout=utils.HTTP_TIMEOUT)
        except Exception as e:
            raise validators.ValidationError(
                'Caught %s while checking URL: %s' % (type(e).__name__, e))
        # Checked outside of the try block so that this error is reported
        # as-is rather than being caught and re-wrapped by the handler above
        if r.status_code not in [requests.codes.ok,
                                 requests.codes.moved,
                                 requests.codes.found]:
            raise validators.ValidationError('URL not found')
        return True

    # make sure the filesystem path exists (isdir implies exists)
    if not os.path.isdir(location):
        raise validators.ValidationError('Folder does not exist')
    return True
def _load_thread(load_queue, write_queue, summary_queue,
                 image_width, image_height, image_channels,
                 resize_mode, image_folder, compute_mean,
                 backend=None, encoding=None):
    """
    Worker loop: drains load_queue, loads and resizes each image and hands
    the result over to write_queue

    Items read from load_queue are (path, label) pairs.
    Items written to write_queue are either the result of _array_to_datum
    (when backend is 'lmdb') or (image, label) tuples.
    When the load queue is found empty, a single (images_added, image_sum)
    tuple is pushed to summary_queue; image_sum is None unless compute_mean
    is set.
    """
    running_sum = (
        _initial_image_sum(image_width, image_height, image_channels)
        if compute_mean else None
    )
    processed = 0
    to_lmdb = (backend == 'lmdb')

    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another worker took the item first; re-check the queue
            continue
        path, label = entry

        # relative filesystem paths are resolved against image_folder
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)

        try:
            loaded = utils.image.load_image(path)
        except utils.errors.LoadImageError as e:
            # unreadable images are reported and skipped
            logger.warning('[%s %s] %s: %s' % (path, label, type(e).__name__, e))
            continue

        resized = utils.image.resize_image(
            loaded, image_height, image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )

        if compute_mean:
            running_sum += resized

        if to_lmdb:
            write_queue.put(_array_to_datum(resized, label, encoding))
        else:
            write_queue.put((resized, label))
        processed += 1

    summary_queue.put((processed, running_sum))
def path_to_datum(self, path, label, image_sum=None):
    """
    Creates a Datum from a path and a label
    May also update image_sum, if computing mean

    Arguments:
    path -- path to the image (filesystem path or URL)
    label -- numeric label for this image's category

    Keyword arguments:
    image_sum -- numpy array that stores a running sum of added images

    Returns the Datum, or None when load_image() returned None

    Raises:
    Exception -- if the resized image is neither 2-D nor 3-D (raw mode only)
    """
    # prepend path with image_folder, if appropriate
    if not utils.is_url(path) and self.image_folder and not os.path.isabs(path):
        path = os.path.join(self.image_folder, path)

    image = utils.image.load_image(path)
    if image is None:
        return None

    # Resize
    image = utils.image.resize_image(
        image, self.height, self.width,
        channels=self.channels,
        resize_mode=self.resize_mode,
    )

    if self.compute_mean and image_sum is not None:
        image_sum += image

    if self.encode:
        # Store the image as a JPEG-compressed blob
        datum = caffe_pb2.Datum()
        if image.ndim == 3:
            datum.channels = image.shape[2]
        else:
            datum.channels = 1
        datum.height = image.shape[0]
        datum.width = image.shape[1]
        datum.label = label
        datum.encoded = True
        s = StringIO()
        PIL.Image.fromarray(image).save(s, format='JPEG', quality=90)
        datum.data = s.getvalue()
        return datum

    # Transform to caffe's format requirements
    if image.ndim == 3:
        # Transpose to (channels, height, width)
        image = image.transpose((2, 0, 1))
    elif image.ndim == 2:
        # Add a channels axis
        image = image[np.newaxis, :, :]
    else:
        # image.shape is a tuple: wrap it so that %-formatting does not try
        # to spread its elements over several placeholders
        raise Exception('Image has unrecognized shape: "%s"' % (image.shape,))
    return caffe.io.array_to_datum(image, label)
def validate_folder(folder):
    """
    Tell whether `folder` is usable as an image source

    A URL is probed with an HTTP HEAD request and accepted when the status
    code is ok, moved or found. A filesystem path must exist, be a
    directory and be readable. Every failure is logged through
    logger.error before False is returned.

    Returns True when the folder is usable, False otherwise
    """
    if not utils.is_url(folder):
        # Filesystem path: run the checks in order, stop at the first failure
        checks = (
            (os.path.exists, 'folder "%s" does not exist'),
            (os.path.isdir, '"%s" is not a directory'),
            (lambda p: os.access(p, os.R_OK),
             'you do not have read access to folder "%s"'),
        )
        for is_satisfied, complaint in checks:
            if not is_satisfied(folder):
                logger.error(complaint % folder)
                return False
        return True

    try:
        response = requests.head(folder, timeout=utils.HTTP_TIMEOUT)
        accepted = [requests.codes.ok, requests.codes.moved, requests.codes.found]
        if response.status_code not in accepted:
            logger.error('"%s" returned status_code %s' % (folder, response.status_code))
            return False
    except Exception as err:
        logger.error('%s: %s' % (type(err).__name__, err))
        return False
    return True
def read_image_list(image_list, image_folder, num_test_images):
    """
    Parses an image list into parallel lists of paths and ground truths

    Each non-blank line is either "<path>" or "<path> <integer label>".
    Relative filesystem paths are prefixed with image_folder when given.

    Arguments:
    image_list -- file-like object providing readlines()
    image_folder -- folder to prepend to relative paths (may be None/empty)
    num_test_images -- maximum number of entries to return (None for no
        limit; zero or less returns no entries)

    Returns (paths, ground_truths); ground_truths holds an int for labelled
    lines and None for the others
    """
    paths = []
    ground_truths = []

    # The limit used to be checked only after appending, so a limit of 0
    # still produced one entry
    if num_test_images is not None and num_test_images <= 0:
        return paths, ground_truths

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        # cheap checks first; the URL test is only needed for relative paths
        if image_folder and not os.path.isabs(path) and not utils.is_url(path):
            path = os.path.join(image_folder, path)

        paths.append(path)
        ground_truths.append(ground_truth)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    return paths, ground_truths
def _load_thread(load_queue, write_queue, summary_queue,
                 image_width, image_height, image_channels,
                 resize_mode, image_folder, compute_mean,
                 get_bboxes, scale_factor):
    """
    Worker loop: drains load_queue, loads (and optionally crops) and resizes
    each image, then pushes (image, label) tuples to write_queue

    Items read from load_queue are (path, bbox, label) triples when
    get_bboxes is set, (path, label) pairs otherwise.
    When the load queue is found empty, a single (images_added, image_sum)
    tuple is pushed to summary_queue; image_sum is None unless compute_mean
    is set.
    """
    patch_extractor = None
    if get_bboxes:
        patch_extractor = BoundingBoxExtractor(scale_factor=scale_factor)

    running_sum = None
    if compute_mean:
        running_sum = _initial_image_sum(image_width, image_height, image_channels)

    processed = 0
    while not load_queue.empty():
        try:
            entry = load_queue.get(True, 0.05)
        except Queue.Empty:
            # another worker took the item first; re-check the queue
            continue

        bbox = None
        if get_bboxes:
            path, bbox, label = entry
        else:
            path, label = entry

        # relative filesystem paths are resolved against image_folder
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)

        try:
            picture = utils.image.load_image(path)
            if get_bboxes:
                # TODO - Make more efficient - currently loads image for
                # each bbox in that image.
                picture = patch_extractor.extract(picture, bbox)
        except utils.errors.LoadImageError as e:
            # unreadable images are reported and skipped
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e))
            continue

        picture = utils.image.resize_image(
            picture, image_height, image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )

        if compute_mean:
            running_sum += picture

        write_queue.put((picture, label))
        processed += 1

    summary_queue.put((processed, running_sum))
def _load_thread(load_queue, write_queue, summary_queue,
                 image_width, image_height, image_channels,
                 resize_mode, image_folder, compute_mean,
                 backend=None, encoding=None):
    """
    Consumes items in load_queue
    Produces items to write_queue
    Stores cumulative results in summary_queue

    Items read from load_queue are (path, label) pairs. Loading an image is
    retried up to 100 times on LoadImageError; the image is skipped (with a
    warning) when every attempt fails.
    Items written to write_queue are either the result of _array_to_datum
    (when backend is 'lmdb') or (image, label) tuples.
    A single (images_added, image_sum) tuple is pushed to summary_queue at
    the end; image_sum is None unless compute_mean is set.
    """
    max_load_attempts = 100

    images_added = 0
    if compute_mean:
        image_sum = _initial_image_sum(image_width, image_height, image_channels)
    else:
        image_sum = None

    while not load_queue.empty():
        try:
            path, label = load_queue.get(True, 0.05)
        except Queue.Empty:
            continue

        # prepend path with image_folder, if appropriate
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)

        # Reset for every item: otherwise a failed load would either hit an
        # unbound name (first item) or silently reuse the previous item's
        # already-resized array
        image = None
        for attempt in range(max_load_attempts):
            try:
                image = utils.image.load_image(path)
                break
            except utils.errors.LoadImageError as e:
                logger.warning('[%s (%d)] %s: %s' % (path, attempt, type(e).__name__, e))

        # Compare against None explicitly: truth-testing a NumPy array raises
        if image is None:
            logger.warning('[%s]: Failed to load even in %d retries' % (path, max_load_attempts))
            continue

        image = utils.image.resize_image(
            image, image_height, image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )

        if compute_mean:
            image_sum += image

        if backend == 'lmdb':
            datum = _array_to_datum(image, label, encoding)
            write_queue.put(datum)
        else:
            write_queue.put((image, label))
        images_added += 1

    summary_queue.put((images_added, image_sum))
def _load_thread(load_queue, write_queue, summary_queue,
                 image_width, image_height, image_channels,
                 resize_mode, image_folder, compute_mean,
                 get_bboxes, scale_factor):
    """
    Queue worker turning load_queue entries into resized images

    Reads (path, bbox, label) triples from load_queue when get_bboxes is
    set, (path, label) pairs otherwise; writes (image, label) tuples to
    write_queue. Once load_queue is found empty, reports
    (images_added, image_sum) on summary_queue, where image_sum is None
    unless compute_mean is set.
    """
    cropper = BoundingBoxExtractor(scale_factor=scale_factor) if get_bboxes else None
    total = (
        _initial_image_sum(image_width, image_height, image_channels)
        if compute_mean else None
    )
    n_added = 0

    while not load_queue.empty():
        try:
            if get_bboxes:
                path, box, label = load_queue.get(True, 0.05)
            else:
                path, label = load_queue.get(True, 0.05)
                box = None
        except Queue.Empty:
            # nothing arrived within the timeout; loop back to the emptiness test
            continue

        # only relative, non-URL paths get the image_folder prefix
        needs_prefix = (not utils.is_url(path)
                        and image_folder
                        and not os.path.isabs(path))
        if needs_prefix:
            path = os.path.join(image_folder, path)

        try:
            img = utils.image.load_image(path)
            if get_bboxes:
                # TODO - Make more efficient - currently loads image for
                # each bbox in that image.
                img = cropper.extract(img, box)
        except utils.errors.LoadImageError as e:
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e))
            continue

        img = utils.image.resize_image(
            img, image_height, image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )
        if compute_mean:
            total += img

        write_queue.put((img, label))
        n_added += 1

    summary_queue.put((n_added, total))
def validate_folder_path(form, field):
    """
    Form-field validator checking that field.data points at an existing folder

    An empty field is accepted without any check (returns None).
    A URL is probed with an HTTP GET (redirects are not followed) and is
    accepted when the status code is ok, moved or found.
    Anything else is treated as a filesystem path and must be a directory.

    Arguments:
    form -- the form being validated (unused)
    field -- the field being validated; only field.data is read

    Returns True when a non-empty value passes validation

    Raises:
    validators.ValidationError -- when the URL or folder cannot be found
    """
    location = field.data
    if not location:
        return None

    if utils.is_url(location):
        # make sure the URL exists
        try:
            r = requests.get(location,
                             allow_redirects=False,
                             timeout=utils.HTTP_TIMEOUT)
        except Exception as e:
            raise validators.ValidationError(
                "Caught %s while checking URL: %s" % (type(e).__name__, e))
        # Checked outside of the try block so that this error is reported
        # as-is rather than being caught and re-wrapped by the handler above
        if r.status_code not in [requests.codes.ok,
                                 requests.codes.moved,
                                 requests.codes.found]:
            raise validators.ValidationError("URL not found")
        return True

    # make sure the filesystem path exists (isdir implies exists)
    if not os.path.isdir(location):
        raise validators.ValidationError("Folder does not exist")
    return True
def _load_thread(load_queue, write_queue, summary_queue,
                 image_width, image_height, image_channels,
                 resize_mode, image_folder, compute_mean):
    """
    Queue worker turning (path, label) entries into resized images

    Reads (path, label) pairs from load_queue and writes (image, label)
    tuples to write_queue. Once load_queue is found empty, reports
    (images_added, image_sum) on summary_queue, where image_sum is None
    unless compute_mean is set.
    """
    accumulated = None
    if compute_mean:
        accumulated = _initial_image_sum(image_width, image_height, image_channels)
    done = 0

    while not load_queue.empty():
        try:
            job = load_queue.get(True, 0.05)
        except Queue.Empty:
            # nothing arrived within the timeout; loop back to the emptiness test
            continue
        path, label = job

        # only relative, non-URL paths get the image_folder prefix
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)

        try:
            raw = utils.image.load_image(path)
        except utils.errors.LoadImageError as e:
            # unreadable images are reported and skipped
            logger.warning('[%s] %s: %s' % (path, type(e).__name__, e))
            continue

        sized = utils.image.resize_image(
            raw, image_height, image_width,
            channels=image_channels,
            resize_mode=resize_mode,
        )
        if compute_mean:
            accumulated += sized

        write_queue.put((sized, label))
        done += 1

    summary_queue.put((done, accumulated))
def validate_folder(folder):
    """
    Check that `folder` can be used as an image source

    URLs are probed with an HTTP HEAD request (status ok, moved or found is
    accepted); filesystem paths must exist, be directories and be readable.
    The reason for a rejection is logged through logger.error.

    Returns True when the folder is usable, False otherwise
    """
    if utils.is_url(folder):
        reachable = False
        try:
            reply = requests.head(folder, timeout=utils.HTTP_TIMEOUT)
            if reply.status_code in (requests.codes.ok,
                                     requests.codes.moved,
                                     requests.codes.found):
                reachable = True
            else:
                logger.error('"%s" returned status_code %s' % (folder, reply.status_code))
        except Exception as err:
            logger.error('%s: %s' % (type(err).__name__, err))
        return reachable

    # Filesystem path: pick the first failing check, if any
    problem = None
    if not os.path.exists(folder):
        problem = 'folder "%s" does not exist'
    elif not os.path.isdir(folder):
        problem = '"%s" is not a directory'
    elif not os.access(folder, os.R_OK):
        problem = 'you do not have read access to folder "%s"'

    if problem is not None:
        logger.error(problem % folder)
        return False
    return True
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Reads from the request: the uploaded `image_list` file (required) and
    the optional `image_folder` and `snapshot_epoch` form fields. Images
    that fail to load are reported and skipped.

    Returns JSON when requested: {classifications: {filename: [[category,confidence],...],...}}
    Otherwise renders the classify_many.html template.

    Raises:
    werkzeug.exceptions.BadRequest -- image_list is missing, image_folder
        does not exist, or no image could be loaded
    RuntimeError -- inference returned no scores
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        epoch = float(flask.request.form['snapshot_epoch'])

    paths = []
    images = []
    ground_truths = []
    dataset = job.train_task().dataset

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(
                image,
                dataset.image_dims[0], dataset.image_dims[1],
                channels=dataset.image_dims[2],
                resize_mode=dataset.resize_mode,
            )
            paths.append(path)
            images.append(image)
            ground_truths.append(ground_truth)
        except utils.errors.LoadImageError as e:
            # Skip unreadable images; the call form works on Python 2 and 3
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
            'Unable to load any images from the file')

    labels, scores = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if scores is None:
        raise RuntimeError('An error occurred while processing the images')

    # take top 5
    indices = (-scores).argsort()[:, :5]

    classifications = []
    for image_index, index_list in enumerate(indices):
        # `i` is a category in labels and also an index into scores
        classifications.append([
            (labels[i], round(100.0 * scores[image_index, i], 2))
            for i in index_list
        ])

    # replace ground truth indices with labels
    ground_truths = [
        labels[x] if x is not None and (0 <= x < len(labels)) else None
        for x in ground_truths
    ]

    if request_wants_json():
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})

    return flask.render_template(
        'models/images/classification/classify_many.html',
        job=job,
        paths=paths,
        classifications=classifications,
        # only show the column when at least one image came with a label
        show_ground_truth=any(x is not None for x in ground_truths),
        ground_truths=ground_truths,
    )
def infer_many():
    """
    Infer many images

    Reads from the request: the uploaded `image_list` file (required) and
    the optional `image_folder`, `num_test_images`, `snapshot_epoch` and
    `dont_resize` form fields. Runs an ImageInferenceJob through the
    scheduler, waits for it to complete and deletes it afterwards.

    Returns a (response, status_code) pair; status_code is 500 when the
    inference job ended in error ('E') and 200 otherwise. The response is
    JSON ({outputs: {path: {name: values}}}) when requested, otherwise the
    rendered infer_many.html template.

    Raises:
    werkzeug.exceptions.BadRequest -- image_list is missing or image_folder
        does not exist
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in flask.request.form and flask.request.form['num_test_images'].strip():
        num_test_images = int(flask.request.form['num_test_images'])
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        epoch = float(flask.request.form['snapshot_epoch'])

    if 'dont_resize' in flask.request.form and flask.request.form['dont_resize']:
        resize = False
    else:
        resize = True

    paths = []
    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        if match:
            path = match.group(1)
        else:
            path = line

        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
        username=utils.auth.get_username(),
        name="Infer Many Images",
        model=model_job,
        images=paths,
        epoch=epoch,
        layers='none',
        resize=resize,
    )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # set return status code
    status_code = 500 if inference_job.status == 'E' else 200

    # delete job folder and remove from scheduler list
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # keep only the paths that the job reports as inputs
        paths = [paths[idx] for idx in inputs['ids']]
        inference_views_html, header_html, app_begin_html, app_end_html = \
            get_inference_visualizations(model_job.dataset, inputs, outputs)
    else:
        inference_views_html = None
        header_html = None
        app_begin_html = None
        app_end_html = None

    if request_wants_json():
        result = {}
        # outputs is None when inference failed: answer with an empty
        # mapping (and the error status code) instead of crashing
        if outputs is not None:
            for i, path in enumerate(paths):
                result[path] = dict(
                    (name, blob[i].tolist()) for name, blob in outputs.items())
        return flask.jsonify({'outputs': result}), status_code

    return flask.render_template(
        'models/images/generic/infer_many.html',
        model_job=model_job,
        job=inference_job,
        paths=paths,
        inference_views_html=inference_views_html,
        header_html=header_html,
        app_begin_html=app_begin_html,
        app_end_html=app_end_html,
    ), status_code
def parse_folder(
        folder,
        labels_file,
        train_file=None,
        percent_train=None,
        val_file=None,
        percent_val=None,
        test_file=None,
        percent_test=None,
        min_per_category=2,
        max_per_category=None,
):
    """
    Parses a folder of images into three textfiles
    Returns True on success

    Arguments:
    folder -- a folder containing folders of images (can be a filesystem path or a url)
    labels_file -- file for labels

    Keyword Arguments:
    train_file -- output file for training images
    percent_train -- percentage of images to use in the training set
    val_file -- output file for validation images
    percent_val -- percentage of images to use in the validation set
    test_file -- output file for test images
    percent_test -- percentage of images to use in the test set
    min_per_category -- minimum number of images per category
    max_per_category -- maximum number of images per category

    When percent_train is positive, the labels are derived from the
    sub-folder names and written to labels_file; otherwise they are read
    from labels_file and categories missing from it are skipped.
    """
    def is_positive(value):
        # None means "not requested"; comparing None to 0 is an error on Python 3
        return value is not None and value > 0

    create_labels = is_positive(percent_train)
    labels = []

    # Read the labels from labels_file
    if not create_labels:
        with open(labels_file) as infile:
            for line in infile:
                line = line.strip()
                if line:
                    labels.append(line)

    # Verify that at least two category folders exist
    folder_is_url = utils.is_url(folder)
    if folder_is_url:
        if not folder.endswith('/'):
            folder += '/'
        subdirs, _ = parse_web_listing(folder)
    else:
        if os.path.exists(folder) and os.path.isdir(folder):
            subdirs = []
            for filename in os.listdir(folder):
                subdir = os.path.join(folder, filename)
                if os.path.isdir(subdir):
                    subdirs.append(subdir)
        else:
            logger.error('folder does not exist')
            return False

    subdirs.sort()

    if len(subdirs) < 2:
        logger.error('folder must contain at least two subdirectories')
        return False

    # A category needs at least one image per requested output set
    required_categories = sum(
        1 for percent in (percent_train, percent_val, percent_test)
        if is_positive(percent))

    # Parse the folder
    train_count = 0
    val_count = 0
    test_count = 0

    train_outfile = None
    val_outfile = None
    test_outfile = None

    try:
        if percent_train:
            train_outfile = open(train_file, 'w')
        if percent_val:
            val_outfile = open(val_file, 'w')
        if percent_test:
            test_outfile = open(test_file, 'w')

        subdir_index = 0
        label_index = 0
        for subdir in subdirs:
            # Use the directory name as the label
            label_name = subdir
            if folder_is_url:
                label_name = unescape(label_name)
            else:
                label_name = os.path.basename(label_name)
            label_name = label_name.replace('_', ' ')
            if label_name.endswith('/'):
                # Remove trailing slash
                label_name = label_name[0:-1]

            if create_labels:
                labels.append(label_name)
                label_index = len(labels) - 1
            else:
                found = False
                for i, l in enumerate(labels):
                    if label_name == l:
                        found = True
                        label_index = i
                        break
                if not found:
                    logger.warning(
                        'Category "%s" not found in labels_file. Skipping.' % label_name)
                    continue

            logger.debug('Category - %s' % label_name)

            lines = []

            # Read all images in the folder
            if folder_is_url:
                urls, _ = web_listing_all_files(folder + subdir,
                                                max_count=max_per_category)
                for url in urls:
                    lines.append('%s %d' % (url, label_index))
            else:
                # `subdir` already includes `folder` and os.walk() yields
                # `dirpath` rooted at its argument, so neither is joined
                # again (doing so breaks relative folders)
                for dirpath, dirnames, filenames in os.walk(subdir, followlinks=True):
                    for filename in filenames:
                        if filename.lower().endswith(utils.image.SUPPORTED_EXTENSIONS):
                            lines.append('%s %d' % (
                                os.path.join(dirpath, filename), label_index))
                            if max_per_category is not None and len(lines) >= max_per_category:
                                break
                    if max_per_category is not None and len(lines) >= max_per_category:
                        logger.warning('Reached maximum limit for this category')
                        break

            # Split up the lines
            train_lines = []
            val_lines = []
            test_lines = []

            if not lines or len(lines) < required_categories or len(lines) < min_per_category:
                logger.warning('Not enough images for this category')
                # Only discard the label appended above; when the labels
                # come from labels_file nothing was appended and popping
                # would drop an unrelated category
                if create_labels:
                    labels.pop()
            else:
                random.shuffle(lines)
                a, b = three_way_split_indices(len(lines), percent_val, percent_test)
                train_lines = lines[:a]
                val_lines = lines[a:b]
                test_lines = lines[b:]

            # A set is only written (and counted) when its file was opened
            if train_lines and train_outfile is not None:
                train_outfile.write('\n'.join(train_lines) + '\n')
                train_count += len(train_lines)
            if val_lines and val_outfile is not None:
                val_outfile.write('\n'.join(val_lines) + '\n')
                val_count += len(val_lines)
            if test_lines and test_outfile is not None:
                test_outfile.write('\n'.join(test_lines) + '\n')
                test_count += len(test_lines)

            subdir_index += 1
            logger.debug('Progress: %0.2f' % (float(subdir_index) / len(subdirs)))
    finally:
        # Close whatever was opened, even if parsing failed half-way
        for outfile in (train_outfile, val_outfile, test_outfile):
            if outfile is not None:
                outfile.close()

    if create_labels:
        if len(labels) < 2:
            logger.error('Did not find two valid categories')
            return False
        else:
            with open(labels_file, 'w') as labels_outfile:
                labels_outfile.write('\n'.join(labels) + '\n')

    logger.info('Found %d images in %d categories.'
                % (train_count + val_count + test_count, len(labels)))
    logger.info('Selected %d for training.' % train_count)
    logger.info('Selected %d for validation.' % val_count)
    logger.info('Selected %d for testing.' % test_count)
    return True
def infer_many():
    """
    Infer many images

    Reads from the request: the uploaded `image_list` file (required) and
    the optional `image_folder` and `snapshot_epoch` form fields. Every
    listed image is loaded and resized ('squash' mode) to the dimensions of
    the dataset's first analyze-db task; images that fail to load are
    reported and skipped.

    Returns JSON ({outputs: {path: {name: values}}}) when requested,
    otherwise the rendered infer_many.html template.

    Raises:
    werkzeug.exceptions.BadRequest -- image_list is missing, image_folder
        does not exist, or no image could be loaded
    RuntimeError -- inference returned no outputs
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        epoch = float(flask.request.form['snapshot_epoch'])

    paths = []
    images = []

    db_task = job.train_task().dataset.analyze_db_tasks()[0]
    height = db_task.image_height
    width = db_task.image_width
    channels = db_task.image_channels

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        if match:
            path = match.group(1)
        else:
            path = line

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(
                image, height, width,
                channels=channels,
                resize_mode='squash',
            )
            paths.append(path)
            images.append(image)
        except utils.errors.LoadImageError as e:
            # Skip unreadable images; the call form works on Python 2 and 3
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
            'Unable to load any images from the file')

    outputs = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if outputs is None:
        raise RuntimeError('An error occurred while processing the images')

    if request_wants_json():
        result = {}
        for i, path in enumerate(paths):
            result[path] = dict(
                (name, blob[i].tolist()) for name, blob in outputs.items())
        return flask.jsonify({'outputs': result})

    return flask.render_template(
        'models/images/generic/infer_many.html',
        job=job,
        paths=paths,
        network_outputs=outputs,
    )
def infer_many():
    """
    Infer many images

    Reads from the request: the uploaded `image_list` file (required) and
    the optional `image_folder`, `num_test_images`, `snapshot_epoch` and
    `dont_resize` form fields. Runs an ImageInferenceJob through the
    scheduler, waits for it to complete and deletes it afterwards.

    Returns a (response, status_code) pair; status_code is 500 when the
    inference job ended in error ('E') and 200 otherwise. The response is
    JSON ({outputs: {path: {name: values}}}) when requested, otherwise the
    rendered infer_many.html template.

    Raises:
    werkzeug.exceptions.BadRequest -- image_list is missing or image_folder
        does not exist
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    form = flask.request.form

    if 'image_folder' in form and form['image_folder'].strip():
        image_folder = form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in form and form['num_test_images'].strip():
        num_test_images = int(form['num_test_images'])
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in form:
        epoch = float(form['snapshot_epoch'])

    # images are resized unless the client explicitly opts out
    resize = not ('dont_resize' in form and form['dont_resize'])

    paths = []
    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+\d+$', line)
        if match:
            path = match.group(1)
        else:
            path = line

        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
        username=utils.auth.get_username(),
        name="Infer Many Images",
        model=model_job,
        images=paths,
        epoch=epoch,
        layers='none',
        resize=resize,
    )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # set return status code
    status_code = 500 if inference_job.status == 'E' else 200

    # delete job folder and remove from scheduler list
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # keep only the paths that the job reports as inputs
        paths = [paths[idx] for idx in inputs['ids']]
        inference_views_html, header_html, app_begin_html, app_end_html = \
            get_inference_visualizations(model_job.dataset, inputs, outputs)
    else:
        inference_views_html = None
        header_html = None
        app_begin_html = None
        app_end_html = None

    if request_wants_json():
        result = {}
        # outputs is None when inference failed: answer with an empty
        # mapping (and the error status code) instead of crashing
        if outputs is not None:
            for i, path in enumerate(paths):
                result[path] = dict(
                    (name, blob[i].tolist()) for name, blob in outputs.items())
        return flask.jsonify({'outputs': result}), status_code

    return flask.render_template(
        'models/images/generic/infer_many.html',
        model_job=model_job,
        job=inference_job,
        paths=paths,
        inference_views_html=inference_views_html,
        header_html=header_html,
        app_begin_html=app_begin_html,
        app_end_html=app_end_html,
    ), status_code
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Reads from the request: the uploaded `image_list` file (required) and
    the optional `image_folder` and `snapshot_epoch` form fields. Images
    that fail to load are reported and skipped.

    Returns JSON when requested: {classifications: {filename: [[category,confidence],...],...}}
    Otherwise renders the classify_many.html template.

    Raises:
    werkzeug.exceptions.BadRequest -- image_list is missing, image_folder
        does not exist, or no image could be loaded
    RuntimeError -- inference returned no scores
    """
    job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    form = flask.request.form

    if 'image_folder' in form and form['image_folder'].strip():
        image_folder = form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    epoch = None
    if 'snapshot_epoch' in form:
        epoch = float(form['snapshot_epoch'])

    paths = []
    images = []
    ground_truths = []
    dataset = job.train_task().dataset

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        try:
            if not utils.is_url(path) and image_folder and not os.path.isabs(path):
                path = os.path.join(image_folder, path)
            image = utils.image.load_image(path)
            image = utils.image.resize_image(
                image,
                dataset.image_dims[0],
                dataset.image_dims[1],
                channels=dataset.image_dims[2],
                resize_mode=dataset.resize_mode,
            )
            paths.append(path)
            images.append(image)
            ground_truths.append(ground_truth)
        except utils.errors.LoadImageError as e:
            # Skip unreadable images; the call form works on Python 2 and 3
            print(e)

    if not images:
        raise werkzeug.exceptions.BadRequest(
            'Unable to load any images from the file')

    labels, scores = job.train_task().infer_many(images, snapshot_epoch=epoch)
    if scores is None:
        raise RuntimeError('An error occurred while processing the images')

    # take top 5
    indices = (-scores).argsort()[:, :5]

    classifications = []
    for image_index, index_list in enumerate(indices):
        result = []
        for i in index_list:
            # `i` is a category in labels and also an index into scores
            result.append((labels[i], round(100.0 * scores[image_index, i], 2)))
        classifications.append(result)

    # replace ground truth indices with labels
    ground_truths = [
        labels[x] if x is not None and (0 <= x < len(labels)) else None
        for x in ground_truths
    ]

    if request_wants_json():
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})

    return flask.render_template(
        'models/images/classification/classify_many.html',
        job=job,
        paths=paths,
        classifications=classifications,
        # only show the column when at least one image came with a label
        show_ground_truth=any(x is not None for x in ground_truths),
        ground_truths=ground_truths,
    )
def classify_many():
    """
    Classify many images and return the top 5 classifications for each

    Reads the uploaded `image_list` file (one path or URL per line,
    optionally followed by whitespace and a numeric ground-truth label),
    submits an ImageInferenceJob for the listed images to the scheduler,
    waits for it to complete and then deletes it.

    Form fields read from the request:
    image_list -- (required) uploaded text file listing the images
    image_folder -- (optional) folder prepended to relative filesystem paths
    num_test_images -- (optional) stop reading the list after this many images
    snapshot_epoch -- (optional) epoch of the snapshot to use for inference

    Returns JSON when requested:
        {classifications: {filename: [[category,confidence],...],...}}
    otherwise renders the classify_many.html template.

    Raises werkzeug.exceptions.BadRequest when image_list is missing, when
    image_folder does not exist, when num_test_images or snapshot_epoch is
    not a number, or when the last output holds no data.
    Raises RuntimeError when JSON is requested but inference produced no
    outputs.
    """
    model_job = job_from_request()

    image_list = flask.request.files.get('image_list')
    if not image_list:
        raise werkzeug.exceptions.BadRequest('image_list is a required field')

    if 'image_folder' in flask.request.form and flask.request.form['image_folder'].strip():
        image_folder = flask.request.form['image_folder']
        if not os.path.exists(image_folder):
            raise werkzeug.exceptions.BadRequest(
                'image_folder "%s" does not exist' % image_folder)
    else:
        image_folder = None

    if 'num_test_images' in flask.request.form and flask.request.form['num_test_images'].strip():
        try:
            num_test_images = int(flask.request.form['num_test_images'])
        except ValueError:
            # A malformed field is a client error, not a server failure
            raise werkzeug.exceptions.BadRequest(
                'num_test_images must be an integer')
    else:
        num_test_images = None

    epoch = None
    if 'snapshot_epoch' in flask.request.form:
        try:
            epoch = float(flask.request.form['snapshot_epoch'])
        except ValueError:
            raise werkzeug.exceptions.BadRequest(
                'snapshot_epoch must be a number')

    paths = []
    ground_truths = []

    for line in image_list.readlines():
        line = line.strip()
        if not line:
            continue

        # might contain a numerical label at the end
        match = re.match(r'(.*\S)\s+(\d+)$', line)
        if match:
            path = match.group(1)
            ground_truth = int(match.group(2))
        else:
            path = line
            ground_truth = None

        # prepend path with image_folder, if appropriate
        if not utils.is_url(path) and image_folder and not os.path.isabs(path):
            path = os.path.join(image_folder, path)
        paths.append(path)
        ground_truths.append(ground_truth)

        if num_test_images is not None and len(paths) >= num_test_images:
            break

    # create inference job
    inference_job = ImageInferenceJob(
        username=utils.auth.get_username(),
        name="Classify Many Images",
        model=model_job,
        images=paths,
        epoch=epoch,
        layers='none',
    )

    # schedule tasks
    scheduler.add_job(inference_job)

    # wait for job to complete
    inference_job.wait_completion()

    # retrieve inference data
    inputs, outputs, _ = inference_job.get_data()

    # delete job
    scheduler.delete_job(inference_job)

    if outputs is not None and len(outputs) < 1:
        # an error occurred
        outputs = None

    if inputs is not None:
        # retrieve path and ground truth of images that were successfully processed
        paths = [paths[idx] for idx in inputs['ids']]
        ground_truths = [ground_truths[idx] for idx in inputs['ids']]

    classifications = None
    if outputs is not None:
        # convert to class probabilities for viewing; the scores are taken
        # from the last entry of outputs (list() keeps this valid on both
        # Python 2 and Python 3)
        last_output_data = list(outputs.values())[-1]

        if len(last_output_data) < 1:
            raise werkzeug.exceptions.BadRequest(
                'Unable to classify any image from the file')

        scores = last_output_data
        # take top 5
        indices = (-scores).argsort()[:, :5]

        labels = model_job.train_task().get_labels()
        classifications = []
        for image_index, index_list in enumerate(indices):
            # `i` is a category in labels and also an index into scores
            classifications.append([
                (labels[i], round(100.0 * scores[image_index, i], 2))
                for i in index_list
            ])

        # replace ground truth indices with labels
        # (out-of-range indices are treated as "no ground truth")
        ground_truths = [
            labels[x] if x is not None and (0 <= x < len(labels)) else None
            for x in ground_truths
        ]

    if request_wants_json():
        if classifications is None:
            # zip() over None would raise an opaque TypeError; report the
            # actual failure instead
            raise RuntimeError('An error occurred while processing the images')
        joined = dict(zip(paths, classifications))
        return flask.jsonify({'classifications': joined})
    else:
        return flask.render_template(
            'models/images/classification/classify_many.html',
            model_job=model_job,
            job=inference_job,
            paths=paths,
            classifications=classifications,
            # only show the column when at least one image has a ground truth
            show_ground_truth=any(x is not None for x in ground_truths),
            ground_truths=ground_truths,
        )
def parse_folder(folder, labels_file,
                 train_file=None, percent_train=None,
                 val_file=None, percent_val=None,
                 test_file=None, percent_test=None,
                 min_per_category=2,
                 max_per_category=None,
                 ):
    """
    Parses a folder of images into three textfiles
    Returns True on success, False when the folder is missing or does not
    contain at least two usable categories

    Each output line has the form "<image path or url> <label index>".
    When percent_train > 0 the labels are derived from the subfolder names
    and written to labels_file; otherwise the labels are read from
    labels_file and subfolders without a matching label are skipped.

    Arguments:
    folder -- a folder containing folders of images (can be a filesystem path or a url)
    labels_file -- file for labels

    Keyword Arguments:
    train_file -- output file for training images
    percent_train -- percentage of images to use in the training set
    val_file -- output file for validation images
    percent_val -- percentage of images to use in the validation set
    test_file -- output file for test images
    percent_test -- percentage of images to use in the test set
    min_per_category -- minimum number of images per category
    max_per_category -- maximum number of images per category
    """
    # Explicit None checks: ordering comparisons against None only work by
    # accident on Python 2 and raise TypeError on Python 3
    wants_train = percent_train is not None and percent_train > 0
    wants_val = percent_val is not None and percent_val > 0
    wants_test = percent_test is not None and percent_test > 0

    create_labels = wants_train
    labels = []

    ### Read the labels from labels_file

    if not create_labels:
        with open(labels_file) as infile:
            for line in infile:
                line = line.strip()
                if line:
                    labels.append(line)

    ### Verify that at least two category folders exist

    folder_is_url = utils.is_url(folder)
    if folder_is_url:
        if not folder.endswith('/'):
            folder += '/'
        subdirs, _ = parse_web_listing(folder)
    else:
        if os.path.exists(folder) and os.path.isdir(folder):
            subdirs = []
            for filename in os.listdir(folder):
                # NOTE: local entries already include `folder` as a prefix
                subdir = os.path.join(folder, filename)
                if os.path.isdir(subdir):
                    subdirs.append(subdir)
        else:
            logger.error('folder does not exist')
            return False

    subdirs.sort()

    if len(subdirs) < 2:
        logger.error('folder must contain at least two subdirectories')
        return False

    ### Parse the folder

    train_count = 0
    val_count = 0
    test_count = 0

    # Number of requested splits; a category needs at least one image per split
    required_categories = sum((wants_train, wants_val, wants_test))

    train_outfile = None
    val_outfile = None
    test_outfile = None

    # try/finally guarantees the output files are closed even if listing or
    # writing fails part-way through
    try:
        if percent_train:
            train_outfile = open(train_file, 'w')
        if percent_val:
            val_outfile = open(val_file, 'w')
        if percent_test:
            test_outfile = open(test_file, 'w')

        subdir_index = 0
        label_index = 0
        for subdir in subdirs:
            # Use the directory name as the label
            label_name = subdir
            if folder_is_url:
                label_name = unescape(label_name)
            else:
                label_name = os.path.basename(label_name)
            label_name = label_name.replace('_', ' ')
            if label_name.endswith('/'):
                # Remove trailing slash
                label_name = label_name[0:-1]

            if create_labels:
                labels.append(label_name)
                label_index = len(labels) - 1
            else:
                try:
                    label_index = labels.index(label_name)
                except ValueError:
                    logger.warning('Category "%s" not found in labels_file. Skipping.' % label_name)
                    continue

            logger.debug('Category - %s' % label_name)

            lines = []

            ### Read all images in the folder

            if folder_is_url:
                urls, _ = web_listing_all_files(folder + subdir, max_count=max_per_category)
                for url in urls:
                    lines.append('%s %d' % (url, label_index))
            else:
                # `subdir` already contains `folder` and `dirpath` already
                # contains `subdir`, so neither is joined again (joining them
                # again breaks relative folder paths)
                for dirpath, dirnames, filenames in os.walk(subdir, followlinks=True):
                    for filename in filenames:
                        if filename.lower().endswith(utils.image.SUPPORTED_EXTENSIONS):
                            lines.append('%s %d' % (os.path.join(dirpath, filename), label_index))
                            if max_per_category is not None and len(lines) >= max_per_category:
                                break
                    if max_per_category is not None and len(lines) >= max_per_category:
                        logger.warning('Reached maximum limit for this category')
                        break

            ### Split up the lines

            train_lines = []
            val_lines = []
            test_lines = []

            if not lines or len(lines) < required_categories or len(lines) < min_per_category:
                logger.warning('Not enough images for this category')
                if create_labels:
                    # Undo the label appended above. Labels that were read
                    # from labels_file must be left untouched.
                    labels.pop()
            else:
                random.shuffle(lines)
                a, b = three_way_split_indices(len(lines), percent_val, percent_test)
                train_lines = lines[:a]
                val_lines = lines[a:b]
                test_lines = lines[b:]

            # Lines assigned to a split whose output file was not requested
            # are dropped rather than crashing on a missing file handle
            if train_lines and train_outfile is not None:
                train_outfile.write('\n'.join(train_lines) + '\n')
                train_count += len(train_lines)
            if val_lines and val_outfile is not None:
                val_outfile.write('\n'.join(val_lines) + '\n')
                val_count += len(val_lines)
            if test_lines and test_outfile is not None:
                test_outfile.write('\n'.join(test_lines) + '\n')
                test_count += len(test_lines)

            subdir_index += 1
            logger.debug('Progress: %0.2f' % (float(subdir_index) / len(subdirs)))
    finally:
        for outfile in (train_outfile, val_outfile, test_outfile):
            if outfile is not None:
                outfile.close()

    if create_labels:
        if len(labels) < 2:
            logger.error('Did not find two valid categories')
            return False
        else:
            with open(labels_file, 'w') as labels_outfile:
                labels_outfile.write('\n'.join(labels) + '\n')

    logger.info('Found %d images in %d categories.' % (train_count + val_count + test_count, len(labels)))
    logger.info('Selected %d for training.' % train_count)
    logger.info('Selected %d for validation.' % val_count)
    logger.info('Selected %d for testing.' % test_count)
    return True