def image_upload(cleanup=True, **kwargs):
    r"""
    Returns the gid for an uploaded image.

    The image is taken from the ``image`` entry of the current request's
    files, saved under the controller's uploads directory with a timestamped
    file name, and registered through ``ibs.add_images``.

    Args:
        image (image binary): the POST variable containing the binary
            (multi-form) image data
        cleanup (bool): if True, ``ut.remove_dirs`` is called on the saved
            upload once it has been added (default True)
        **kwargs: Arbitrary keyword arguments; the kwargs are passed down to
            the add_images function

    Returns:
        gid (rowid): gid corresponding to the image submitted.

    Raises:
        IOError: if the request does not contain an ``image`` file

    RESTful:
        Method: POST
        URL:    /api/upload/image/
    """
    ibs = current_app.ibs

    print('request.files = %s' % (request.files,))
    filestore = request.files.get('image', None)
    if filestore is None:
        raise IOError('Image not given')

    uploads_path = ibs.get_uploadsdir()
    ut.ensuredir(uploads_path)

    current_time = time.strftime('%Y_%m_%d_%H_%M_%S')
    modifier = 1
    upload_filename = 'upload_%s.png' % (current_time)
    upload_filepath = join(uploads_path, upload_filename)
    # The collision check has to look inside the uploads directory; testing
    # the bare file name would resolve against the current working directory
    # and let a second upload in the same second overwrite the first.
    while exists(upload_filepath):
        upload_filename = 'upload_%s_%04d.png' % (current_time, modifier)
        upload_filepath = join(uploads_path, upload_filename)
        modifier += 1

    filestore.save(upload_filepath)

    gid_list = ibs.add_images([upload_filepath], **kwargs)
    gid = gid_list[0]

    if cleanup:
        # NOTE(review): upload_filepath is a file, not a directory - confirm
        # that ut.remove_dirs actually removes plain files.
        ut.remove_dirs(upload_filepath)

    return gid
def image_upload(cleanup=True, **kwargs):
    r"""
    Returns the gid for an uploaded image.

    The image is taken from the ``image`` entry of the current request's
    files, saved under the controller's uploads directory with a timestamped
    file name, and registered through ``ibs.add_images``.

    Args:
        image (image binary): the POST variable containing the binary
            (multi-form) image data
        cleanup (bool): if True, ``ut.remove_dirs`` is called on the saved
            upload once it has been added (default True)
        **kwargs: Arbitrary keyword arguments; the kwargs are passed down to
            the add_images function

    Returns:
        gid (rowid): gid corresponding to the image submitted.

    Raises:
        IOError: if the request does not contain an ``image`` file

    RESTful:
        Method: POST
        URL:    /api/image/
    """
    ibs = current_app.ibs

    print('request.files = %s' % (request.files,))
    filestore = request.files.get('image', None)
    if filestore is None:
        raise IOError('Image not given')

    uploads_path = ibs.get_uploadsdir()
    ut.ensuredir(uploads_path)

    current_time = time.strftime('%Y_%m_%d_%H_%M_%S')
    modifier = 1
    upload_filename = 'upload_%s.png' % (current_time)
    upload_filepath = join(uploads_path, upload_filename)
    # The collision check has to look inside the uploads directory; testing
    # the bare file name would resolve against the current working directory
    # and let a second upload in the same second overwrite the first.
    while exists(upload_filepath):
        upload_filename = 'upload_%s_%04d.png' % (current_time, modifier)
        upload_filepath = join(uploads_path, upload_filename)
        modifier += 1

    filestore.save(upload_filepath)

    gid_list = ibs.add_images([upload_filepath], **kwargs)
    gid = gid_list[0]

    if cleanup:
        # NOTE(review): upload_filepath is a file, not a directory - confirm
        # that ut.remove_dirs actually removes plain files.
        ut.remove_dirs(upload_filepath)

    return gid
def process_image_directory(project_name, size, reset=True):
    """
    Resize every raw image of a project and write it to the processed folder.

    Images are read from ``../data/raw/<project_name>`` and written, under
    the same file name, to ``../data/processed/<project_name>`` (both
    relative to the current working directory).

    Args:
        project_name (str): name of the project sub-folder
        size (tuple): target size, handed to ``cv2.resize`` as its ``dsize``
            argument
        reset (bool): if True and the processed folder already exists, it is
            removed before processing (default True)

    Returns:
        None
    """
    # Raw folders
    raw_path = abspath(join('..', 'data', 'raw'))
    processed_path = abspath(join('..', 'data', 'processed'))
    # Project folders
    project_raw_path = join(raw_path, project_name)
    project_processed_path = join(processed_path, project_name)
    # Load raw data
    direct = Directory(project_raw_path, include_extensions='images')
    # Reset / create paths if not exist
    if exists(project_processed_path) and reset:
        ut.remove_dirs(project_processed_path)
    ut.ensuredir(project_processed_path)
    # Process by resizing the images into the desired shape
    for file_path in direct.files():
        file_name = basename(file_path)
        print('Processing %r' % (file_name, ))
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread signals an unreadable / corrupt file by returning
            # None instead of raising; cv2.resize would crash on it.
            print('Cannot read image...skipping')
            continue
        image = cv2.resize(image, size, interpolation=cv2.INTER_LANCZOS4)
        dest_path = join(project_processed_path, file_name)
        cv2.imwrite(dest_path, image)
def image_upload_zip(**kwargs):
    r"""
    Returns the gid_list for image files submitted in a ZIP archive.

    The image archive should be flat (no folders will be scanned for images)
    and must be smaller than 100 MB (the limit is not enforced by this
    function). The archive can submit multiple images, ideally in JPEG format
    to save space. Duplicate image uploads will result in the duplicate
    images receiving the same gid based on the hashed pixel values.

    Args:
        image_zip_archive (binary): the POST variable containing the binary
            (multi-form) image archive data
        **kwargs: Arbitrary keyword arguments; the kwargs are passed down to
            the add_images function

    Returns:
        gid_list (list of rowids): the list of gids corresponding to the
            images submitted. The gids correspond to the image names sorted
            in lexicographical order.

    Raises:
        IOError: if no archive was submitted or the archive cannot be
            extracted

    RESTful:
        Method: POST
        URL:    /api/image/zip
    """
    ibs = current_app.ibs
    # Get image archive
    image_archive = request.files.get('image_zip_archive', None)
    if image_archive is None:
        raise IOError('Image archive not given')

    # Pick an extraction directory that does not exist yet.  The check has to
    # be made inside the uploads directory; testing the bare name would
    # resolve against the current working directory and let two uploads in
    # the same second share (and mix) one directory.
    uploads_path = ibs.get_uploadsdir()
    ut.ensuredir(uploads_path)
    current_time = time.strftime('%Y_%m_%d_%H_%M_%S')

    modifier = 1
    upload_path = join(uploads_path, '%s' % (current_time))
    while exists(upload_path):
        upload_path = join(uploads_path, '%s_%04d' % (current_time, modifier))
        modifier += 1

    ut.ensuredir(upload_path)

    # Extract the content
    try:
        with zipfile.ZipFile(image_archive, 'r') as zfile:
            zfile.extractall(upload_path)
    except Exception:
        # Do not leave a half-extracted directory behind
        ut.remove_dirs(upload_path)
        raise IOError('Image archive extracton failed')

    # Manual test to ensure Directory and utool do the same thing:
    #
    #     from wbia.detecttools.directory import Directory
    #     upload_path = ut.truepath('~/Pictures')
    #     gpath_list1 = sorted(ut.list_images(upload_path, recursive=False, full=True))
    #
    #     direct = Directory(upload_path, include_file_extensions='images', recursive=False)
    #     gpath_list = direct.files()
    #     gpath_list = sorted(gpath_list)
    #
    #     assert gpath_list1 == gpath_list

    # Only the top level of the extracted archive is scanned (recursive=False)
    gpath_list = sorted(ut.list_images(upload_path, recursive=False, full=True))
    gid_list = ibs.add_images(gpath_list, **kwargs)
    return gid_list
def show_confusion_matrix(correct_y, predict_y, category_list, results_path,
                          mapping_fn=None, data_x=None):
    """
    Given the correct and predict labels, show the confusion matrix

    Args:
        correct_y (list of int): the list of correct labels
        predict_y (list of int): the list of predict assigned labels
        category_list (list of str): the category list of all categories
        results_path (str): directory in which the ``confused`` folder is
            (re)created when ``data_x`` is given
        mapping_fn (callable, optional): called with ``category_list``; must
            return a dict that maps every category to a distinct index, one
            of which is 0.  When None, categories keep their position in
            ``category_list``.
        data_x (sequence, optional): examples aligned with ``correct_y`` and
            ``predict_y``; every misclassified example is written into the
            ``confused`` folder with ``cv2.imwrite``

    Displays:
        matplotlib: graph of the confusion matrix

    Returns:
        the matplotlib figure the confusion matrix was drawn on

    TODO FIXME and simplify
    """
    import matplotlib.pyplot as plt

    confused_examples = join(results_path, 'confused')
    if data_x is not None:
        if exists(confused_examples):
            ut.remove_dirs(confused_examples, quiet=True)
        ut.ensuredir(confused_examples)

    size = len(category_list)

    if mapping_fn is None:
        # Identity
        category_mapping = {key: index for index, key in enumerate(category_list)}
        category_list_ = category_list
    else:
        category_mapping = mapping_fn(category_list)
        assert all(
            category in category_mapping for category in category_list
        ), 'Not all categories are mapped'
        values = list(category_mapping.values())
        assert len(set(values)) == len(values), 'Mapped categories have a duplicate assignment'
        assert 0 in values, 'Mapped categories must have a 0 index'
        # dict.items() exists on both Python 2 and Python 3, whereas
        # dict.iteritems() raises AttributeError on Python 3.
        temp = sorted(category_mapping.items(), key=itemgetter(1))
        category_list_ = [t[0] for t in temp]

    confidences = np.zeros((size, size))
    counters = {}
    for index, (correct, predict) in enumerate(zip(correct_y, predict_y)):
        # Ensure type
        correct = int(correct)
        predict = int(predict)
        # Get the "text" label
        example_correct_label = category_list[correct]
        example_predict_label = category_list[predict]
        # Perform any mapping that needs to be done
        correct_ = category_mapping[example_correct_label]
        predict_ = category_mapping[example_predict_label]
        # Add to the confidence matrix
        confidences[correct_][predict_] += 1
        if data_x is not None and correct_ != predict_:
            example = data_x[index]
            example_name = '%s^SEEN_INCORRECTLY_AS^%s' % (
                example_correct_label,
                example_predict_label,
            )
            # Number the examples of each (correct, predicted) pair from 0
            counter = counters.get(example_name, 0)
            counters[example_name] = counter + 1
            example_name = '%s^%d.png' % (example_name, counter)
            example_path = join(confused_examples, example_name)
            # TODO: make write confused examples function
            cv2.imwrite(example_path, example)

    # Normalize each row by its total so the colors show per-class rates.
    # NOTE(review): a category without any correct example has a row sum of
    # 0, which makes its normalized row NaN.
    row_sums = np.sum(confidences, axis=1)
    norm_conf = (confidences.T / row_sums).T

    fig = plt.figure(1)
    plt.clf()
    ax = fig.add_subplot(111)
    ax.set_aspect(1)
    res = ax.imshow(np.array(norm_conf), cmap=plt.cm.jet,
                    interpolation='nearest')

    # Write the raw counts into the cells
    for x in range(size):
        for y in range(size):
            ax.annotate(str(int(confidences[x][y])), xy=(y, x),
                        horizontalalignment='center',
                        verticalalignment='center')

    cb = fig.colorbar(res)  # NOQA
    plt.xticks(np.arange(size), category_list_[0:size], rotation=90)
    plt.yticks(np.arange(size), category_list_[0:size])
    margin_small = 0.1
    margin_large = 0.9
    plt.subplots_adjust(left=margin_small, right=margin_large,
                        bottom=margin_small, top=margin_large)
    plt.xlabel('Predicted')
    plt.ylabel('Correct')
    return fig
def numpy_processed_directory(project_name, numpy_ids_file_name='ids.npy',
                              numpy_x_file_name='X.npy',
                              numpy_y_file_name='y.npy',
                              labels_file_name='labels.csv', reset=True):
    """
    Pack a project's processed images and their labels into numpy files.

    Images are read from ``../data/processed/<project_name>``, labels from
    ``../data/labels/<project_name>/<labels_file_name>`` and the arrays are
    saved into ``../data/numpy/<project_name>`` (all relative to the current
    working directory).  Images without an entry in the labels file, and
    files that cv2 cannot read, are skipped.

    Args:
        project_name (str): name of the project sub-folder
        numpy_ids_file_name (str): file name for the saved image file names
        numpy_x_file_name (str): file name for the saved image data
        numpy_y_file_name (str): file name for the saved labels
        labels_file_name (str): comma separated file; the first column is the
            image file name and the second column its label
        reset (bool): if True and the numpy folder already exists, it is
            removed first (default True)

    Returns:
        None
    """
    # Raw folders
    processed_path = abspath(join('..', 'data', 'processed'))
    labels_path = abspath(join('..', 'data', 'labels'))
    numpy_path = abspath(join('..', 'data', 'numpy'))
    # Project folders
    project_processed_path = join(processed_path, project_name)
    project_labels_path = join(labels_path, project_name)
    project_numpy_path = join(numpy_path, project_name)
    # Project files
    project_numpy_ids_file_name = join(project_numpy_path, numpy_ids_file_name)
    project_numpy_x_file_name = join(project_numpy_path, numpy_x_file_name)
    project_numpy_y_file_name = join(project_numpy_path, numpy_y_file_name)
    project_numpy_labels_file_name = join(project_labels_path, labels_file_name)
    # Load raw data
    direct = Directory(project_processed_path, include_extensions='images')
    label_dict = {}
    # Use a context manager so the labels file is closed deterministically
    with open(project_numpy_labels_file_name) as labels_file:
        for line in labels_file:
            line = line.strip()
            if not line:
                # Tolerate blank lines (e.g. trailing newlines)
                continue
            line = line.split(',')
            file_name = line[0].strip()
            label = line[1].strip()
            label_dict[file_name] = label
    # Reset / create paths if not exist
    if exists(project_numpy_path) and reset:
        ut.remove_dirs(project_numpy_path)
    ut.ensuredir(project_numpy_path)
    # Create numpy arrays
    ids = []
    X = []
    y = []
    # Process by loading images into the numpy array for saving
    for file_path in direct.files():
        file_name = basename(file_path)
        print('Processing %r' % (file_name, ))
        # Look the label up first so unlabeled images are never decoded
        try:
            label = label_dict[file_name]
        except KeyError:
            print('Cannot find label...skipping')
            continue
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread returns None for unreadable files; a None entry
            # would break the uint8 array conversion below.
            print('Cannot read image...skipping')
            continue
        ids.append(file_name)
        X.append(image)  # cv2 format
        y.append(label)
    ids = np.array(ids)
    X = np.array(X, dtype=np.uint8)
    y = np.array(y)
    # Save numpy array
    print('  ids.shape  = %r' % (ids.shape,))
    print('  ids.dtype  = %r' % (ids.dtype,))
    print('  X.shape  = %r' % (X.shape,))
    print('  X.dtype  = %r' % (X.dtype,))
    print('  y.shape  = %r' % (y.shape,))
    print('  y.dtype  = %r' % (y.dtype,))
    np.save(project_numpy_ids_file_name, ids)
    np.save(project_numpy_x_file_name, X)
    np.save(project_numpy_y_file_name, y)
def train_gid_list(ibs, gid_list, trees_path=None, species=None, setup=True,
                   teardown=False, **kwargs):
    """
    Train trees from the annotations of the given images.

    Positive examples are the chips of the images' annotations; an equal
    number of negative examples is mined as random square patches accepted
    by ``_valid_candidate`` and cached under ``pyrf_train_negatives`` in the
    cache directory.

    Args:
        ibs: controller providing the image / annotation accessors used here
        gid_list (list of int): the list of IBEIS image_rowids that need
            detection
        trees_path (str): the path that the trees will be saved into (along
            with temporary training inventory folders that are deleted once
            training is finished); defaults to ``<cachedir>/trees/<species>``
            when a species is given
        species (str): the species that should be used to assign to the
            newly trained trees; also restricts the annotations that are used
        setup (bool or str): when truthy, negatives are mined if the
            negatives cache does not exist yet; ``"force"`` always clears the
            cache and mines again
        teardown (bool): if True, remove the negatives cache after training

    Kwargs (optional):
        refer to the PyRF documentation for configuration settings

    Returns:
        None
    """
    print("[randomforest.train()] training with %d gids and species=%r" % (len(gid_list), species))

    if trees_path is None and species is not None:
        trees_path = join(ibs.get_cachedir(), "trees", species)

    # Positive chips: every annotation (optionally of one species) per image
    if species is not None:
        aids_per_image = ibs.get_image_aids_of_species(gid_list, species)
    else:
        aids_per_image = ibs.get_image_aids(gid_list)
    train_pos_cpath_list = ibs.get_annot_chip_fpath(ut.flatten(aids_per_image))
    num_positives = len(train_pos_cpath_list)

    # Negative chips: reuse the cache unless it is missing or a rebuild is forced
    negatives_cache = join(ibs.get_cachedir(), "pyrf_train_negatives")
    rebuild = setup == "force" or (setup and not exists(negatives_cache))
    if not rebuild:
        train_neg_cpath_list = ut.ls(negatives_cache, "*.JPEG")
    else:
        if exists(negatives_cache):
            ut.remove_dirs(negatives_cache)
        ut.ensuredir(negatives_cache)

        print("[randomforest.train()] Mining %d negative patches" % (num_positives,))
        train_neg_cpath_list = []
        while len(train_neg_cpath_list) < num_positives:
            # Draw a random image
            gid = gid_list[random.randint(0, len(gid_list) - 1)]
            img_width, img_height = ibs.get_image_sizes(gid)
            size = min(img_width, img_height)
            if species is not None:
                image_aids = ibs.get_image_aids_of_species(gid, species)
            else:
                image_aids = ibs.get_image_aids(gid)
            annot_bbox_list = ibs.get_annot_bboxes(image_aids)

            # Draw a random square patch inside the image
            square = random.randint(int(size / 4), int(size / 2))
            xmin = random.randint(0, img_width - square)
            xmax = xmin + square
            ymin = random.randint(0, img_height - square)
            ymax = ymin + square

            if not _valid_candidate((xmin, xmax, ymin, ymax), annot_bbox_list):
                continue

            num_mined = len(train_neg_cpath_list)
            if VERBOSE_RF:
                print(
                    "[%d / %d] MINING NEGATIVE PATCH (%04d, %04d, %04d, %04d) FROM GID %d"
                    % (num_mined, num_positives, xmin, xmax, ymin, ymax, gid)
                )
            patch = ibs.get_images(gid)[ymin:ymax, xmin:xmax]
            patch_path = join(negatives_cache, "neg_%07d.JPEG" % (num_mined,))
            cv2.imwrite(patch_path, patch)
            train_neg_cpath_list.append(patch_path)

    # Train trees
    train_gpath_list(ibs, train_pos_cpath_list, train_neg_cpath_list,
                     trees_path=trees_path, species=species, **kwargs)

    # Remove cached negatives directory
    if teardown:
        ut.remove_dirs(negatives_cache)
def image_upload_zip(**kwargs):
    r"""
    Returns the gid_list for image files submitted in a ZIP archive.

    The image archive should be flat (no folders will be scanned for images)
    and must be smaller than 100 MB (the limit is not enforced by this
    function). The archive can submit multiple images, ideally in JPEG format
    to save space. Duplicate image uploads will result in the duplicate
    images receiving the same gid based on the hashed pixel values.

    Args:
        image_zip_archive (binary): the POST variable containing the binary
            (multi-form) image archive data
        **kwargs: Arbitrary keyword arguments; the kwargs are passed down to
            the add_images function

    Returns:
        gid_list (list of rowids): the list of gids corresponding to the
            images submitted. The gids correspond to the image names sorted
            in lexicographical order.

    Raises:
        IOError: if no archive was submitted or the archive cannot be
            extracted

    RESTful:
        Method: POST
        URL:    /api/image/zip
    """
    ibs = current_app.ibs
    # Get image archive
    image_archive = request.files.get('image_zip_archive', None)
    if image_archive is None:
        raise IOError('Image archive not given')

    # Pick an extraction directory that does not exist yet.  The check has to
    # be made inside the uploads directory; testing the bare name would
    # resolve against the current working directory and let two uploads in
    # the same second share (and mix) one directory.
    uploads_path = ibs.get_uploadsdir()
    ut.ensuredir(uploads_path)
    current_time = time.strftime('%Y_%m_%d_%H_%M_%S')

    modifier = 1
    upload_path = join(uploads_path, '%s' % (current_time))
    while exists(upload_path):
        upload_path = join(uploads_path, '%s_%04d' % (current_time, modifier))
        modifier += 1

    ut.ensuredir(upload_path)

    # Extract the content
    try:
        with zipfile.ZipFile(image_archive, 'r') as zfile:
            zfile.extractall(upload_path)
    except Exception:
        # Do not leave a half-extracted directory behind
        ut.remove_dirs(upload_path)
        raise IOError('Image archive extracton failed')

    # Manual test to ensure Directory and utool do the same thing:
    #
    #     from detecttools.directory import Directory
    #     upload_path = ut.truepath('~/Pictures')
    #     gpath_list1 = sorted(ut.list_images(upload_path, recursive=False, full=True))
    #
    #     direct = Directory(upload_path, include_file_extensions='images', recursive=False)
    #     gpath_list = direct.files()
    #     gpath_list = sorted(gpath_list)
    #
    #     assert gpath_list1 == gpath_list

    # Only the top level of the extracted archive is scanned (recursive=False)
    gpath_list = sorted(ut.list_images(upload_path, recursive=False, full=True))
    gid_list = ibs.add_images(gpath_list, **kwargs)
    return gid_list
def train_gid_list(ibs, gid_list, trees_path=None, species=None, setup=True,
                   teardown=False, **kwargs):
    """
    Train trees from the annotations of the given images.

    Positive examples are the chips of the images' annotations; an equal
    number of negative examples is mined as random square patches accepted
    by ``_valid_candidate`` and cached under ``pyrf_train_negatives`` in the
    cache directory.

    Args:
        ibs: controller providing the image / annotation accessors used here
        gid_list (list of int): the list of IBEIS image_rowids that need
            detection
        trees_path (str): the path that the trees will be saved into (along
            with temporary training inventory folders that are deleted once
            training is finished); defaults to ``<cachedir>/trees/<species>``
            when a species is given
        species (str): the species that should be used to assign to the
            newly trained trees; also restricts the annotations that are used
        setup (bool or str): when truthy, negatives are mined if the
            negatives cache does not exist yet; ``'force'`` always clears the
            cache and mines again
        teardown (bool): if True, remove the negatives cache after training

    Kwargs (optional):
        refer to the PyRF documentation for configuration settings

    Returns:
        None
    """
    print("[randomforest.train()] training with %d gids and species=%r" % (
        len(gid_list), species))

    if trees_path is None and species is not None:
        trees_path = join(ibs.get_cachedir(), 'trees', species)

    # Positive chips: every annotation (optionally of one species) per image
    if species is not None:
        aids_per_image = ibs.get_image_aids_of_species(gid_list, species)
    else:
        aids_per_image = ibs.get_image_aids(gid_list)
    positive_aids = ut.flatten(aids_per_image)
    train_pos_cpath_list = ibs.get_annot_chip_fpath(positive_aids)
    target_count = len(train_pos_cpath_list)

    # Negative chips: reuse the cache unless it is missing or a rebuild is forced
    negatives_cache = join(ibs.get_cachedir(), 'pyrf_train_negatives')
    mine_negatives = setup == 'force' or (setup and not exists(negatives_cache))
    if mine_negatives:
        # Start from an empty cache directory
        if exists(negatives_cache):
            ut.remove_dirs(negatives_cache)
        ut.ensuredir(negatives_cache)

        print("[randomforest.train()] Mining %d negative patches" % (target_count, ))
        train_neg_cpath_list = []
        while len(train_neg_cpath_list) < target_count:
            # Draw a random image
            picked = random.randint(0, len(gid_list) - 1)
            gid = gid_list[picked]
            img_width, img_height = ibs.get_image_sizes(gid)
            size = min(img_width, img_height)
            if species is not None:
                image_aids = ibs.get_image_aids_of_species(gid, species)
            else:
                image_aids = ibs.get_image_aids(gid)
            annot_bbox_list = ibs.get_annot_bboxes(image_aids)

            # Draw a random square patch inside the image
            square = random.randint(int(size / 4), int(size / 2))
            xmin = random.randint(0, img_width - square)
            xmax = xmin + square
            ymin = random.randint(0, img_height - square)
            ymax = ymin + square
            candidate = (xmin, xmax, ymin, ymax)

            if _valid_candidate(candidate, annot_bbox_list):
                mined_so_far = len(train_neg_cpath_list)
                if VERBOSE_RF:
                    print(
                        "[%d / %d] MINING NEGATIVE PATCH (%04d, %04d, %04d, %04d) FROM GID %d"
                        % (mined_so_far, target_count, xmin, xmax, ymin, ymax, gid))
                full_image = ibs.get_images(gid)
                patch_path = join(negatives_cache, "neg_%07d.JPEG" % (mined_so_far, ))
                cv2.imwrite(patch_path, full_image[ymin:ymax, xmin:xmax])
                train_neg_cpath_list.append(patch_path)
    else:
        train_neg_cpath_list = ut.ls(negatives_cache, '*.JPEG')

    # Train trees
    train_gpath_list(ibs, train_pos_cpath_list, train_neg_cpath_list,
                     trees_path=trees_path, species=species, **kwargs)

    # Remove cached negatives directory
    if teardown:
        ut.remove_dirs(negatives_cache)