def from_files(self):
    """
    Create tasks from listing files already present in the job directory.

    TRAIN_FILE and LABELS_FILE must already exist; VAL_FILE and TEST_FILE
    are picked up only when present.
    """
    assert len(self.tasks) == 0
    assert os.path.exists(self.path(utils.constants.TRAIN_FILE))
    assert os.path.exists(self.path(utils.constants.LABELS_FILE))

    self.labels_file = utils.constants.LABELS_FILE

    # kwargs shared by every CreateDbTask below
    shared_kwargs = dict(
        job_dir=self.dir(),
        image_dims=self.image_dims,
        resize_mode=self.resize_mode,
        labels_file=self.labels_file,
    )

    # The train DB is mandatory and is the only one that computes a mean file.
    self.tasks.append(
        tasks.CreateDbTask(
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            **shared_kwargs))

    # Optional splits: added only when their listing file exists on disk.
    for listing, db_name in (
            (utils.constants.VAL_FILE, utils.constants.VAL_DB),
            (utils.constants.TEST_FILE, utils.constants.TEST_DB),
    ):
        if os.path.exists(self.path(listing)):
            self.tasks.append(
                tasks.CreateDbTask(
                    input_file=listing,
                    db_name=db_name,
                    **shared_kwargs))
def from_folders(job, form):
    """
    Add tasks for creating a dataset by parsing folders of images.

    Arguments:
    job -- the dataset job; created tasks are appended to job.tasks
    form -- the validated dataset-creation form
    """
    job.labels_file = utils.constants.LABELS_FILE

    def _per_category_limits(min_val, max_val):
        """Normalize form min/max image counts into ParseFolderTask kwargs.

        A blank form field yields None; treat it like 0 so the `> 0` tests
        below cannot raise TypeError (the original guarded only max_val).
        Returns (min_per_category, max_per_category) where max None means
        "no limit".
        """
        if min_val is None:
            min_val = 0
        if max_val is None:
            max_val = 0
        return (min_val if min_val > 0 else 1,
                max_val if max_val > 0 else None)

    # Add ParseFolderTask
    percent_val = form.folder_pct_val.data
    val_parents = []
    if form.has_val_folder.data:
        # validation comes from a dedicated folder, not a split of train
        percent_val = 0
    percent_test = form.folder_pct_test.data
    test_parents = []
    if form.has_test_folder.data:
        percent_test = 0

    min_per_category, max_per_category = _per_category_limits(
        form.folder_train_min_per_class.data,
        form.folder_train_max_per_class.data)
    parse_train_task = tasks.ParseFolderTask(
        job_dir=job.dir(),
        folder=form.folder_train.data,
        percent_val=percent_val,
        percent_test=percent_test,
        min_per_category=min_per_category,
        max_per_category=max_per_category)
    job.tasks.append(parse_train_task)

    # set parents: splits carved out of the train folder depend on its parse
    if not form.has_val_folder.data:
        val_parents = [parse_train_task]
    if not form.has_test_folder.data:
        test_parents = [parse_train_task]

    if form.has_val_folder.data:
        min_per_category, max_per_category = _per_category_limits(
            form.folder_val_min_per_class.data,
            form.folder_val_max_per_class.data)
        parse_val_task = tasks.ParseFolderTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            folder=form.folder_val.data,
            percent_val=100,
            percent_test=0,
            min_per_category=min_per_category,
            max_per_category=max_per_category)
        job.tasks.append(parse_val_task)
        val_parents = [parse_val_task]

    if form.has_test_folder.data:
        min_per_category, max_per_category = _per_category_limits(
            form.folder_test_min_per_class.data,
            form.folder_test_max_per_class.data)
        parse_test_task = tasks.ParseFolderTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            folder=form.folder_test.data,
            percent_val=0,
            percent_test=100,
            min_per_category=min_per_category,
            max_per_category=max_per_category)
        job.tasks.append(parse_test_task)
        test_parents = [parse_test_task]

    # Add CreateDbTasks
    backend = form.backend.data
    encoding = form.encoding.data
    compression = form.compression.data
    job.tasks.append(
        tasks.CreateDbTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            backend=backend,
            image_dims=job.image_dims,
            resize_mode=job.resize_mode,
            encoding=encoding,
            compression=compression,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            labels_file=job.labels_file,
        ))
    if percent_val > 0 or form.has_val_folder.data:
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                parents=val_parents,
                input_file=utils.constants.VAL_FILE,
                db_name=utils.constants.VAL_DB,
                backend=backend,
                image_dims=job.image_dims,
                resize_mode=job.resize_mode,
                encoding=encoding,
                compression=compression,
                labels_file=job.labels_file,
            ))
    if percent_test > 0 or form.has_test_folder.data:
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                parents=test_parents,
                input_file=utils.constants.TEST_FILE,
                db_name=utils.constants.TEST_DB,
                backend=backend,
                image_dims=job.image_dims,
                resize_mode=job.resize_mode,
                encoding=encoding,
                compression=compression,
                labels_file=job.labels_file,
            ))
def from_s3(job, form):
    """
    Add tasks for creating a dataset by parsing S3 buckets of images.

    Arguments:
    job -- the dataset job; created tasks are appended to job.tasks
    form -- the validated dataset-creation form
    """
    job.labels_file = utils.constants.LABELS_FILE

    # Add ParseS3Task
    percent_val = form.s3_pct_val.data
    val_parents = []
    percent_test = form.s3_pct_test.data
    test_parents = []
    min_per_class = form.s3_train_min_per_class.data
    max_per_class = form.s3_train_max_per_class.data
    # Normalize blank form fields (None) before the `> 0` comparisons below,
    # which would otherwise raise TypeError on Python 3. The folder-based
    # sibling (from_folders) applies the same guard to max_per_class.
    if min_per_class is None:
        min_per_class = 0
    if max_per_class is None:
        max_per_class = 0
    # remove the downloaded copies after DB creation unless asked to keep them
    delete_files = not form.s3_keepcopiesondisk.data

    parse_train_task = tasks.ParseS3Task(
        job_dir=job.dir(),
        s3_endpoint_url=form.s3_endpoint_url.data,
        s3_bucket=form.s3_bucket.data,
        s3_path=form.s3_path.data,
        s3_accesskey=form.s3_accesskey.data,
        s3_secretkey=form.s3_secretkey.data,
        percent_val=percent_val,
        percent_test=percent_test,
        min_per_category=min_per_class if min_per_class > 0 else 1,
        max_per_category=max_per_class if max_per_class > 0 else None)
    job.tasks.append(parse_train_task)

    # set parents: val/test listings are produced by the train parse
    val_parents = [parse_train_task]
    test_parents = [parse_train_task]

    # Add CreateDbTasks
    backend = form.backend.data
    encoding = form.encoding.data
    compression = form.compression.data
    job.tasks.append(
        tasks.CreateDbTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            backend=backend,
            image_dims=job.image_dims,
            resize_mode=job.resize_mode,
            encoding=encoding,
            compression=compression,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            labels_file=job.labels_file,
            delete_files=delete_files,
        ))
    if percent_val > 0:
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                parents=val_parents,
                input_file=utils.constants.VAL_FILE,
                db_name=utils.constants.VAL_DB,
                backend=backend,
                image_dims=job.image_dims,
                resize_mode=job.resize_mode,
                encoding=encoding,
                compression=compression,
                labels_file=job.labels_file,
                delete_files=delete_files,
            ))
    if percent_test > 0:
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                parents=test_parents,
                input_file=utils.constants.TEST_FILE,
                db_name=utils.constants.TEST_DB,
                backend=backend,
                image_dims=job.image_dims,
                resize_mode=job.resize_mode,
                encoding=encoding,
                compression=compression,
                labels_file=job.labels_file,
                delete_files=delete_files,
            ))
def from_files(job, form):
    """
    Add tasks for creating a dataset by reading textfiles.

    Listing files either come from local paths (textfile_use_local_files)
    or are uploaded through the request and saved into the job directory.
    """
    def _input_file(local_field, upload_field, dest_name):
        # Local paths are used as-is; uploads are saved into the job dir
        # first and then referenced by their constant filename.
        if form.textfile_use_local_files.data:
            return local_field.data.strip()
        flask.request.files[upload_field.name].save(
            os.path.join(job.dir(), dest_name))
        return dest_name

    def _image_folder(field):
        # An empty/whitespace folder field means "paths are absolute".
        folder = field.data.strip()
        return folder or None

    # labels
    labels_dest = os.path.join(job.dir(), utils.constants.LABELS_FILE)
    if form.textfile_use_local_files.data:
        shutil.copyfile(form.textfile_local_labels_file.data.strip(),
                        labels_dest)
    else:
        flask.request.files[form.textfile_labels_file.name].save(labels_dest)
    job.labels_file = utils.constants.LABELS_FILE

    shuffle = bool(form.textfile_shuffle.data)
    # kwargs shared by every CreateDbTask below
    shared_kwargs = dict(
        job_dir=job.dir(),
        backend=form.backend.data,
        image_dims=job.image_dims,
        resize_mode=job.resize_mode,
        encoding=form.encoding.data,
        compression=form.compression.data,
        labels_file=job.labels_file,
        shuffle=shuffle,
    )

    # train (always present; also computes the image mean)
    job.tasks.append(
        tasks.CreateDbTask(
            input_file=_input_file(form.textfile_local_train_images,
                                   form.textfile_train_images,
                                   utils.constants.TRAIN_FILE),
            db_name=utils.constants.TRAIN_DB,
            image_folder=_image_folder(form.textfile_train_folder),
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            **shared_kwargs))

    # val (optional)
    if form.textfile_use_val.data:
        job.tasks.append(
            tasks.CreateDbTask(
                input_file=_input_file(form.textfile_local_val_images,
                                       form.textfile_val_images,
                                       utils.constants.VAL_FILE),
                db_name=utils.constants.VAL_DB,
                image_folder=_image_folder(form.textfile_val_folder),
                **shared_kwargs))

    # test (optional)
    if form.textfile_use_test.data:
        job.tasks.append(
            tasks.CreateDbTask(
                input_file=_input_file(form.textfile_local_test_images,
                                       form.textfile_test_images,
                                       utils.constants.TEST_FILE),
                db_name=utils.constants.TEST_DB,
                image_folder=_image_folder(form.textfile_test_folder),
                **shared_kwargs))
def from_folders(job, form):
    """
    Add tasks for creating a dataset by parsing folders of images.
    """
    job.labels_file = utils.constants.LABELS_FILE

    has_val_folder = form.has_val_folder.data
    has_test_folder = form.has_test_folder.data

    ### Add ParseFolderTask
    percent_val = form.folder_pct_val.data
    percent_test = form.folder_pct_test.data
    if has_val_folder:
        # validation comes from a dedicated folder, not a split of train
        percent_val = 0
    if has_test_folder:
        percent_test = 0

    parse_train_task = tasks.ParseFolderTask(
        job_dir=job.dir(),
        folder=form.folder_train.data,
        percent_val=percent_val,
        percent_test=percent_test,
    )
    job.tasks.append(parse_train_task)

    # set parents: splits carved out of the train folder depend on its parse
    val_parents = [] if has_val_folder else [parse_train_task]
    test_parents = [] if has_test_folder else [parse_train_task]

    if has_val_folder:
        parse_val_task = tasks.ParseFolderTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            folder=form.folder_val.data,
            percent_val=100,
            percent_test=0,
        )
        job.tasks.append(parse_val_task)
        val_parents = [parse_val_task]

    if has_test_folder:
        parse_test_task = tasks.ParseFolderTask(
            job_dir=job.dir(),
            parents=parse_train_task,
            folder=form.folder_test.data,
            percent_val=0,
            percent_test=100,
        )
        job.tasks.append(parse_test_task)
        test_parents = [parse_test_task]

    ### Add CreateDbTasks
    # kwargs shared by every CreateDbTask below
    shared_kwargs = dict(
        job_dir=job.dir(),
        image_dims=job.image_dims,
        resize_mode=job.resize_mode,
        encode=form.encode_images.data,
        labels_file=job.labels_file,
    )
    job.tasks.append(
        tasks.CreateDbTask(
            parents=parse_train_task,
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            **shared_kwargs))
    if percent_val > 0 or has_val_folder:
        job.tasks.append(
            tasks.CreateDbTask(
                parents=val_parents,
                input_file=utils.constants.VAL_FILE,
                db_name=utils.constants.VAL_DB,
                **shared_kwargs))
    if percent_test > 0 or has_test_folder:
        job.tasks.append(
            tasks.CreateDbTask(
                parents=test_parents,
                input_file=utils.constants.TEST_FILE,
                db_name=utils.constants.TEST_DB,
                **shared_kwargs))
def from_files(job, form):
    """
    Add tasks for creating a dataset by reading textfiles.
    """
    def _save_upload(field, dest_name):
        # Persist an uploaded file into the job directory under dest_name.
        request.files[field.name].save(os.path.join(job.dir(), dest_name))

    def _image_folder(field):
        # An empty/whitespace folder field means "paths are absolute".
        folder = field.data.strip()
        return folder or None

    ### labels
    _save_upload(form.textfile_labels_file, utils.constants.LABELS_FILE)
    job.labels_file = utils.constants.LABELS_FILE

    ### train (always present; also computes the image mean)
    _save_upload(form.textfile_train_images, utils.constants.TRAIN_FILE)
    job.tasks.append(
        tasks.CreateDbTask(
            job_dir=job.dir(),
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            image_dims=job.image_dims,
            image_folder=_image_folder(form.textfile_train_folder),
            resize_mode=job.resize_mode,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            labels_file=job.labels_file,
        ))

    ### val (optional)
    if form.textfile_use_val.data:
        _save_upload(form.textfile_val_images, utils.constants.VAL_FILE)
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                input_file=utils.constants.VAL_FILE,
                db_name=utils.constants.VAL_DB,
                image_dims=job.image_dims,
                image_folder=_image_folder(form.textfile_val_folder),
                resize_mode=job.resize_mode,
                labels_file=job.labels_file,
            ))

    ### test (optional)
    if form.textfile_use_test.data:
        _save_upload(form.textfile_test_images, utils.constants.TEST_FILE)
        job.tasks.append(
            tasks.CreateDbTask(
                job_dir=job.dir(),
                input_file=utils.constants.TEST_FILE,
                db_name=utils.constants.TEST_DB,
                image_dims=job.image_dims,
                image_folder=_image_folder(form.textfile_test_folder),
                resize_mode=job.resize_mode,
                labels_file=job.labels_file,
            ))
def from_folder(
    self,
    folder,
    percent_val=None,
    percent_test=None,
    min_per_category=None,
    max_per_category=None,
):
    """
    Add tasks for creating a dataset by parsing a folder of images

    Arguments:
    folder -- the folder to parse

    Keyword arguments:
    percent_val -- percent of images to use for validation
    percent_test -- percent of images to use for testing
    min_per_category -- minimum images per category
    max_per_category -- maximum images per category
    """
    assert len(self.tasks) == 0

    self.labels_file = utils.constants.LABELS_FILE

    ### Add ParseFolderTask
    # Forward the per-category limits; previously they were accepted and
    # documented but silently ignored.
    task = tasks.ParseFolderTask(job_dir=self.dir(),
                                 folder=folder,
                                 percent_val=percent_val,
                                 percent_test=percent_test,
                                 min_per_category=min_per_category,
                                 max_per_category=max_per_category)
    self.tasks.append(task)

    ### Add CreateDbTasks
    # Train DB is always created and also produces the mean file.
    self.tasks.append(
        tasks.CreateDbTask(
            job_dir=self.dir(),
            parents=task,
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            image_dims=self.image_dims,
            resize_mode=self.resize_mode,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            labels_file=self.labels_file,
        ))
    # Val/test DBs only when the parse task actually produces those splits.
    if task.percent_val > 0:
        self.tasks.append(
            tasks.CreateDbTask(
                job_dir=self.dir(),
                parents=task,
                input_file=utils.constants.VAL_FILE,
                db_name=utils.constants.VAL_DB,
                image_dims=self.image_dims,
                resize_mode=self.resize_mode,
                labels_file=self.labels_file,
            ))
    if task.percent_test > 0:
        self.tasks.append(
            tasks.CreateDbTask(
                job_dir=self.dir(),
                parents=task,
                input_file=utils.constants.TEST_FILE,
                db_name=utils.constants.TEST_DB,
                image_dims=self.image_dims,
                resize_mode=self.resize_mode,
                labels_file=self.labels_file,
            ))
def from_json(job, form):
    """
    Add tasks for creating a dataset from an uploaded JSON annotation
    file plus a train/test split file.
    """
    # An empty/whitespace root path means "paths are absolute".
    root_path = form.jsonfile_root_path.data.strip() or None

    job.labels_file = utils.constants.LABELS_FILE

    # Save the uploaded split and annotation files into the job directory.
    flask.request.files[form.jsonfile_train_test_split.name].save(
        os.path.join(job.dir(), utils.constants.SPLIT_FILE))
    flask.request.files[form.jsonfile_annotations.name].save(
        os.path.join(job.dir(), utils.constants.JSON_FILE))

    ### Add JsonTask
    parse_json_task = tasks.ParseJsonTask(
        job_dir=job.dir(),
        json_annotation_file=utils.constants.JSON_FILE,
        train_test_split_file=utils.constants.SPLIT_FILE,
    )
    job.tasks.append(parse_json_task)

    ### Add CreateDbTasks
    # kwargs shared by every CreateDbTask below; all depend on the JSON parse
    shared_kwargs = dict(
        job_dir=job.dir(),
        parents=parse_json_task,
        backend=form.backend.data,
        image_dims=job.image_dims,
        image_folder=root_path,
        resize_mode=job.resize_mode,
        encoding=form.encoding.data,
        compression=form.compression.data,
        labels_file=job.labels_file,
        get_bboxes=job.get_bboxes,
        scale_factor=job.scale_factor,
    )
    # Train DB also computes the mean file.
    job.tasks.append(
        tasks.CreateDbTask(
            input_file=utils.constants.TRAIN_FILE,
            db_name=utils.constants.TRAIN_DB,
            mean_file=utils.constants.MEAN_FILE_CAFFE,
            **shared_kwargs))
    # Val/test DBs keep the split order (shuffle=False).
    job.tasks.append(
        tasks.CreateDbTask(
            input_file=utils.constants.VAL_FILE,
            db_name=utils.constants.VAL_DB,
            shuffle=False,
            **shared_kwargs))
    job.tasks.append(
        tasks.CreateDbTask(
            input_file=utils.constants.TEST_FILE,
            db_name=utils.constants.TEST_DB,
            shuffle=False,
            **shared_kwargs))