def fs_write_result(config_to_summary_path, config_to_result_path, result, dirname='./'):
    """Write a result to the local filesystem

    File written depends on output of config_to_result_path AND directory

    Args:
        config_to_summary_path: ('config' -> string) function to generate
            location in local filesystem for a summary file. NOTE(review):
            currently computed but never written — see commented-out pickle
            call below.
        config_to_result_path: ('config' -> string) function to generate
            location in local filesystem to write to. Possibly modified by
            'dirname' argument
        result: 'result' to be written to local filesystem. Must be
            serializable via pickle. Must have a value for the key 'config'
        dirname: (string) directory to prepend to output of
            config_to_result_path

    Returns:
        None
    """
    # NOTE(review): assert is stripped under `python -O`; consider raising
    # ValueError if this validation must always run.
    assert _is_valid_result(result)
    # NOTE(review): 'summary' and 'summary_filepath' are computed but unused
    # because the summary pickle below is commented out — presumably a
    # deliberately disabled feature; confirm before removing.
    summary = {
        key: result.get(key, {})
        for key in ['config', 'summary']
    }
    config = result['config']
    summary_filepath = config_to_summary_path(config)
    result_filepath = config_to_result_path(config)
    # filepath may contain directories
    full_result_filepath = os.path.join(dirname, result_filepath)
    _dirname = os.path.split(full_result_filepath)[0]
    ensure_dir(_dirname)
    # pickle(summary, summary_filepath, dir=dirname)
    # 'pickle' here is a project helper (takes dir=), not the stdlib module.
    pickle(result, result_filepath, dir=dirname)
    return
def do_experiments(config_list, runner, writer, dirname='./', mapper=map):
    """Run every 'config' through runner and persist each 'result' via writer.

    List-taking counterpart of do_experiment. Supplying a custom 'mapper'
    (e.g. a multiprocessing pool's map or IPython.parallel) parallelizes the
    runs; the default builtin map runs them serially.

    Args:
        config_list: (list of 'config's) 'config's to run with runner
        runner: ('config' -> 'result') function that takes config and
            returns result. This is where the computation occurs.
        writer: ('result' -> None) function that takes single result and
            writes it to local filesystem
        dirname: (string) local filesystem directory to serialize
            'result's to
        mapper: (function, args -> outputs) mapper to use. If runner spawns
            daemonic processes, mapper must be non-daemonic.

    Returns:
        None
    """
    ensure_dir(dirname)
    config_list = ensure_listlike(config_list)
    # partial (not a lambda) keeps the callable picklable for
    # multiprocessing-style mappers
    run_one = partial(do_experiment, runner=runner, writer=writer,
                      dirname=dirname)
    mapper(run_one, config_list)
    return
def get_s3(self, filename, do_print=True):
    """Download 'filename' from S3 into the local directory.

    Args:
        filename: (string) name of the remote object to fetch
        do_print: (bool) when True, log the call via gu.printif

    Returns:
        bool: True if the key existed and its contents were downloaded,
        False otherwise.
    """
    gu.printif("getS3('%s')" % filename, do_print)
    full_filename = self.get_full_filename(filename)
    # BUG FIX: this lookup had been commented out, leaving 'key' undefined
    # and raising NameError on every call — restored.
    key = self.get_key(filename)
    success = False
    if key is not None:
        # make sure the destination directory exists before writing
        fu.ensure_dir(self.local_dir)
        key.get_contents_to_filename(full_filename)
        success = True
    return success
def create_train_and_validation_dirs(path): train_path = os.path.join(path, 'TRAIN') validation_path = os.path.join(path, 'VALIDATION') for path in [train_path, validation_path]: ensure_dir(path) clean_dir(path) print ''' created train and validation dirs at: {} {}'''.format(train_path, validation_path)
def gen_images_from_film(path, out_dir='./data/frames'): ensure_dir(out_dir) vidcap = cv2.VideoCapture(path) success, image = vidcap.read() count = 0 while success: success, image = vidcap.read() cv2.imwrite(out_dir + '/%d.jpg' % count, image) count += 1 print 'finished creating %d frames' % count
def symlink_images(image_filenames, symlink_dir='/tmp/categories', category='akira', suffix=None, report_every=100):
    """Symlink each image into '<symlink_dir>/<category>/<idx><suffix>'.

    Args:
        image_filenames: (iterable of strings) source image paths
        symlink_dir: (string) parent directory holding category subdirs
        category: (string) subdirectory name the links are created under
        suffix: (string or None) file extension for the links; when None it
            is derived from the first source file's extension and then
            reused for all subsequent files
        report_every: (int) NOTE(review): currently unused; kept for
            interface compatibility

    Returns:
        None
    """
    category_dir = os.path.join(symlink_dir, category)
    clean_dir(category_dir)
    ensure_dir(category_dir)
    for idx, source in enumerate(image_filenames):
        # sticky default: once set from the first file, the same suffix is
        # applied to every later link (preserves original behavior)
        suffix = suffix or '.' + source.split('.')[-1]
        # reuse the precomputed category_dir instead of re-joining per item
        output_path = os.path.join(category_dir, '{}{}'.format(idx, suffix))
        os.symlink(source, output_path)
def build_train_and_validation_sets(source_dir, build_dir, train_ratio=0.7, frame_step=24): classification = source_dir.split('/')[-1] validation_dir = os.path.join(build_dir, 'VALIDATION', classification) train_dir = os.path.join(build_dir, 'TRAIN', classification) #ensure directory structure a la '${build_dir}/${VALIDATION || TRAIN}/${CLASSIFICATION}' exists ensure_dir(train_dir) ensure_dir(validation_dir) # now we need to pull images from source dir, order them numerically, # trim the edge percentages, select images at specified offsets, randomize them, and put them in respective dirs print 'processing image files for class: {}'.format(classification) image_files = trim_edge_photos(source_dir) image_files = select_every(frame_step, image_files) image_files = randomize_order(image_files) split_idx = int(math.floor(train_ratio * len(image_files))) link_images(image_files[:split_idx], train_dir) link_images(image_files[split_idx:], validation_dir) print 'finished {} train and {} validation images for class: {}'.format( split_idx, len(image_files) - split_idx, classification)
len(image_files) - split_idx, classification) if __name__ == "__main__": try: classes_parent_dir = os.environ['CLASSES_PARENT_DIR'] except: classes_parent_dir = os.path.join(os.getenv('HOME'), 'block', 'film_images') classes_dirs = get_files_in_dir(classes_parent_dir) classes = map(lambda x: x.split('/')[-1].split('.')[0], classes_dirs) try: build_dir = os.environ['BUILD_DIR'] except: build_dir = os.path.join(os.getcwd(), 'build') #start fresh by removing build dir create_train_and_validation_dirs(build_dir) output_dir = os.path.join(build_dir, 'output') ensure_dir(output_dir) clean_dir(output_dir) for class_dir in classes_dirs: build_train_and_validation_sets(class_dir, build_dir) print ''' train/validation sets available at: {}'''.format(build_dir)
import gen_images import glob import os from file_utils import ensure_dir INPUT_DIR = os.path.join(os.getenv('HOME'), 'Downloads/films') OUTPUT_DIR = os.path.join(os.getenv('HOME'), 'block/film_images') ensure_dir(OUTPUT_DIR) files = glob.glob(os.path.join(INPUT_DIR, '*')) for file in files: print 'processing %s' % file film_out_dir = os.path.join(OUTPUT_DIR, file.split('.')[-2].split('/')[-1]) # print file, film_out_dir, file.split('.')[-2].split('/')[-1] gen_images.gen_images_from_film(file, film_out_dir)