def download_filelist(collection_name, fname, root_dir):
    """Download the precomputed filelist of a collection and verify it.

    The URL is read from ``precomputed_urls[collection_name]['paths']`` and
    fetched to *fname* with ``wget``.  A second filelist is then generated
    from *root_dir* into a temporary directory and compared with the
    downloaded one.

    Args:
        collection_name: key into ``precomputed_urls``.
        fname: destination path for the downloaded filelist; its extension
            must equal the extension of the URL path.
        root_dir: dataset directory passed to ``generate_filelist``.

    Returns:
        False if *collection_name* has no entry in ``precomputed_urls``,
        True once the downloaded filelist has been verified.

    Raises:
        AssertionError: if the URL and *fname* extensions differ.
        RuntimeError: if the downloaded filelist differs from the
            regenerated one.
    """
    if collection_name not in precomputed_urls:
        return False

    url = precomputed_urls[collection_name]['paths']

    log.info('[cpuvisor] Downloading dataset filelist...')
    log.info('[cpuvisor] URL is: %s' % url)

    url_ext = os.path.splitext(urlparse.urlparse(url).path)[1]
    assert(url_ext == os.path.splitext(fname)[1])

    # Pass an argv list instead of a shell string so that paths or URLs
    # containing spaces or shell metacharacters are handed to wget verbatim.
    utils.subproc_call_check(['wget', '-O', fname, url])

    # re-generate and check for consistency
    with utils.make_temp_directory() as temp_dir:
        regen_fname = os.path.join(temp_dir, os.path.split(fname)[1])
        generate_filelist(regen_fname, root_dir, True)

        # filecmp.cmp() returns True when the files MATCH, so the error must
        # be raised on a False result.  shallow=False forces a content
        # comparison rather than trusting identical os.stat() signatures.
        if not filecmp.cmp(fname, regen_fname, shallow=False):
            raise RuntimeError('Downloaded filelist for dataset <%s>: %s '
                               'is inconsistent with images found in '
                               'dataset directory: %s' %
                               (collection_name, fname, root_dir))

    return True
def compute_feats(base_dir, collection_name):
    """Run ``cpuvisor_preproc`` for a collection with ``--nonegfeats``.

    The binary is invoked from ``<base_dir>/bin`` and pointed at
    ``../config.<collection_name>.prototxt`` relative to that directory.

    Args:
        base_dir: directory containing the ``bin`` subdirectory.
        collection_name: name substituted into the config filename.
    """
    log.info('[cpuvisor] Computing features for dataset...')

    bin_dir = os.path.join(base_dir, 'bin')
    with utils.change_cwd(bin_dir):
        # the config path is relative to bin_dir, hence the leading '..'
        config_arg = '../config.%s.prototxt' % collection_name
        preproc_cmd = ['./cpuvisor_preproc',
                       '--config_path', config_arg,
                       '--nonegfeats']
        utils.subproc_call_check(preproc_cmd)
def download_feats(collection_name, fname):
    """Download the precomputed feature file of a collection.

    The tarball URL is read from ``precomputed_urls[collection_name]['feats']``,
    fetched into a temporary directory with ``wget``, and the single member
    whose name equals the basename of *fname* is extracted into the
    directory part of *fname*.

    Args:
        collection_name: key into ``precomputed_urls``.
        fname: destination path of the feature file; its basename selects
            the tarball member and its directory is the extraction target.

    Returns:
        False if *collection_name* has no entry in ``precomputed_urls``,
        True after the member has been extracted.

    Raises:
        RuntimeError: if the tarball has no member named like the basename
            of *fname*.
    """
    if collection_name not in precomputed_urls:
        return False

    url = precomputed_urls[collection_name]['feats']

    log.info('[cpuvisor] Downloading features for dataset...')
    log.info('[cpuvisor] URL is: %s' % url)

    target_path, target_fname = os.path.split(fname)

    with utils.make_temp_directory() as temp_dir:
        # keep the URL's extension on the local tarball name
        tarball_file_ext = os.path.splitext(urlparse.urlparse(url).path)[1]
        tarball_fname = os.path.join(temp_dir, 'feats' + tarball_file_ext)

        # Pass an argv list instead of a shell string so that a temp path or
        # URL containing spaces or shell metacharacters (e.g. '&' in a query
        # string) reaches wget unmodified.
        utils.subproc_call_check(['wget', '-O', tarball_fname, url])

        with tarfile.open(tarball_fname) as tar:
            members_by_name = {member.name: member
                               for member in tar.getmembers()}

            if target_fname not in members_by_name:
                raise RuntimeError('Precomputed feature tarball does not contain required file')

            # extract only the requested member, not the whole archive
            tar.extractall(target_path, [members_by_name[target_fname]])

    return True
def index_limas(base_path, component_cfgs):
    """Run the LIMAS indexing scripts and ``bin/limas`` stages for a collection.

    Appends ``<limas>/bin`` to ``PATH`` in ``os.environ``, changes into the
    LIMAS component directory and runs, in order, four indexing scripts
    (each given a subdirectory of the collection's ``private_data`` path)
    followed by three ``bin/limas`` sub-commands.

    Args:
        base_path: not used by this function.
        component_cfgs: configuration mapping; the keys read here are
            ``components['limas']``, ``links``,
            ``collection['paths']['private_data']`` and
            ``collection['name']``.
    """
    components = component_cfgs['components']
    # not used below; the lookup is kept so that a missing 'links' key
    # still fails here
    links = component_cfgs['links']
    data_paths = component_cfgs['collection']['paths']
    collection_name = component_cfgs['collection']['name']

    limas_dir = components['limas']
    conf_fn = os.path.join(limas_dir, 'conf', collection_name + '.py')

    # make the LIMAS binaries reachable by the scripts started below
    os.environ['PATH'] = ':'.join([os.environ['PATH'],
                                   os.path.join(limas_dir, 'bin')])

    # (script, private_data subdirectory) pairs, in execution order
    script_stages = (
        # index main video files
        ("scripts/shotdetection/index_videos.py", 'ffprobe'),
        # index video-level metadata
        ("scripts/integration/index_meta.py", 'metadata'),
        # index shot and keyframe data
        ("scripts/integration/index_shots_from_timings.py", 'shottimings'),
        # index asr data
        ("scripts/integration/index_asr_from_timings.py", 'asr'),
    )

    # bin/limas sub-commands, in execution order: normalize data, index
    # data to inverted files, create collection statistics
    limas_actions = ('normalize', 'indexASR', 'es')

    with utils.change_cwd(limas_dir):
        for script, subdir in script_stages:
            input_dir = os.path.join(data_paths['private_data'], subdir)
            utils.subproc_call_check(
                [script, conf_fn, collection_name, input_dir])

        for action in limas_actions:
            utils.subproc_call_check(["bin/limas", action, conf_fn])
def prepare_cpuvisor(base_path, component_cfgs):
    """Prepare the cpuvisor-srv component for the configured collection.

    Steps, in order:
      1. write ``config.<collection>.prototxt`` from the template, carrying
         over three ``preproc_config`` fields if the file already exists;
      2. write ``start.sh`` and make it executable;
      3. download the models;
      4. unless the negative feature file exists, try to download it and,
         failing that, download negative images if needed and compute the
         features with ``cpuvisor_preproc --nodsetfeats``.

    NOTE(review): this file contains a second definition with the same name
    further down; at import time the later definition replaces this one.

    Args:
        base_path: not used by this function.
        component_cfgs: configuration mapping; the keys read here are
            ``components['cpuvisor-srv']``,
            ``links['cpuvisor-srv']['server_port'|'notify_port']``,
            ``collection['name']`` and ``collection['paths']['index_data']``.

    Raises:
        RuntimeError: if the component has no ``bin`` directory.
    """
    component_paths = component_cfgs['components']
    links = component_cfgs['links']
    collection = component_cfgs['collection']

    index_dir = os.path.join(collection['paths']['index_data'], 'cpuvisor-srv')
    templates_dir = os.path.join('templates', 'cpuvisor-srv')
    path = component_paths['cpuvisor-srv']

    # helper module shipped with the component (loaded from its directory)
    cpuvisortls = utils.import_python_module_from_path(path, 'download_data')

    if not os.path.exists(os.path.join(path, 'bin')):
        raise RuntimeError(
            '[cpuvisor] Missing bin directory - compile before running link_components!'
        )

    # prepare endpoints and paths
    models_path = os.path.join(path, 'model_data')
    negimgs_path = os.path.join(path, 'server_data', 'neg_images')
    negidx_path = os.path.join(path, 'negpaths.txt')
    negfeats_path = os.path.join(path, 'server_data', 'negfeats.binaryproto')

    srv_links = links['cpuvisor-srv']
    server_endpoint = 'tcp://127.0.0.1:%d' % srv_links['server_port']
    notify_endpoint = 'tcp://127.0.0.1:%d' % srv_links['notify_port']

    image_cache_path = os.path.join(index_dir, 'cache', 'downloaded')
    rlist_cache_path = os.path.join(index_dir, 'cache', 'rlists')

    # prepare config
    log.info('[cpuvisor] Preparing config...')

    def prepare_config():
        template_config = os.path.join(templates_dir, 'config.prototxt')
        output_config = os.path.join(path,
                                     'config.%s.prototxt' % collection['name'])

        # fields added by index_data that must survive regeneration
        preserved_fields = (
            'preproc_config.dataset_im_paths',
            'preproc_config.dataset_im_base_path',
            'preproc_config.dataset_feats_file',
        )

        # if the config file already exists, read in those fields first
        restore_fields = os.path.exists(output_config)
        saved_values = []
        if restore_fields:
            saved_values = [
                cpuvisortls.get_config_field(path, field_name, output_config)
                for field_name in preserved_fields
            ]

        # write the new config file
        replace_patterns = {
            '<MODELS_PATH>': models_path,
            '<NEG_IM_PATH>': negimgs_path,
            '<NEG_IM_INDEX>': negidx_path,
            '<NEG_FEATS_FILE>': negfeats_path,
            '<SERVER_ENDPOINT>': server_endpoint,
            '<NOTIFY_ENDPOINT>': notify_endpoint,
            '<IMAGE_CACHE_PATH>': image_cache_path,
            '<RLIST_CACHE_PATH>': rlist_cache_path
        }
        # .items() instead of .iteritems(): works on Python 2 and Python 3
        replace_patterns = list(replace_patterns.items())

        with open(template_config, 'r') as src_f:
            with open(output_config, 'w') as dst_f:
                utils.copy_replace(src_f, dst_f, replace_patterns)

        # now restore fields added by index_data if required
        if restore_fields:
            for field_name, field_value in zip(preserved_fields, saved_values):
                cpuvisortls.set_config_field(path, field_name, field_value,
                                             output_config)

    prepare_config()

    # prepare start script
    log.info('[cpuvisor] Preparing start script...')

    def write_start_script():
        outf = os.path.join(path, 'start.sh')
        utils.write_template(templates_dir, 'start.sh', outf,
                             {'name': component_cfgs['collection']['name']})
        # 0o755 is the octal spelling accepted by Python 2.6+ and Python 3;
        # the legacy form 0755 is a SyntaxError on Python 3
        os.chmod(outf, 0o755)

    write_start_script()

    # download models
    log.info('[cpuvisor] Getting models...')
    cpuvisortls.download_models(models_path)

    # download features for negative images
    if os.path.exists(negfeats_path):
        log.info('[cpuvisor] Negative feature file exists')
        return

    log.info(
        '[cpuvisor] Attempting to download features for negative images...'
    )
    if cpuvisortls.download_neg_feats(negfeats_path):
        return

    # if no features could be downloaded, compute features using negative
    # images instead
    log.info(
        '[cpuvisor] Could not download negative features - downloading negative training images instead...'
    )
    # images are fetched only when utils.touch_dir returns a falsy value
    # (presumably "directory was not already prepared" - confirm in utils)
    if not utils.touch_dir(negimgs_path, 'negimgs'):
        cpuvisortls.download_neg_images(negimgs_path)

    log.info('[cpuvisor] Computing features for negative images...')
    with utils.change_cwd(os.path.join(path, 'bin')):
        utils.subproc_call_check([
            './cpuvisor_preproc', '--config_path',
            '../config.%s.prototxt' % component_cfgs['collection']['name'],
            '--nodsetfeats'
        ])
def prepare_cpuvisor(base_path, component_cfgs):
    """Set up config, start script, models and negative features for cpuvisor-srv.

    The function regenerates ``config.<collection>.prototxt`` from the
    template (keeping three ``preproc_config`` fields of an existing file),
    writes an executable ``start.sh``, downloads the models and makes sure a
    negative feature file is available: an existing file is kept, otherwise
    a download is attempted, and if that fails the features are computed
    with ``cpuvisor_preproc --nodsetfeats`` from negative images.

    NOTE(review): an earlier definition with the same name appears above in
    this file; being the later one, this definition is the one left bound
    after import.

    Args:
        base_path: not used by this function.
        component_cfgs: configuration mapping providing
            ``components['cpuvisor-srv']``, ``links['cpuvisor-srv']``
            (``server_port`` and ``notify_port``) and ``collection``
            (``name`` and ``paths['index_data']``).

    Raises:
        RuntimeError: if ``<cpuvisor-srv>/bin`` does not exist.
    """
    component_paths = component_cfgs['components']
    links = component_cfgs['links']
    collection = component_cfgs['collection']

    index_dir = os.path.join(collection['paths']['index_data'], 'cpuvisor-srv')
    templates_dir = os.path.join('templates', 'cpuvisor-srv')
    path = component_paths['cpuvisor-srv']

    cpuvisortls = utils.import_python_module_from_path(
        component_paths['cpuvisor-srv'], 'download_data')

    bin_dir = os.path.join(component_paths['cpuvisor-srv'], 'bin')
    if not os.path.exists(bin_dir):
        raise RuntimeError('[cpuvisor] Missing bin directory - compile before running link_components!')

    # prepare endpoints and paths
    server_data_dir = os.path.join(component_paths['cpuvisor-srv'], 'server_data')
    models_path = os.path.join(component_paths['cpuvisor-srv'], 'model_data')
    negimgs_path = os.path.join(server_data_dir, 'neg_images')
    negidx_path = os.path.join(component_paths['cpuvisor-srv'], 'negpaths.txt')
    negfeats_path = os.path.join(server_data_dir, 'negfeats.binaryproto')

    server_endpoint = 'tcp://127.0.0.1:%d' % links['cpuvisor-srv']['server_port']
    notify_endpoint = 'tcp://127.0.0.1:%d' % links['cpuvisor-srv']['notify_port']

    cache_dir = os.path.join(index_dir, 'cache')
    image_cache_path = os.path.join(cache_dir, 'downloaded')
    rlist_cache_path = os.path.join(cache_dir, 'rlists')

    # prepare config
    log.info('[cpuvisor] Preparing config...')

    def prepare_config():
        template_config = os.path.join(templates_dir, 'config.prototxt')
        output_config = os.path.join(component_paths['cpuvisor-srv'],
                                     'config.%s.prototxt' % collection['name'])

        def get_field(field_name):
            return cpuvisortls.get_config_field(component_paths['cpuvisor-srv'],
                                                field_name, output_config)

        def set_field(field_name, field_value):
            cpuvisortls.set_config_field(component_paths['cpuvisor-srv'],
                                         field_name, field_value, output_config)

        # if the config file already exists, read in fields added by
        # index_data first; None placeholders keep the names bound when
        # there is nothing to restore
        dataset_im_paths = None
        dataset_im_base_path = None
        dataset_feats_file = None
        restore_fields = os.path.exists(output_config)
        if restore_fields:
            dataset_im_paths = get_field('preproc_config.dataset_im_paths')
            dataset_im_base_path = get_field('preproc_config.dataset_im_base_path')
            dataset_feats_file = get_field('preproc_config.dataset_feats_file')

        # write the new config file
        replace_patterns = {
            '<MODELS_PATH>': models_path,
            '<NEG_IM_PATH>': negimgs_path,
            '<NEG_IM_INDEX>': negidx_path,
            '<NEG_FEATS_FILE>': negfeats_path,
            '<SERVER_ENDPOINT>': server_endpoint,
            '<NOTIFY_ENDPOINT>': notify_endpoint,
            '<IMAGE_CACHE_PATH>': image_cache_path,
            '<RLIST_CACHE_PATH>': rlist_cache_path
        }
        # dict.iteritems() exists only on Python 2; items() works on both
        replace_patterns = list(replace_patterns.items())

        with open(template_config, 'r') as src_f:
            with open(output_config, 'w') as dst_f:
                utils.copy_replace(src_f, dst_f, replace_patterns)

        # now restore fields added by index_data if required
        if restore_fields:
            set_field('preproc_config.dataset_im_paths', dataset_im_paths)
            set_field('preproc_config.dataset_im_base_path', dataset_im_base_path)
            set_field('preproc_config.dataset_feats_file', dataset_feats_file)

    prepare_config()

    # prepare start script
    log.info('[cpuvisor] Preparing start script...')

    def write_start_script():
        outf = os.path.join(path, 'start.sh')
        utils.write_template(templates_dir, 'start.sh', outf,
                             {'name': component_cfgs['collection']['name']})
        # octal literal written as 0o755: valid on Python 2.6+ and on
        # Python 3, where the old spelling 0755 does not parse
        os.chmod(outf, 0o755)

    write_start_script()

    # download models
    log.info('[cpuvisor] Getting models...')
    cpuvisortls.download_models(models_path)

    # download features for negative images
    if os.path.exists(negfeats_path):
        log.info('[cpuvisor] Negative feature file exists')
    else:
        log.info('[cpuvisor] Attempting to download features for negative images...')
        if not cpuvisortls.download_neg_feats(negfeats_path):
            # if no features could be downloaded, compute features using
            # negative images instead
            log.info('[cpuvisor] Could not download negative features - downloading negative training images instead...')

            # NOTE(review): images are downloaded only when touch_dir
            # returns a falsy value; its exact contract lives in utils
            if not utils.touch_dir(negimgs_path, 'negimgs'):
                cpuvisortls.download_neg_images(negimgs_path)

            log.info('[cpuvisor] Computing features for negative images...')
            with utils.change_cwd(bin_dir):
                utils.subproc_call_check([
                    './cpuvisor_preproc',
                    '--config_path', '../config.%s.prototxt' % component_cfgs['collection']['name'],
                    '--nodsetfeats'
                ])