def pull_gridded_gabors_sq_vs_rect_onefilter_coarser(depends_on = '../config/config_greedy_optimization_onegabor_filterbank_sq_vs_rect_coarser.py'):
    """Greedy search for a better single-gabor square-vs-rectangle model,
    using a coarser step in the search grid.

    RESULT: it's all bad.
    """
    plan = v1_greedy_optimization_protocol(depends_on)
    actualize(plan)
def extract_and_evaluate_protocol(evaluate_config_path, model_config_path, image_config_path,
                                  convolve_func_name='numpy', write=False, parallel=False):
    """Build per-task evaluation op lists keyed by extended config hash.

    Parameters:
        evaluate_config_path: path to the config whose 'train_test' entry lists tasks.
        model_config_path / image_config_path: configs hashed into certificate paths.
        convolve_func_name: backend name passed through to the evaluation op.
        write: if True, actualize the full op list immediately.
        parallel: if True, emit the parallel evaluation op (batch certificate) instead.

    Returns:
        dict mapping ext_hash -> [op] for each task.

    Fixes: the parallel branch previously named ops 'evaluation' + ext_hash while
    the serial branch used 'evaluation_' + ext_hash; the naming is now consistent.
    The duplicated append/bookkeeping code shared by both branches is hoisted.
    """
    model_config_gen = get_config(model_config_path)
    model_hash = get_config_string(model_config_gen['models'])
    model_certificate = '../.model_certificates/' + model_hash

    image_config_gen = get_config(image_config_path)
    image_hash = get_config_string(image_config_gen['images'])
    image_certificate = '../.image_certificates/' + image_hash

    evaluate_config = get_config(evaluate_config_path)
    task_config = evaluate_config.pop('train_test')

    D = []
    DH = {}
    for task in task_config:
        overall_config_gen = SON([('models', model_config_gen),
                                  ('images', image_config_gen),
                                  ('task', task)])
        ext_hash = get_config_string(overall_config_gen)
        if not parallel:
            performance_certificate = '../.performance_certificates/' + ext_hash
            op = ('evaluation_' + ext_hash, extract_and_evaluate,
                  (performance_certificate, image_certificate, model_certificate,
                   evaluate_config_path, convolve_func_name, task, ext_hash))
        else:
            batch_certificate = '../.batch_certificates/' + ext_hash
            op = ('evaluation_' + ext_hash, extract_and_evaluate_parallel,
                  (batch_certificate, image_certificate, model_certificate,
                   evaluate_config_path, convolve_func_name, task, ext_hash))
        D.append(op)
        DH[ext_hash] = [op]

    if write:
        actualize(D)
    return DH
def v1_greedy_optimization_protocol(config_path, use_cpu = False, write=False):
    """Assemble the greedy-optimization op list for a V1-like model.

    Returns the op list; actualizes it first when *write* is True.
    """
    plan = DBAdd(image_initialize, args=(config_path,))
    init_ops = do_initialization(image_initialize, args=(config_path,))
    image_certificate = init_ops[0]['outcertpaths'][0]

    # Fall back to the numpy convolution when a GPU is unavailable or CPU is forced.
    if use_cpu or not GPU_SUPPORT:
        convolve_func = v1f.v1like_filter_numpy
    else:
        convolve_func = v1f.v1like_filter_pyfft

    config = get_config(config_path)
    task = config['evaluation_task']
    initial_model = config['model']
    modifier_args = config['modifier_args']
    rep_limit = config.get('rep_limit')
    modifier_class = config.get('modifier')
    modifier = (config_modifiers.BaseModifier if modifier_class is None
                else modifier_class)(modifier_args)

    newhash = get_config_string(config)
    outfile = '../.optimization_certificates/' + newhash
    plan.append(('optimization_' + newhash, greedy_optimization,
                 (outfile, task, image_certificate, initial_model, convolve_func,
                  rep_limit, modifier_args, modifier)))
    if write:
        actualize(plan)
    return plan
def evaluate_protocol(evaluate_config_path, model_config_path, image_config_path, write=False):
    """Build one SVM-evaluation op per task in the evaluate config.

    Returns (op_list, ext_hashes); actualizes the ops first when *write* is True.
    """
    model_config_gen = get_config(model_config_path)
    image_config_gen = get_config(image_config_path)

    feature_hash = get_config_string(SON([('models', model_config_gen['models']),
                                          ('images', image_config_gen['images'])]))
    feature_certificate = '../.feature_certificates/' + feature_hash

    evaluate_config = get_config(evaluate_config_path)
    D = []
    ext_hashes = []
    for task in evaluate_config.pop('train_test'):
        # NOTE(review): these keys ('models' -> whole config gen, singular 'image')
        # differ from those used for feature_hash above; hashes are persisted in
        # certificate paths, so presumably intentional — confirm before changing.
        ext_hash = get_config_string(SON([('models', model_config_gen),
                                          ('image', image_config_gen),
                                          ('task', task)]))
        outfile = '../.performance_certificates/' + ext_hash
        D.append(('svm_evaluation_' + ext_hash, evaluate,
                  (outfile, feature_certificate, evaluate_config_path, task, ext_hash)))
        ext_hashes.append(ext_hash)

    if write:
        actualize(D)
    return D, ext_hashes
def optimize_gridded_gabors_sq_vs_rect_twofilter(depends_on = '../config/config_greedy_optimization_twogabor_filterbank_sq_vs_rect.py'):
    """Greedy search for the best two-orthogonal-gabor filterbank on sq vs rectangle.

    RESULT: you get to v. high performance quickly (and this could really benefit
    from a better search procedure to optimize further).
    """
    plan = v1_greedy_optimization_protocol(depends_on)
    actualize(plan)
def OGRInspectorInstantiator(depends_on=root, creates=protocol_root + "OGRInspectors.py"):
    """Emit one OGR-inspection step per subdirectory of *depends_on*."""
    outdir = "../Data/ShapeFileOGRInspections/"
    subdirs = [entry for entry in listdir(depends_on) if IsDir(depends_on + entry)]
    steps = [("initialize", MakeDir, (outdir,))]
    for sub in subdirs:
        steps.append(("Inspect_" + sub, ogrinspectdir,
                      (depends_on + sub, sub, outdir + sub + ".py")))
    actualize(creates, steps)
def combine_components(depends_on=SOURCE_COMPONENTS_DIR):
    """Register an add_component step for every .pickle file under *depends_on*.

    Step names are derived from each file's relative path with separators
    flattened to '__' and the extension stripped.
    """
    steps = []
    for path in RecursiveFileList(depends_on):
        if not path.endswith(".pickle"):
            continue
        name = os.path.relpath(path, depends_on).replace(os.sep, "__").split(".")[0]
        steps.append(("component_" + name, add_component,
                      (path, os.path.join(CERTIFICATE_DIR, name))))
    actualize(steps)
def model_protocol(config_path, write = False, parallel=False):
    """Build the single-step plan that generates models for *config_path*.

    Returns (op_list, model_hash).
    NOTE(review): *parallel* is accepted but unused here — confirm intended.
    """
    config = get_config(config_path)
    model_hash = get_config_string(config['models'])
    certificate = '../.model_certificates/' + model_hash
    plan = [('generate_models', generate_models, (certificate, model_hash, config))]
    if write:
        actualize(plan)
    return plan, model_hash
def extract_and_evaluate(evaluate_config_path, model_config_path, image_config_path, convolve_func_name='numpy'):
    """Actualize each per-hash evaluation op list; return the list of hashes."""
    plan_by_hash = protocols.extract_and_evaluate_protocol(
        evaluate_config_path, model_config_path, image_config_path,
        convolve_func_name=convolve_func_name, write=False)
    for ext_hash, ops in plan_by_hash.items():
        actualize(ops, outfiledir=get_code_dir(ext_hash))
    return plan_by_hash.keys()
def extract_features(image_config_path, model_config_path, convolve_func_name = 'numpy', parallel=False, batch_size=1000):
    """Build and actualize the feature-extraction plan; return the feature hash."""
    plan, feature_hash = protocols.extract_features_protocol(
        image_config_path, model_config_path,
        convolve_func_name=convolve_func_name, write=False,
        parallel=parallel, batch_size=batch_size)
    actualize(plan, outfiledir=get_code_dir(feature_hash))
    return feature_hash
def image_protocol(config_path, write = False, parallel=False):
    """Build the single-step plan that generates images for *config_path*.

    Returns (op_list, image_hash); *parallel* selects the parallel generator.
    """
    config = get_config(config_path)
    image_hash = image_protocol_hash(config_path)
    certificate = '../.image_certificates/' + image_hash
    generator = generate_images_parallel if parallel else generate_images
    plan = [('generate_images', generator, (certificate, image_hash, config))]
    if write:
        actualize(plan)
    return plan, image_hash
def v1s_protocol(param_fname, img_path, results_dir, write=False):
    """Build the V1S trial plan: a results-dir init step plus one run per trial.

    The number of trials is read from the param file's protocol section.
    """
    params = {}
    execfile(os.path.abspath(param_fname), {}, params)
    ntrials = params['protocol']['ntrials']

    plan = [('initialize', MakeDir, (results_dir,))]
    for trial in range(ntrials):
        outpath = os.path.join(results_dir, 'results_' + str(trial) + '.pickle')
        plan.append(('run_' + str(trial), v1s.run_one_trial,
                     (param_fname, img_path, outpath)))
    if write:
        actualize(plan)
    return plan
def v1s_seq_protocol(param_fname, img_path, results_dir, printout_file, scores_file, write=False):
    """Sequential V1S plan: per-trial sub-protocols followed by one aggregation step."""
    params = {}
    execfile(os.path.abspath(param_fname), {}, params)

    plan = [('initialize', MakeDir, (results_dir,))]
    for trial in range(params['protocol']['ntrials']):
        plan += v1s.trial_protocol(param_fname, img_path, results_dir,
                                   prefix='trial_' + str(trial),
                                   make_container=False)
    plan.append(('aggregate_results', v1s.aggregate_results,
                 (results_dir, printout_file, scores_file)))
    if write:
        actualize(plan)
    return plan
def extract_features_protocol(image_config_path, model_config_path, convolve_func_name = 'numpy', write = False, parallel=False, batch_size=1000):
    """Build the single-step feature-extraction plan.

    Returns (op_list, feature_hash); the hash combines the model and image configs.
    """
    model_config_gen = get_config(model_config_path)
    model_hash = get_config_string(model_config_gen['models'])
    model_certificate = '../.model_certificates/' + model_hash

    image_config_gen = get_config(image_config_path)
    image_hash = get_config_string(image_config_gen['images'])
    image_certificate = '../.image_certificates/' + image_hash

    overall_config_gen = SON([('models', model_config_gen['models']),
                              ('images', image_config_gen['images'])])
    feature_hash = get_config_string(overall_config_gen)
    feature_certificate = '../.feature_certificates/' + feature_hash

    if parallel:
        step = ('extract_features', extract_features_parallel,
                (feature_certificate, image_certificate, model_certificate,
                 overall_config_gen, feature_hash, convolve_func_name, batch_size))
    else:
        step = ('extract_features', extract_features,
                (feature_certificate, image_certificate, model_certificate,
                 overall_config_gen, feature_hash, convolve_func_name))
    plan = [step]
    if write:
        actualize(plan)
    return plan, feature_hash
def v1s_simple(depends_on = '../v1/params_simple.py'):
    """Run the simple V1S protocol on the 101_ObjectCategories data."""
    results_dir = os.path.join(COMP_ROOT, 'simple/')
    actualize(v1s_protocol(depends_on, '../data/101_ObjectCategories', results_dir))
def evaluate(evaluate_config_path, model_config_path, image_config_path):
    """Actualize each evaluation op under its own hash's code dir; return the hashes."""
    plan, hashes = protocols.evaluate_protocol(
        evaluate_config_path, model_config_path, image_config_path, write=False)
    for step, ext_hash in zip(plan, hashes):
        actualize([step], outfiledir=get_code_dir(ext_hash))
    return hashes
def pixel_rot_no_preproc2_evaluation(depends_on = ('../config/config_pixel_rot_no_preproc2.py','../config/config_pixel_rot_no_preproc_evaluation.py')):
    """Evaluate the pixel-rot no-preproc2 model (model config first, eval config second)."""
    model_config, eval_config = depends_on
    actualize(v1_evaluation_protocol(eval_config, model_config))
def pixel_rot_scale_evaluation(depends_on = ('../config/config_pixel_rot_scale.py','../config/config_pixel_rot_scale_evaluation.py')):
    """Evaluate the pixel-rot scale model (model config first, eval config second)."""
    model_config, eval_config = depends_on
    actualize(v1_evaluation_protocol(eval_config, model_config))
def test_learn_protocol(config_path):
    """Build and run the test-learning initialization plan for *config_path*."""
    plan = DBAdd(test_learning_initialize, args=(config_path,))
    actualize(plan)
def cairofilters_sq_vs_rect_norm_trans_evaluation(depends_on = ('../config/config_cairofilters_sq_vs_rect_norm.py','../config/config_cairofilters_sq_vs_rect_norm_trans_evaluation.py')):
    """Evaluate the cairofilters sq-vs-rect norm model under translation
    (model config first, eval config second)."""
    model_config, eval_config = depends_on
    actualize(v1_evaluation_protocol(eval_config, model_config))
def models(config_path, parallel=False):
    """Build and actualize the model-generation plan; return the model hash."""
    plan, model_hash = protocols.model_protocol(config_path, write=False, parallel=parallel)
    actualize(plan, outfiledir=get_code_dir(model_hash))
    return model_hash
def hsuck(seed, datadir, L, suffix='', write=True, ipath=None, getfunc0=None):
    """Build a multi-round hierarchical crawl/download plan.

    seed     -- start URL (string) or list of (key, value) pairs; a bare string
                is wrapped as [('URL', seed)].
    datadir  -- output directory; a trailing '/' is appended if missing.
    L        -- list of round specs.  Each element of L[:-1] is either a callable
                (used as the parser, with default getter) or a dict with keys
                'Parser' and optionally 'Splitter' / 'Getter'.  L[-1], if truthy,
                must be a callable used as the final catalog parser.
    suffix   -- prefix for step names; '_' appended if missing.
    write    -- if True, actualize the plan (requires ipath).
    getfunc0 -- initial fetch function; defaults to modwget.

    Returns the list of (name, func, args) steps.

    NOTE(review): if len(L) == 1 the round loop never runs, so in the final
    block `round` refers to the builtin function, producing a nonsense path
    rather than a clean error — confirm callers always pass len(L) >= 2.
    """
    if is_string_like(seed):
        seed = [('URL', seed)]
    if getfunc0 is None:
        getfunc0 = modwget
    # Normalize the step-name prefix and the data directory path.
    if suffix and not suffix.endswith('_'):
        suffix = suffix + '_'
    if not datadir.endswith('/'):
        datadir += '/'
    D = [(suffix + 'initialize', hstart, (seed, datadir, getfunc0))]
    # One parse+download round per element of L[:-1]; round N reads the
    # manifest/links/downloads produced by round N-1.
    for (i, l) in enumerate(L[:-1]):
        round = i + 1  # NOTE: shadows the builtin `round` within this function
        oldmanifestpath = datadir + 'Manifest_' + str(round - 1) + '.tsv'
        newmanifestpath = datadir + 'Manifest_' + str(round) + '.tsv'
        oldtotallinkpath = datadir + 'TotalLinks_' + str(round - 1) + '.tsv'
        newtotallinkpath = datadir + 'TotalLinks_' + str(round) + '.tsv'
        olddownloaddir = datadir + 'Downloads_' + str(round - 1) + '/'
        newdownloaddir = datadir + 'Downloads_' + str(round) + '/'
        Suffix = suffix + 'Round' + str(round) + '_'
        if hasattr(l, '__call__'):
            # Bare callable: it is the parser; use defaults for everything else.
            Parser = l
            Getter = modwget
            splitfunc = None
            prefixlist = None
        else:
            # Dict spec: 'Parser' required, 'Splitter'/'Getter' optional.
            assert isinstance(l, dict) and 'Parser' in l.keys()
            Parser = l['Parser']
            if 'Splitter' in l.keys():
                (splitfunc, prefixlist) = l['Splitter']
            else:
                (splitfunc, prefixlist) = (None, None)
            if 'Getter' in l.keys():
                Getter = l['Getter']
            else:
                Getter = modwget
        D += [(Suffix + 'parse', applyparser, (oldmanifestpath, oldtotallinkpath, olddownloaddir, newmanifestpath, newtotallinkpath, Parser, splitfunc, prefixlist, round))]
        if (splitfunc != None) and (prefixlist != None):
            # Split the new manifest by prefix and emit one download step per prefix.
            assert all(['/' not in p for p in prefixlist])
            splitdir = datadir + 'SplitManifest_' + str(round) + '/'
            D += [(Suffix + 'splitmanifest', applysplitter, (newmanifestpath, splitdir))]
            D += [(Suffix + 'initializedownloads', MakeDir, (newdownloaddir,))]
            # '!' and '-' are stripped from step names, which presumably must be
            # identifier-safe — confirm against actualize's naming rules.
            D += [(Suffix + 'download_' + pathprocessor([p]).replace('!', '_').replace('-', '_'), applygetter, (splitdir + 'Manifest_' + pathprocessor([p]) + '.tsv', newdownloaddir + pathprocessor([p]) + '/', Getter)) for p in prefixlist]
        else:
            # No splitter: one download step over the whole new manifest.
            D += [(Suffix + 'download', applygetter, (newmanifestpath, newdownloaddir, Getter))]
    if L[-1]:
        # Final pass: parse the last round's output into the catalog.
        oldmanifestpath = datadir + 'Manifest_' + str(round) + '.tsv'
        newmanifestpath = datadir + 'Catalog.tsv'
        oldtotallinkpath = datadir + 'TotalLinks_' + str(round) + '.tsv'
        olddownloaddir = datadir + 'Downloads_' + str(round) + '/'
        Suffix = suffix + 'Final_'
        assert hasattr(L[-1], '__call__')
        Parser = L[-1]
        D += [(Suffix + 'parse', applyparser, (oldmanifestpath, oldtotallinkpath, olddownloaddir, newmanifestpath, None, Parser, None, None, 'final'))]
    if write:
        assert ipath, 'ipath must be specified'
        actualize(ipath, D)
    return D
def v1s_simple_seq(depends_on = '../v1/params_simple.py'):
    """Run the sequential simple V1S protocol on the 101_ObjectCategories data."""
    results_dir = os.path.join(COMP_ROOT, 'simple/')
    printout_file = os.path.join(COMP_ROOT, 'simple_results.txt')
    scores_file = os.path.join(COMP_ROOT, 'simple_scores.pickle')
    actualize(v1s_seq_protocol(depends_on, '../data/101_ObjectCategories',
                               results_dir, printout_file, scores_file))
def backendProtocol(parserObj, certdir = None, createCertDir = False, downloadPath = None, createPath = None, indexPath = None, write = True, uptostep=None):
    """Build (and optionally write out) the backend step list for one collection.

    Reads the download/parse/index configuration off *parserObj*, maintains an
    increment manifest for incremental collections, and assembles download,
    download-check, updateCollection and updateCollectionIndex steps.

    certdir/downloadPath/createPath/indexPath -- certificate locations; any left
        as None are derived from certdir (itself defaulted to CERT_ROOT).
    write    -- if True, actualize the steps into <outdir>/steps.py.
    uptostep -- if given, truncate the step list just after the named step.

    NOTE(review): if parserObj has neither `downloader` nor `downloadProtocol`,
    `downloadStepsGen` is never defined and the call below raises NameError —
    presumably verify() guarantees one of them; confirm.
    """
    parserObj.verify()
    collectionName = parserObj.collectionName
    parser = parserObj.parser
    downloader = parserObj.downloader
    downloadProtocol = parserObj.downloadProtocol
    downloadArgs = parserObj.downloadArgs
    downloadKwargs = parserObj.downloadKwargs
    parserArgs = parserObj.parserArgs
    parserKwargs = parserObj.parserKwargs
    trigger = parserObj.trigger
    ID = parserObj.ID
    incremental = parserObj.incremental
    slicesCorrespondToIndexes = parserObj.slicesCorrespondToIndexes
    # Step-name prefix: the explicit ID, or the collection name, with '_' appended.
    if ID == None:
        ID = collectionName
    if ID and not ID.endswith('_'):
        ID += '_'
    outdir = CERT_PROTOCOL_ROOT + collectionName + '/'
    if not PathExists(outdir):
        MakeDir(outdir)
    StepList = []
    # If any certificate path is unspecified, fall back to the default cert root.
    if certdir == None and any([x == None for x in [downloadPath, createPath, indexPath]]):
        certdir = CERT_ROOT
    if certdir:
        if createCertDir:
            StepList += [(ID + 'initialize', MakeDir, (certdir,))]
        if downloadPath == None:
            downloadPath = certdir + ID + 'downloadCertificate.txt'
        if createPath == None:
            createPath = certdir + ID + 'createCertificate.txt'
        if indexPath == None:
            indexPath = certdir + ID + 'indexCertificates/'
    # downloadStepsGen(DIR, T) yields the download steps for one increment,
    # where T is the increment's trigger tag (may be '').
    if downloader:
        if isinstance(downloader, list):
            # List of (func, name) pairs with parallel arg/kwarg lists.
            if downloadArgs == None:
                downloadArgs = [()]*len(downloader)
            if downloadKwargs == None:
                downloadKwargs = [{}]*len(downloader)
            downloadStepsGen = lambda DIR, T: [(ID + 'download_' + n + ('_' + T if T else ''), d, [(DIR ,) + a, b]) for ((d, n), a, b) in zip(downloader, downloadArgs, downloadKwargs)]
        else:
            # Single callable downloader.
            assert hasattr(downloader, '__call__')
            if downloadArgs == None:
                downloadArgs = ()
            if downloadKwargs == None:
                downloadKwargs = {}
            downloadStepsGen = lambda DIR, T: [(ID + 'download' + ('_' + T if T else ''), downloader, [(DIR,) + downloadArgs, downloadKwargs])]
    elif downloadProtocol:
        # Protocol generates its own (possibly multi-step) download plan.
        if downloadArgs == None:
            downloadArgs = ()
        if downloadKwargs == None:
            downloadKwargs = {}
        downloadStepsGen = lambda DIR, T: downloadProtocol(DIR, T, *downloadArgs, **downloadKwargs)
    download_root = DOWNLOAD_ROOT + collectionName + '/'
    T = trigger() if trigger else ''
    # Maintain the increment manifest: incremental collections append a row per
    # new trigger; non-incremental ones use a single sentinel row (-1).
    if incremental:
        if PathExists(outdir + 'manifest.tsv'):
            M = tb.tabarray(SVfile = outdir + 'manifest.tsv')
            newinc = max(M['increment']) + 1
            if T:
                if T in M['trigger']:
                    print 'It appears the increment, ' + T + ', has already run, not adding.'
                else:
                    # NOTE(review): reconstructed from collapsed source — the
                    # addrecords call is taken to be inside this else-branch
                    # (only add increments that have not already run); confirm.
                    print 'Adding new increment', T
                    M = M.addrecords((newinc, T))
        else:
            M = tb.tabarray(records = [(0, T)], names = ['increment', 'trigger'], formats='int,str')
    else:
        M = tb.tabarray(records = [(-1, T)], names = ['increment', 'trigger'], formats='int,str')
    M.saveSV(outdir + 'manifest.tsv')
    # Download steps for every increment in the manifest, then a global check.
    StepList += ListUnion([downloadStepsGen(increment_format(download_root, m['increment']), m['trigger']) for m in M])
    StepList += [(ID + 'download_check', download_check, (download_root, incremental, downloadPath))]
    StepList += [(ID + 'updateCollection', updateCollection, [(download_root, collectionName, parser, downloadPath, createPath), {'parserArgs': parserArgs, 'parserKwargs': parserKwargs, 'incremental': incremental}]),
                 (ID + 'updateCollectionIndex', indexing.updateCollectionIndex, (collectionName, createPath, indexPath), {'slicesCorrespondToIndexes': slicesCorrespondToIndexes})]
    # Optionally truncate the plan just after the step named ID + uptostep.
    if uptostep:
        for (i, d) in enumerate(StepList):
            if d[0] == ID + uptostep:
                StepList = StepList[:i + 1]
                break
    if write:
        outfile = outdir + 'steps.py'
        actualize(StepList, outfilename=outfile)
def v1_feature_extraction_protocol(config_path, use_cpu = False, write=False):
    """Build the V1 feature-extraction op list; actualize it when *write* is True."""
    plan = DBAdd(v1_initialize, args=(config_path, use_cpu))
    if write:
        actualize(plan)
    return plan
def v1like_a_feature_instantiator():
    """Run feature extraction for the v1like_a model configuration."""
    plan = feature_extraction_protocol('images', '../v1/config/v1like_a.py')
    actualize(plan)
def MakeBLS_Resource():
    """Crawl the BLS data hierarchy and actualize the resulting download plan."""
    stages = [{'Parser': BLS_mainparse1, 'Getter': WgetMultiple}, None]
    target_dir = os.path.join(DATA_ROOT, 'BLS_Hierarchy') + '/'
    plan = htools.hsuck('http://www.bls.gov/data/', target_dir, stages, write=False)
    actualize(plan)
def images(config_path, parallel=False, reads=None):
    """Build and actualize the image-generation plan; return the image hash."""
    plan, image_hash = protocols.image_protocol(config_path, write=False,
                                                parallel=parallel, reads=reads)
    actualize(plan, outfiledir=get_code_dir(image_hash))
    return image_hash