from __future__ import print_function

import functools
import cPickle

import numpy as np
import scipy as sp
import pymongo as pm
import gridfs
import bson
from bson import SON

# Project-local modules, inferred from usage below (exact import paths are
# assumptions): rendering, filter_generation, svm, v1_pyfft, and
# v1like_funcs (imported as v1f). Helpers such as get_filename, son_escape,
# reach_in, get_config, remove_existing, get_most_recent_files,
# compute_features, load_features, generate_splits, model_config_generator,
# get_consistent_deltas, get_performance, train_test, createCertificateDict,
# and the constants DB_NAME and STATS are defined elsewhere in the package.


def generate_and_insert_single_image(x, im_hash):
    # Render a single image from its config and store it in the 'images'
    # GridFS, keyed by a deterministic filename and the run hash.
    conn = pm.Connection(document_class=SON)
    db = conn[DB_NAME]
    im_fs = gridfs.GridFS(db, 'images')

    image_string = rendering.render_image(x['image'])
    y = SON([('config', x)])
    y['filename'] = get_filename(x)
    y['__hash__'] = im_hash
    im_fs.put(image_string, **y)
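# Note on the GridFS record layout used throughout this module (inferred from
# the code, not stated in the original): every stored file carries metadata of
# the form
#     {'config': <full config SON>,
#      'filename': get_filename(<config>),
#      '__hash__': <hash of the producing run>}
# so downstream stages can fetch files either by deterministic filename or by
# querying '__hash__' for everything a given run produced.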
def put_in_split(split, image_config_gen, m, task, ext_hash, split_id, split_fs):
    out_record = SON([('model', m['config']['model']),
                      ('images', son_escape(image_config_gen['images'])),
                      ('task', son_escape(task)),
                      ('split_id', split_id),
                      ])
    out_record['filename'] = get_filename(out_record)
    out_record['__hash__'] = ext_hash
    print('dump out ...')
    out_data = cPickle.dumps(SON([('split', split)]))
    split_fs.put(out_data, **out_record)
def extract_features_inner_core(image_certificate, model_certificate, feature_hash,
                                image_hash, model_hash, convolve_func_name, device_id,
                                im_query, m_query, im_skip, im_limit, m_skip, m_limit):
    if im_query is None:
        im_query = {}
    if m_query is None:
        m_query = {}
    im_query['__hash__'] = image_hash
    m_query['__hash__'] = model_hash

    conn = pm.Connection(document_class=SON)
    db = conn[DB_NAME]
    image_col = db['images.files']
    model_col = db['models.files']
    image_fs = gridfs.GridFS(db, 'images')
    model_fs = gridfs.GridFS(db, 'models')
    feature_fs = gridfs.GridFS(db, 'features')

    # Choose the convolution backend; the pyfft path needs a CUDA context
    # pushed for the duration of the extraction.
    if convolve_func_name == 'pyfft':
        context = v1_pyfft.setup_pyfft(device_id)
        context.push()
        convolve_func = functools.partial(v1f.v1like_filter_pyfft, device_id=device_id)
    else:
        convolve_func = v1f.v1like_filter_numpy

    image_configs = get_most_recent_files(image_col, im_query, skip=im_skip, limit=im_limit)
    model_configs = get_most_recent_files(model_col, m_query, skip=m_skip, limit=m_limit)

    # Compute features for the full (image, model) cross product and store
    # each result in the 'features' GridFS under the joint config.
    for image_config in image_configs:
        for model_config in model_configs:
            features = compute_features(image_config['filename'], image_fs,
                                        model_config, model_fs, convolve_func)
            features_string = cPickle.dumps(features)
            y = SON([('config', SON([('model', model_config['config']['model']),
                                     ('image', image_config['config']['image'])]))])
            y['filename'] = get_filename(y['config'])
            y['__hash__'] = feature_hash
            feature_fs.put(features_string, **y)

    if convolve_func_name == 'pyfft':
        context.pop()
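# Usage sketch (not part of the original module): a CPU-only invocation of
# extract_features_inner_core. The certificate paths and hash strings below
# are placeholder assumptions; real values come from the image/model
# generation stages. With convolve_func_name='numpy' the pyfft/CUDA branch is
# skipped, so device_id is unused.
#
# extract_features_inner_core('image_certificate.pkl', 'model_certificate.pkl',
#                             '<feature_hash>', '<image_hash>', '<model_hash>',
#                             convolve_func_name='numpy', device_id=None,
#                             im_query=None, m_query=None,
#                             im_skip=0, im_limit=100, m_skip=0, m_limit=100)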
def train_test_loop(outfile, extract_creates, task_config, feature_config_path, hash):
    feature_config = get_config(feature_config_path)
    models_params = feature_config['models']

    ntrain = task_config['ntrain']
    ntest = task_config['ntest']
    ntrain_pos = task_config.get('ntrain_pos')
    N = task_config.get('N', 10)
    query = task_config['query']

    base_query = SON([('__config_hash__', hash)])
    base_query.update(reach_in('config', task_config.get('universe', SON([]))))
    print('\nBASE', base_query, '\n')

    conn = pm.Connection(document_class=SON)
    db = conn['v1']
    fs = gridfs.GridFS(db, collection='model_performance')

    cquery = reach_in('config', query)
    for m in models_params:
        base_query_copy = base_query.copy()
        base_query_copy.update(reach_in('config.model', m))
        splitdata, results = train_test(cquery, 'v1', 'features', ntrain, ntest,
                                        ntrain_pos=ntrain_pos, N=N,
                                        universe=base_query_copy)
        splitpickle = cPickle.dumps(splitdata)
        data = SON([('feature_config_path', feature_config_path),
                    ('model', m),
                    ('task', son_escape(task_config)),
                    ('image__aggregate__', son_escape(feature_config['image']))])
        filename = get_filename(data)
        data.update(results)
        data['filename'] = filename
        fs.put(splitpickle, **data)

    createCertificateDict(outfile, {'task_config': task_config,
                                    'feature_config': feature_config,
                                    'feature_config_path': feature_config_path})
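# For reference, a sketch of the task_config fields that train_test_loop reads
# (key names are taken from the code above; the values shown are illustrative
# assumptions only):
#
# task_config = SON([('ntrain', 30),          # training examples per split
#                    ('ntest', 30),           # test examples per split
#                    ('ntrain_pos', 15),      # optional: positive training examples
#                    ('N', 10),               # number of splits (default 10)
#                    ('query', SON([])),      # selects the positive class
#                    ('universe', SON([]))])  # optional: restricts the data universe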
def put_in_split_result(res, image_config_gen, m, task, ext_hash, split_id, splitres_fs):
    out_record = SON([('model', m['config']['model']),
                      ('images', son_escape(image_config_gen['images'])),
                      ('task', son_escape(task)),
                      ('split_id', split_id),
                      ])

    split_result = SON([])
    for stat in STATS:
        if stat in res and res[stat] is not None:
            split_result[stat] = res[stat]

    out_record['filename'] = get_filename(out_record)
    out_record['__hash__'] = ext_hash
    out_record.update(split_result)

    print('dump out split result...')
    out_data = cPickle.dumps(SON([('split_result', res)]))
    splitres_fs.put(out_data, **out_record)
def generate_images(outfile, im_hash, config_gen):
    conn = pm.Connection(document_class=SON)
    db = conn[DB_NAME]
    im_coll = db['images.files']
    im_fs = gridfs.GridFS(db, 'images')

    remove_existing(im_coll, im_fs, im_hash)

    X = rendering.config_gen(config_gen)
    for (i, x) in enumerate(X):
        if i % 100 == 0:  # progress report every 100 images
            print(i, x)
        image_string = rendering.render_image(x['image'])
        y = SON([('config', x)])
        y['filename'] = get_filename(x)
        y['__hash__'] = im_hash
        im_fs.put(image_string, **y)

    createCertificateDict(outfile, {'image_hash': im_hash, 'args': config_gen})
def generate_models(outfile, m_hash, config_gen):
    conn = pm.Connection(document_class=SON)
    db = conn[DB_NAME]
    m_coll = db['models.files']
    m_fs = gridfs.GridFS(db, 'models')

    remove_existing(m_coll, m_fs, m_hash)

    M = model_config_generator(config_gen)
    for (i, m) in enumerate(M):
        filterbank = filter_generation.get_filterbank(m['model'])
        filterbank_string = cPickle.dumps(filterbank)
        if i % 100 == 0:  # progress report every 100 models
            print(i, m)
        y = SON([('config', m)])
        y['filename'] = get_filename(m)
        y['__hash__'] = m_hash
        m_fs.put(filterbank_string, **y)

    createCertificateDict(outfile, {'model_hash': m_hash, 'args': config_gen})
def greedy_optimization(outfile, task, image_certificate_file, initial_model,
                        convolve_func, rep_limit, modifier_args, modifier):
    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']
    opt_fs = gridfs.GridFS(db, 'optimized_performance')
    image_fs = gridfs.GridFS(db, 'raw_images')

    image_certdict = cPickle.load(open(image_certificate_file))
    print('using image certificate', image_certificate_file)
    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.setup_pyfft()

    filterbanks = []
    perfs = []
    model_configs = []
    center_config = initial_model

    # Greedy hill climbing: score every unseen one-step modification of the
    # current center config, move to the best one if it strictly improves test
    # accuracy, and stop otherwise (rep_limit of None means no round limit).
    i = 0
    while rep_limit is None or i < rep_limit:
        i += 1
        print('Round', i)
        next_configs = [m for m in get_consistent_deltas(center_config, modifier)
                        if m not in model_configs]
        if not next_configs:
            print('Breaking because no next configs')
            break
        next_results = [get_performance(task, image_hash, image_fs, m, convolve_func)
                        for m in next_configs]
        next_perfs = [x[0] for x in next_results]
        next_filterbanks = [x[1] for x in next_results]
        next_perf_ac_max = np.array([x['test_accuracy'] for x in next_perfs]).max()
        perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
        if next_perf_ac_max > perf_ac_max:
            next_perf_ac_argmax = np.array([x['test_accuracy'] for x in next_perfs]).argmax()
            center_config = next_configs[next_perf_ac_argmax]
            print('\n\nnew best performance is', next_perf_ac_max,
                  'from model', center_config, '\n\n')
            perfs.extend(next_perfs)
            model_configs.extend(next_configs)
            filterbanks.extend(next_filterbanks)
        else:
            print('Breaking because no further optimization could be done.',
                  'Best existing performance was', perf_ac_max,
                  'while best next performance was', next_perf_ac_max)
            break

    perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
    best_model = model_configs[perfargmax]
    best_performance = perfs[perfargmax]

    out_record = SON([('initial_model', initial_model),
                      ('task', son_escape(task)),
                      ('images', son_escape(image_args)),
                      ('images_hash', image_hash),
                      ('modifier_args', son_escape(modifier_args)),
                      ('modifier', modifier.__class__.__module__ + '.' +
                                   modifier.__class__.__name__),
                      ])
    out_record['filename'] = get_filename(out_record)
    out_record['performances'] = perfs
    out_record['best_model'] = best_model
    out_record['best_performance'] = best_performance
    out_record['num_steps'] = len(model_configs)
    out_record['models'] = model_configs

    outdata = cPickle.dumps(filterbanks)
    opt_fs.put(outdata, **out_record)

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.cleanup_pyfft()

    createCertificateDict(outfile, {'image_file': image_certificate_file})
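# Toy, self-contained sketch (assumption: not from the original code) of the
# greedy neighborhood search that greedy_optimization performs over model
# configs: score every neighbor of the current center, move only on strict
# improvement, stop otherwise.
def _toy_greedy_sketch(center, neighbors, score, rep_limit=100):
    best, best_score = center, score(center)
    for _ in range(rep_limit):
        candidates = neighbors(best)
        if not candidates:
            break
        top_score, top = max((score(c), c) for c in candidates)
        if top_score <= best_score:
            break  # no neighbor improves: local optimum reached
        best, best_score = top, top_score
    return best, best_score

# e.g. maximizing -(x - 3)**2 over unit steps from 0 converges to (3, 0):
# _toy_greedy_sketch(0, lambda x: [x - 1, x + 1], lambda x: -(x - 3) ** 2)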
def evaluate(outfile, feature_certificate, cpath, task, ext_hash):
    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]
    perf_fs = gridfs.GridFS(db, 'performance')
    perf_coll = db['performance.files']

    remove_existing(perf_coll, perf_fs, ext_hash)

    feature_certdict = cPickle.load(open(feature_certificate))
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']

    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db, 'features')

    task_list = task if isinstance(task, list) else [task]

    model_configs = get_most_recent_files(model_col, {'__hash__': model_hash})
    for m in model_configs:
        print('Evaluating model', m)
        for task in task_list:
            task['universe'] = task.get('universe', SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs', {})

            split_results = []
            splits = generate_splits(task, feature_hash, 'features')
            for (ind, split) in enumerate(splits):
                print('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                # sanity check: train and test sets must be disjoint
                assert set(train_filenames).intersection(test_filenames) == set([])

                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'], feature_fs, m, task)
                                               for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'], feature_fs, m, task)
                                              for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']

                print('classifier ...')
                res = svm.classify(train_features, train_labels,
                                   test_features, test_labels, classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)

            # average each statistic across splits
            model_results = SON([])
            for stat in STATS:
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array([r[stat] for r in split_results]).mean()

            out_record = SON([('model', m['config']['model']),
                              ('model_hash', model_hash),
                              ('model_filename', m['filename']),
                              ('images', son_escape(image_config_gen)),
                              ('image_hash', image_hash),
                              ('task', son_escape(task)),
                              ])
            out_record['filename'] = get_filename(out_record)
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)

            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results', split_results),
                                          ('splits', splits)]))
            perf_fs.put(out_data, **out_record)

    createCertificateDict(outfile, {'feature_file': feature_certificate})
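# Self-contained sketch (toy helper, not part of the original code) of the
# per-statistic aggregation step inside evaluate: a statistic is kept only if
# it is present and non-None in the first split result, then averaged across
# all splits.
def _mean_stats_sketch(split_results, stats):
    out = SON([])
    for stat in stats:
        if stat in split_results[0] and split_results[0][stat] is not None:
            out[stat] = sp.array([r[stat] for r in split_results]).mean()
    return out

# e.g. _mean_stats_sketch([{'test_accuracy': 0.8}, {'test_accuracy': 0.9}],
#                         ['test_accuracy'])  ->  SON([('test_accuracy', 0.85)])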