def get_performance(task, image_hash, image_fs, model_config, convolve_func):
    """Evaluate one model configuration on a task via cross-validated SVM runs.

    For each train/test split produced by ``generate_splits``, extracts
    features for every image using the model's filterbank, trains and tests
    an SVM, then averages the recorded statistics across splits.

    Args:
        task: task description dict; may carry ``classifier_kwargs``.
        image_hash: hash identifying the image set for split generation.
        image_fs: GridFS-like store the feature extractor reads images from.
        model_config: model configuration used to build the filterbank.
        convolve_func: convolution callable passed through to extraction.

    Returns:
        (model_results, filterbank): a SON mapping each statistic the
        classifier produced to its mean over splits, and the filterbank used.
    """
    stats = ['test_accuracy', 'ap', 'auc', 'mean_ap', 'mean_auc', 'train_accuracy']
    classifier_kwargs = task.get('classifier_kwargs', {})
    splits = generate_splits(task, image_hash)
    filterbank = filter_generation.get_filterbank(model_config)
    split_results = []
    for (ind, split) in enumerate(splits):
        print('split', ind)
        train_data = split['train_data']
        test_data = split['test_data']
        # Sanity check: no image may appear in both the train and test sets.
        train_filenames = [t['filename'] for t in train_data]
        test_filenames = [t['filename'] for t in test_data]
        assert not set(train_filenames).intersection(test_filenames)
        train_features = sp.row_stack(
            [extract_features(im, image_fs, filterbank, model_config, convolve_func)
             for im in train_data])
        test_features = sp.row_stack(
            [extract_features(im, image_fs, filterbank, model_config, convolve_func)
             for im in test_data])
        res = svm.classify(train_features, split['train_labels'],
                           test_features, split['test_labels'],
                           **classifier_kwargs)
        split_results.append(res)
    model_results = SON([])
    for stat in stats:
        # Only average statistics the classifier actually produced;
        # `is not None` (not `!= None`) is the correct identity test.
        if stat in split_results[0] and split_results[0][stat] is not None:
            model_results[stat] = sp.array(
                [r[stat] for r in split_results]).mean()
    return model_results, filterbank
def learn_filterbank(fhs, configs):
    """Adapt an initial filterbank toward random image patches (online update).

    For each image handle, draws ``num_slices`` random patches; for each
    patch, finds the filter with the maximal ``dist`` response and nudges
    that filter toward the patch with a small fixed learning rate, then
    re-normalizes it.

    Args:
        fhs: sequence of 1-tuples/lists whose first element is a file handle.
        configs: sequence whose first element's first element is the config
            dict carrying ``config['model']`` (with ``num_slices``).

    Returns:
        The learned filterbank, serialized with ``cPickle.dumps``.
    """
    fhs = [fh[0] for fh in fhs]
    config = configs[0][0].copy()
    num_slices = config['model']['num_slices']
    filterbank = filter_generation.get_filterbank(config['model'])
    # Fix: the original reused the name `fh` both for the filter height and
    # as the file-handle loop variable; use distinct names.
    filter_h, filter_w, num_filters = filterbank.shape
    filter_kshape = (filter_h, filter_w)
    counts = np.zeros(num_filters)
    # Loop-invariant learning rate, hoisted out of the inner loop.
    # NOTE(review): a count-dependent rate (1/counts[imax]) was previously
    # considered here; the fixed rate is the active behavior.
    lr = .0001
    for fh in fhs:
        # First channel only — assumes image2array yields (H, W, C).
        array = image2array(config['model'], fh)[:, :, 0]
        slices = [get_random_slice(array, filter_kshape) for _ in range(num_slices)]
        for s in slices:
            patch = array[s]
            distarray = np.array([dist(filterbank[:, :, i], patch)
                                  for i in range(num_filters)])
            # argmax of `dist` — presumably a similarity score, so this picks
            # the best-matching filter; confirm against dist's definition.
            imax = distarray.argmax()
            counts[imax] += 1
            filterbank[:, :, imax] = normalize(
                filterbank[:, :, imax] * (1 - lr) + patch * lr)
    return cPickle.dumps(filterbank)
def get_filterbank(config):
    """Build the filterbank for ``config['model']`` and serialize it.

    Returns either the pickled filterbank directly, or — when the generator
    returns a dict — that dict with its 'filterbank' entry moved under the
    '__file__' key as a pickled string.
    """
    fb = filter_generation.get_filterbank(config['model'])
    # Plain array result: serialize it as-is.
    if not isinstance(fb, dict):
        return cPickle.dumps(fb)
    # Dict result: must carry a 'filterbank' entry, which is pickled and
    # relocated under '__file__'.
    assert 'filterbank' in fb
    fb['__file__'] = cPickle.dumps(fb.pop('filterbank'))
    return fb
def generate_models(outfile, m_hash, config_gen):
    """Generate a filterbank for every model configuration and store in GridFS.

    Clears any previous entries for ``m_hash``, then for each generated model
    config pickles its filterbank and puts it into the 'models' GridFS with
    config, filename, and hash metadata; finally writes a certificate file.

    Args:
        outfile: path handed to ``createCertificateDict``.
        m_hash: hash tag identifying this batch of models.
        config_gen: configuration-generator spec fed to
            ``model_config_generator``.
    """
    conn = pm.Connection(document_class=SON)
    db = conn[DB_NAME]
    m_coll = db['models.files']
    m_fs = gridfs.GridFS(db, 'models')
    remove_existing(m_coll, m_fs, m_hash)
    for (i, m) in enumerate(model_config_generator(config_gen)):
        filterbank = filter_generation.get_filterbank(m['model'])
        filterbank_string = cPickle.dumps(filterbank)
        # Progress report every 100 models. Fix: `(i/100)*100 == i` relied on
        # integer division and breaks under Python 3 true division; use the
        # explicit, portable modulo test.
        if i % 100 == 0:
            print(i, m)
        y = SON([('config', m)])
        y['filename'] = get_filename(m)
        y['__hash__'] = m_hash
        m_fs.put(filterbank_string, **y)
    createCertificateDict(outfile, {'model_hash': m_hash, 'args': config_gen})