def generate_splits(task_config, hash, colname):
    """Assemble the split query/universe from ``task_config`` and delegate
    to ``traintest.generate_split2``.

    Args:
        task_config: mapping that must contain ``'ntrain'``, ``'ntest'`` and
            ``'query'``; may contain ``'ntrain_pos'``, ``'ntest_pos'``
            (both default to ``None``), ``'N'`` (defaults to 10) and
            ``'universe'`` (defaults to an empty ``SON``).
        hash: value stored under the ``'__hash__'`` key of the universe
            query.
        colname: forwarded as the second positional argument of
            ``generate_split2`` (presumably a collection name -- TODO
            confirm against ``traintest``).

    Returns:
        Whatever ``traintest.generate_split2`` returns.

    Raises:
        KeyError: if ``'ntrain'``, ``'ntest'`` or ``'query'`` is missing.
    """
    universe = SON([('__hash__', hash)])

    # Required keys are read first so a malformed config fails before any
    # query rewriting happens.
    train_count = task_config['ntrain']
    test_count = task_config['ntest']
    train_pos_count = task_config.get('ntrain_pos')
    test_pos_count = task_config.get('ntest_pos')
    n = task_config.get('N', 10)
    raw_query = task_config['query']

    # The universe restriction is passed through reach_in('config', ...)
    # and merged into the hash-based universe query.
    universe_spec = task_config.get('universe', SON([]))
    universe.update(reach_in('config', universe_spec))

    config_query = reach_in('config', raw_query)

    # Debug output of the two queries that are handed to the splitter.
    print('q', config_query)
    print('u', universe)

    return traintest.generate_split2(
        DB_NAME,
        colname,
        config_query,
        n,
        train_count,
        test_count,
        ntrain_pos=train_pos_count,
        ntest_pos=test_pos_count,
        universe=universe,
        use_negate=True,
    )
def train_test_loop(outfile, extract_creates, task_config,
                    feature_config_path, hash):
    """Run ``train_test`` once per model in the feature config and store
    each result in GridFS, then write a certificate to ``outfile``.

    For every entry of ``feature_config['models']`` the pickled split data
    is put into the ``'model_performance'`` GridFS collection of database
    ``'v1'``, with metadata made of the feature config path, the model, the
    escaped task and image configs, the ``train_test`` results and a
    filename from ``get_filename``.

    Args:
        outfile: forwarded to ``createCertificateDict``.
        extract_creates: not used in this function.
        task_config: mapping that must contain ``'ntrain'``, ``'ntest'`` and
            ``'query'``; may contain ``'ntrain_pos'`` (default ``None``),
            ``'N'`` (default 10) and ``'universe'`` (default empty ``SON``).
        feature_config_path: passed to ``get_config``; the loaded config
            must provide ``'image'`` and ``'models'``.
        hash: value stored under ``'__config_hash__'`` in the base universe
            query.

    Raises:
        KeyError: if a required key is missing from either config.

    NOTE(review): ``'ntest_pos'`` is not read from ``task_config`` here, and
    the connection opened below is never explicitly closed in this function
    -- confirm both are intentional.
    """
    feature_config = get_config(feature_config_path)
    universe = SON([('__config_hash__', hash)])

    # Not used afterwards; kept so that a feature config without an
    # 'image' key still fails here, before any database work starts.
    image_params = SON([('image', feature_config['image'])])

    model_specs = feature_config['models']

    train_count = task_config['ntrain']
    test_count = task_config['ntest']
    train_pos_count = task_config.get('ntrain_pos')
    n = task_config.get('N', 10)
    raw_query = task_config['query']

    universe_spec = task_config.get('universe', SON([]))
    universe.update(reach_in('config', universe_spec))

    print('\n')
    print('BASE', universe)
    print('\n')

    connection = pm.Connection(document_class=SON)
    database = connection['v1']
    performance_fs = gridfs.GridFS(database, collection='model_performance')

    config_query = reach_in('config', raw_query)

    for model in model_specs:
        # Each model gets its own copy of the universe, extended with the
        # model spec passed through reach_in('config.model', ...).
        model_universe = universe.copy()
        model_universe.update(reach_in('config.model', model))

        split_data, results = train_test(
            config_query,
            'v1',
            'features',
            train_count,
            test_count,
            ntrain_pos=train_pos_count,
            N=n,
            universe=model_universe,
        )
        pickled_split = cPickle.dumps(split_data)

        escaped_task = son_escape(task_config)
        escaped_image = son_escape(feature_config['image'])
        record = SON([
            ('feature_config_path', feature_config_path),
            ('model', model),
            ('task', escaped_task),
            ('image__aggregate__', escaped_image),
        ])

        # The filename is computed from the identifying fields only,
        # before the results are merged into the record.
        filename = get_filename(record)
        record.update(results)
        record['filename'] = filename

        performance_fs.put(pickled_split, **record)

    certificate = {
        'task_config': task_config,
        'feature_config': feature_config,
        'feature_config_path': feature_config_path,
    }
    createCertificateDict(outfile, certificate)