Beispiel #1
0
def generate_and_insert_single_image(x,im_hash):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    im_coll = db['images.files']
    im_fs = gridfs.GridFS(db,'images')
    
    image_string = rendering.render_image(x['image']) 
    y = SON([('config',x)])
    filename = get_filename(x)
    y['filename'] = filename
    y['__hash__'] = im_hash
    im_fs.put(image_string,**y)
Beispiel #2
0
def put_in_split(split,image_config_gen,m,task,ext_hash,split_id,split_fs):
    out_record = SON([('model',m['config']['model']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('task',son_escape(task)),
                      ('split_id',split_id),
                 ])   

    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record['__hash__'] = ext_hash
    print('dump out ...')
    out_data = cPickle.dumps(SON([('split',split)]))
    
    split_fs.put(out_data,**out_record)
Beispiel #3
0
def extract_features_inner_core(image_certificate, model_certificate, feature_hash, image_hash,
     model_hash, convolve_func_name,device_id, im_query, m_query, im_skip,
     im_limit, m_skip, m_limit):

    if im_query is None:
        im_query = {}
    if m_query is None:
        m_query = {}
        
    im_query['__hash__'] = image_hash
    m_query['__hash__'] = model_hash

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    
    image_col = db['images.files'] 
    model_col = db['models.files'] 
    
    image_fs = gridfs.GridFS(db,'images')
    model_fs = gridfs.GridFS(db,'models')
    
    feature_fs = gridfs.GridFS(db,'features')
                       
    if convolve_func_name == 'pyfft':
        context = v1_pyfft.setup_pyfft(device_id)
        context.push()
        convolve_func = functools.partial(v1f.v1like_filter_pyfft,device_id=device_id)
    else:
        convolve_func = v1f.v1like_filter_numpy

    L1 = get_most_recent_files(image_col,im_query,skip=im_skip,limit=im_limit)
    L2 = get_most_recent_files(model_col,m_query,skip=m_skip,limit=m_limit)
        
    for image_config in L1:
        for model_config in L2: 
            features = compute_features(image_config['filename'], image_fs, model_config, model_fs,convolve_func)
            features_string = cPickle.dumps(features)
            y = SON([('config',SON([('model',model_config['config']['model']),('image',image_config['config']['image'])]))])
            filename = get_filename(y['config'])
            y['filename'] = filename
            y['__hash__'] = feature_hash
            feature_fs.put(features_string,**y)
            
            
    if convolve_func_name == 'pyfft':
        context.pop()
Beispiel #4
0
def train_test_loop(outfile,extract_creates,task_config,feature_config_path,hash):

    feature_config = get_config(feature_config_path)
        
    base_query = SON([('__config_hash__',hash)])
    
    image_params = SON([('image',feature_config['image'])])
    models_params = feature_config['models']

    ntrain = task_config['ntrain']
    ntest = task_config['ntest']
    ntrain_pos = task_config.get('ntrain_pos')
    N = task_config.get('N',10)
    query = task_config['query']  
    base_query.update(reach_in('config',task_config.get('universe',SON([]))))
 
    print('\n')
    print('BASE',base_query)
    print('\n')
    
    conn = pm.Connection(document_class=SON)
    db = conn['v1']
    fs = gridfs.GridFS(db, collection = 'model_performance')
    
    cquery = reach_in('config',query)
    for m in models_params:
        base_query_copy = base_query.copy()
        base_query_copy.update(reach_in('config.model',m))
        splitdata, results = train_test(cquery,'v1','features',ntrain,ntest,ntrain_pos=ntrain_pos,N=N,universe=base_query_copy)
        
        splitpickle = cPickle.dumps(splitdata)
        
        data = SON([('feature_config_path',feature_config_path),
                    ('model',m),
                    ('task',son_escape(task_config)),
                    ('image__aggregate__',son_escape(feature_config['image']))])
        filename = get_filename(data)
        data.update(results)
        data['filename'] = filename
        

        fs.put(splitpickle,**data)
        
    createCertificateDict(outfile,{'task_config':task_config,'feature_config':feature_config,'feature_config_path':feature_config_path})    
Beispiel #5
0
def put_in_split_result(res,image_config_gen,m,task,ext_hash,split_id,splitres_fs):
    out_record = SON([('model',m['config']['model']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('task',son_escape(task)),
                      ('split_id',split_id),
                 ])   
                 
    split_result = SON([])
    for stat in STATS:
        if stat in res and res[stat] != None:
            split_result[stat] = res[stat] 

    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record['__hash__'] = ext_hash
    out_record.update(split_result)
    print('dump out split result...')
    out_data = cPickle.dumps(SON([('split_result',res)]))
    splitres_fs.put(out_data,**out_record)          
Beispiel #6
0
def generate_images(outfile,im_hash,config_gen):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    im_coll = db['images.files']
    im_fs = gridfs.GridFS(db,'images')
    
    remove_existing(im_coll,im_fs,im_hash)
    
    X = rendering.config_gen(config_gen)
    
    for (i,x) in enumerate(X):
        if (i/100)*100 == i:
            print(i,x)       
        image_string = rendering.render_image(x['image']) 
        y = SON([('config',x)])
        filename = get_filename(x)
        y['filename'] = filename
        y['__hash__'] = im_hash
        im_fs.put(image_string,**y)
        
    createCertificateDict(outfile,{'image_hash':im_hash,'args':config_gen})
Beispiel #7
0
def generate_models(outfile,m_hash,config_gen):

    conn = pm.Connection(document_class = SON)
    db = conn[DB_NAME]
    m_coll = db['models.files']
    m_fs = gridfs.GridFS(db,'models')
    
    remove_existing(m_coll,m_fs,m_hash)
    
    M = model_config_generator(config_gen)       
    
    for (i,m) in enumerate(M):
        filterbank = filter_generation.get_filterbank(m['model']) 
        filterbank_string = cPickle.dumps(filterbank)
        if (i/100)*100 == i:
            print(i,m) 
        
        y = SON([('config',m)])
        filename = get_filename(m)
        y['filename'] = filename
        y['__hash__'] = m_hash
        m_fs.put(filterbank_string,**y)
        
    createCertificateDict(outfile,{'model_hash':m_hash,'args':config_gen})
Beispiel #8
0
def greedy_optimization(outfile,task,image_certificate_file,initial_model,convolve_func,rep_limit, modifier_args,modifier):

    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']
    
    opt_fs = gridfs.GridFS(db,'optimized_performance')
    
    image_coll = db['raw_images.files']
    image_fs = gridfs.GridFS(db,'raw_images')
    
    image_certdict = cPickle.load(open(image_certificate_file))
    print('using image certificate', image_certificate_file)
    
    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.setup_pyfft()
    
  
    filterbanks = []
    perfs = []
    model_configs = []
    center_config = initial_model
    
    i = 0
    improving = True
    
    
    while ((i < rep_limit) or rep_limit is None):
        i += 1
        print('Round', i)
        next_configs = [m for m in get_consistent_deltas(center_config,modifier) if m not in model_configs]

        if next_configs:
            next_results = [get_performance(task,image_hash,image_fs,m,convolve_func) for m in next_configs]
            next_perfs = [x[0] for x in next_results]
            next_filterbanks = [x[1] for x in next_results]
            next_perf_ac_max = np.array([x['test_accuracy'] for x in next_perfs]).max()
            perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
            if next_perf_ac_max > perf_ac_max:
                next_perf_ac_argmax = np.array([x['test_accuracy'] for x in next_perfs]).argmax()
                center_config = next_configs[next_perf_ac_argmax]  
                print('\n\n')
                print('new best performance is', next_perf_ac_max, 'from model', center_config)
                print('\n\n')
                perfs.extend(next_perfs)  
                model_configs.extend(next_configs)
                filterbanks.extend(next_filterbanks)
            else:
                print('Breaking because no further optimization could be done.  Best existing performance was', perf_ac_max, 'while best next performance was', next_perf_ac_max)
                break
            
        else:
            print('Breaking because no next configs')
            break
        

    perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
    best_model = model_configs[perfargmax]
    best_performance = perfs[perfargmax]
        
    out_record = SON([('initial_model',initial_model),
                       ('task',son_escape(task)),
                       ('images',son_escape(image_args)),
                       ('images_hash',image_hash),
                       ('modifier_args',son_escape(modifier_args)),
                       ('modifier',modifier.__class__.__module__ + '.' + modifier.__class__.__name__)
                     ])   
    filename = get_filename(out_record)
    out_record['filename'] = filename
    out_record.update(SON([('performances',perfs)]))
    out_record.update(SON([('best_model',best_model)]))
    out_record.update(SON([('best_performance',best_performance)]))
    out_record.update(SON([('num_steps',len(model_configs))]))
    out_record.update(SON([('models',model_configs)]))
    outdata = cPickle.dumps(filterbanks)
        
    opt_fs.put(outdata,**out_record)
     
    if convolve_func == v1f.v1like_filter_pyfft:
        v1_pyfft.cleanup_pyfft() 
      
    createCertificateDict(outfile,{'image_file':image_certificate_file})
Beispiel #9
0
def evaluate(outfile,feature_certificate,cpath,task,ext_hash):

    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]
    
    perf_fs = gridfs.GridFS(db,'performance')
    perf_coll = db['performance.files']
    
    remove_existing(perf_coll,perf_fs,ext_hash)

    feature_certdict = cPickle.load(open(feature_certificate))
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db,'features')
    feature_col = db['features.files']
    
    stats = ['test_accuracy','ap','auc','mean_ap','mean_auc','train_accuracy']    
       
    if isinstance(task,list):
        task_list = task
    else:
        task_list = [task]
    
    model_configs = get_most_recent_files(model_col,{'__hash__':model_hash})
    
    for m in model_configs:
        print('Evaluating model',m) 
        for task in task_list:
            task['universe'] = task.get('universe',SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs',{})    
            split_results = []
            splits = generate_splits(task,feature_hash,'features') 
            for (ind,split) in enumerate(splits):
                print ('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']
                
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                assert set(train_filenames).intersection(test_filenames) == set([])
                
                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']
    
                print('classifier ...')
                res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)
        
            model_results = SON([])
            for stat in STATS:
                if stat in split_results[0] and split_results[0][stat] != None:
                    model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()           
    
            out_record = SON([('model',m['config']['model']),
                              ('model_hash',model_hash), 
                              ('model_filename',m['filename']), 
                              ('images',son_escape(image_config_gen)),
                              ('image_hash',image_hash),
                              ('task',son_escape(task)),
                         ])
                                             
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results',split_results),('splits',splits)]))
            
            perf_fs.put(out_data,**out_record)

    createCertificateDict(outfile,{'feature_file':feature_certificate})