Ejemplo n.º 1
0
def put_in_split(split,image_config_gen,m,task,ext_hash,split_id,split_fs):
    """Pickle one split and store it in ``split_fs`` together with its metadata.

    The metadata identifies the split by model config (``m['config']['model']``),
    escaped image config, escaped task and ``split_id``.  A filename derived
    from those four fields and ``ext_hash`` (under ``'__hash__'``) are added
    before the payload is written.
    """
    identity_fields = [
        ('model', m['config']['model']),
        ('images', son_escape(image_config_gen['images'])),
        ('task', son_escape(task)),
        ('split_id', split_id),
    ]
    metadata = SON(identity_fields)

    # The filename is computed from the identity fields only, i.e. before
    # 'filename' and '__hash__' become part of the record.
    metadata['filename'] = get_filename(metadata)
    metadata['__hash__'] = ext_hash

    print('dump out ...')
    payload = cPickle.dumps(SON([('split', split)]))

    split_fs.put(payload, **metadata)
Ejemplo n.º 2
0
def train_test_loop(outfile,extract_creates,task_config,feature_config_path,hash):
    """Run ``train_test`` for every configured model and store each outcome.

    For each entry of ``feature_config['models']`` the base query (config hash
    plus the task's optional ``'universe'``) is narrowed to that model,
    ``train_test`` is called against the ``'features'`` collection of database
    ``'v1'``, and the pickled split data is written to the
    ``'model_performance'`` GridFS with the results as metadata.  A certificate
    is written to ``outfile`` once all models are done.

    ``extract_creates`` is accepted but not used by this function.
    """
    feature_config = get_config(feature_config_path)

    universe_query = SON([('__config_hash__', hash)])

    # Not consumed below; kept so that a feature config without an 'image'
    # entry still fails here, before any database work starts.
    image_params = SON([('image', feature_config['image'])])
    model_specs = feature_config['models']

    num_train = task_config['ntrain']
    num_test = task_config['ntest']
    num_train_pos = task_config.get('ntrain_pos')
    n_arg = task_config.get('N', 10)
    task_query = task_config['query']
    universe = task_config.get('universe', SON([]))
    universe_query.update(reach_in('config', universe))

    print('\n')
    print('BASE',universe_query)
    print('\n')

    connection = pm.Connection(document_class=SON)
    database = connection['v1']
    perf_fs = gridfs.GridFS(database, collection = 'model_performance')

    config_query = reach_in('config', task_query)
    for model in model_specs:
        # Restrict the shared universe to the current model.
        model_universe = universe_query.copy()
        model_universe.update(reach_in('config.model', model))
        splitdata, results = train_test(config_query, 'v1', 'features',
                                        num_train, num_test,
                                        ntrain_pos=num_train_pos,
                                        N=n_arg,
                                        universe=model_universe)

        pickled_split = cPickle.dumps(splitdata)

        record = SON([
            ('feature_config_path', feature_config_path),
            ('model', model),
            ('task', son_escape(task_config)),
            ('image__aggregate__', son_escape(feature_config['image'])),
        ])
        # The filename is derived before the results are merged in.
        record_filename = get_filename(record)
        record.update(results)
        record['filename'] = record_filename

        perf_fs.put(pickled_split, **record)

    certificate = {
        'task_config': task_config,
        'feature_config': feature_config,
        'feature_config_path': feature_config_path,
    }
    createCertificateDict(outfile, certificate)
Ejemplo n.º 3
0
def put_in_split_result(res,image_config_gen,m,task,ext_hash,split_id,splitres_fs):
    """Store the pickled result of one split in ``splitres_fs``.

    The metadata identifies the split by model config, escaped image config,
    escaped task and ``split_id``; a filename derived from those fields and
    ``ext_hash`` (under ``'__hash__'``) are added afterwards.  Every statistic
    named in ``STATS`` that ``res`` holds with a value other than ``None`` is
    copied into the metadata as well.  The stored payload is the complete
    ``res`` pickled under the key ``'split_result'``.
    """
    out_record = SON([('model',m['config']['model']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('task',son_escape(task)),
                      ('split_id',split_id),
                 ])

    # Identity test against None: `!= None` delegates to the value's own
    # comparison operator, which need not return a plain boolean.
    split_result = SON([(stat, res[stat]) for stat in STATS
                        if stat in res and res[stat] is not None])

    # The filename depends on the identity fields only, so it is computed
    # before '__hash__' and the statistics are merged in.
    out_record['filename'] = get_filename(out_record)
    out_record['__hash__'] = ext_hash
    out_record.update(split_result)
    print('dump out split result...')
    out_data = cPickle.dumps(SON([('split_result',res)]))
    splitres_fs.put(out_data,**out_record)
Ejemplo n.º 4
0
def put_in_performance(split_results,image_config_gen,m,model_hash,image_hash,perf_coll,task,ext_hash):
    """Average per-split statistics and insert one performance record.

    ``split_results`` must be non-empty: its first entry decides which of the
    statistics named in ``STATS`` are averaged, and each such statistic is
    then read from every entry.  The inserted record also carries the model
    config, model hash and filename, escaped image config, image hash,
    escaped task and ``ext_hash`` (under ``'__hash__'``).
    """
    first_result = split_results[0]

    model_results = SON([])
    for stat in STATS:
        # Identity test against None: `!= None` delegates to the value's own
        # comparison operator, which need not return a plain boolean.
        if stat in first_result and first_result[stat] is not None:
            model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()

    out_record = SON([('model',m['config']['model']),
                      ('model_hash',model_hash),
                      ('model_filename',m['filename']),
                      ('images',son_escape(image_config_gen['images'])),
                      ('image_hash',image_hash),
                      ('task',son_escape(task)),
                      ('__hash__',ext_hash)
                 ])

    out_record.update(model_results)

    perf_coll.insert(out_record)
Ejemplo n.º 5
0
def greedy_optimization(outfile,task,image_certificate_file,initial_model,convolve_func,rep_limit, modifier_args,modifier):
    """Greedily search model configurations for the best test accuracy.

    Starting from ``initial_model``, each round asks
    ``get_consistent_deltas(center_config, modifier)`` for candidate configs,
    drops those already recorded, and evaluates the rest with
    ``get_performance``.  If the round's best ``'test_accuracy'`` exceeds the
    best recorded so far (0 when nothing is recorded yet), the best candidate
    becomes the new center and the round's configs, performances and
    filterbanks are recorded; otherwise the search stops.  It also stops when
    a round yields no new candidates or after ``rep_limit`` rounds
    (``rep_limit=None`` means no round limit).

    The pickled filterbanks are stored in the ``'optimized_performance'``
    GridFS of database ``'v1'`` with the search summary as metadata, and a
    certificate is written to ``outfile``.

    NOTE(review): if no round records anything (e.g. ``rep_limit == 0`` or the
    first round does not beat 0), the final arg-max runs on an empty sequence
    and fails - confirm whether callers can hit this.
    """
    conn = pm.Connection(document_class=bson.SON)
    db = conn['v1']

    opt_fs = gridfs.GridFS(db,'optimized_performance')
    image_fs = gridfs.GridFS(db,'raw_images')

    # Close the certificate file deterministically instead of leaking the handle.
    with open(image_certificate_file) as cert_file:
        image_certdict = cPickle.load(cert_file)
    print('using image certificate', image_certificate_file)

    image_hash = image_certdict['run_hash']
    image_args = image_certdict['out_args']

    uses_pyfft = convolve_func == v1f.v1like_filter_pyfft
    if uses_pyfft:
        v1_pyfft.setup_pyfft()

    try:
        filterbanks = []
        perfs = []
        model_configs = []
        center_config = initial_model

        i = 0

        # Test for None first: comparing an int with None is an error on
        # Python 3 and only worked by accident on Python 2.
        while rep_limit is None or i < rep_limit:
            i += 1
            print('Round', i)
            next_configs = [m for m in get_consistent_deltas(center_config,modifier) if m not in model_configs]

            if next_configs:
                next_results = [get_performance(task,image_hash,image_fs,m,convolve_func) for m in next_configs]
                next_perfs = [x[0] for x in next_results]
                next_filterbanks = [x[1] for x in next_results]
                next_accuracies = np.array([x['test_accuracy'] for x in next_perfs])
                next_perf_ac_max = next_accuracies.max()
                perf_ac_max = max([x['test_accuracy'] for x in perfs]) if perfs else 0
                if next_perf_ac_max > perf_ac_max:
                    center_config = next_configs[next_accuracies.argmax()]
                    print('\n\n')
                    print('new best performance is', next_perf_ac_max, 'from model', center_config)
                    print('\n\n')
                    # Only improving rounds are recorded.
                    perfs.extend(next_perfs)
                    model_configs.extend(next_configs)
                    filterbanks.extend(next_filterbanks)
                else:
                    print('Breaking because no further optimization could be done.  Best existing performance was', perf_ac_max, 'while best next performance was', next_perf_ac_max)
                    break

            else:
                print('Breaking because no next configs')
                break

        perfargmax = np.array([p['test_accuracy'] for p in perfs]).argmax()
        best_model = model_configs[perfargmax]
        best_performance = perfs[perfargmax]

        out_record = SON([('initial_model',initial_model),
                           ('task',son_escape(task)),
                           ('images',son_escape(image_args)),
                           ('images_hash',image_hash),
                           ('modifier_args',son_escape(modifier_args)),
                           ('modifier',modifier.__class__.__module__ + '.' + modifier.__class__.__name__)
                         ])
        # The filename is derived before the search results are merged in.
        filename = get_filename(out_record)
        out_record['filename'] = filename
        out_record.update(SON([('performances',perfs)]))
        out_record.update(SON([('best_model',best_model)]))
        out_record.update(SON([('best_performance',best_performance)]))
        out_record.update(SON([('num_steps',len(model_configs))]))
        out_record.update(SON([('models',model_configs)]))
        outdata = cPickle.dumps(filterbanks)

        opt_fs.put(outdata,**out_record)
    finally:
        # Release pyfft even when the search or the final write fails.
        if uses_pyfft:
            v1_pyfft.cleanup_pyfft()

    createCertificateDict(outfile,{'image_file':image_certificate_file})
""" Example Parameters module
"""

#from collections import OrderedDict
import copy
import itertools
from bson import SON
import config.ten_categories_images as Images
import config.renderman_correlation_tasks2 as Tasks
from dbutils import son_escape

import config.ht_l1_gabor_models_for_corr as l1_models

# Template for the correlation layer that is appended to every base model
# below; 'task' and 'num_filters' are filled in per generated model.
corr_layer =  SON([(u'filter',SON([(u'model_name','correlation'),
                                   (u'random_subset',SON([('const',.5)])),
                                   (u'images',son_escape(Images.config['images']))])),
                   (u'activ', SON([(u'min_out', 0),
                                   (u'max_out', 1)]))])


# One model per (base model, filter count, extraction task) combination.
models = []
for M in l1_models.config['models']:
    for num_filters in [256,384]:
        for task in Tasks.config['extractions']:
            m = copy.deepcopy(M)
            # Configure the new layer before appending it, rather than
            # addressing it afterwards through a fixed index that is only
            # right when the base model has exactly two layers.
            layer = copy.deepcopy(corr_layer)
            layer['filter']['task'] = son_escape(task)
            layer['filter']['num_filters'] = num_filters
            m['layers'].append(layer)
            models.append(m)
    
                                                (u'ker_shape', [13, 13]), 
                                                (u'divfreqs', [2, 4, 7, 8, 11]), 
                                                (u'norients', 7)])), 
                               (u'activ', SON([(u'min_out', 0), 
                                               (u'max_out', 1)])), 
                               (u'lnorm', SON([(u'inker_shape', [9, 9]), 
                                               (u'outker_shape', [9, 9]), 
                                               (u'threshold', 10.0), 
                                               (u'stretch', 0.1)])), 
                               (u'lpool', SON([(u'order', 2), 
                                               (u'stride', 2), 
                                               (u'ker_shape', [5, 5])]))]),
                          SON([(u'filter',SON([(u'model_name','correlation'),
                                               (u'num_filters',256),
                                               (u'random_subset',SON([('const',.5)])),
                                               (u'task',son_escape(Tasks.config['extractions'][0])),
                                               (u'images',son_escape(Images.config['images']))])),
                               (u'activ', SON([(u'min_out', 0), 
                                               (u'max_out', 1)]))])
                         ]),
                        
            ])

# Module-level configuration: the 'models' list holds the single `model` object.
config = {
     'models': [model]
}
 



                                    (u"stretch", 0.1),
                                ]
                            ),
                        ),
                        (u"lpool", SON([(u"order", 2), (u"stride", 2), (u"ker_shape", [5, 5])])),
                    ]
                ),
                SON(
                    [
                        (
                            u"filter",
                            SON(
                                [
                                    (u"model_name", "correlation"),
                                    (u"random_subset", SON([("const", 0.5)])),
                                    (u"num_filters", 256),
                                    (u"task", son_escape(Tasks.config["extractions"][0])),
                                    (u"images", son_escape(Images.config["images"])),
                                ]
                            ),
                        ),
                        (u"activ", SON([(u"min_out", 0), (u"max_out", 1)])),
                    ]
                ),
            ],
        ),
    ]
)

# Module-level configuration: the "models" list holds the single `model` object.
config = {"models": [model]}
Ejemplo n.º 9
0
def extract_and_evaluate_parallel(outfile,image_certificate_file,model_certificate_file,cpath,convolve_func_name,task,ext_hash):
    """Fan out extraction/evaluation as one queued job per (model, task, split).

    For every model and task, each split produced by ``generate_splits`` is
    stored with ``put_in_split`` and a job running
    ``extract_and_evaluate_parallel_core`` is submitted through ``qsub``.
    After ``wait_and_get_statuses`` returns, the per-split results are read
    back per model and task and aggregated with ``put_in_performance``.
    Finally a certificate is written to ``outfile``.

    ``convolve_func_name`` selects the queue options and must be ``'numpy'``
    or ``'pyfft'``.  ``cpath`` is accepted but not used by this function.

    Raises:
        ValueError: if ``convolve_func_name`` is not a supported name.
    """
    # Resolve the queue options first so that an unsupported name fails
    # here, before anything is stored or submitted, instead of surfacing as
    # an unbound local at the first qsub call.
    if convolve_func_name == 'numpy':
        opstring = '-l qname=extraction_cpu.q'
    elif convolve_func_name == 'pyfft':
        opstring = '-l qname=extraction_gpu.q -o /home/render -e /home/render'
    else:
        raise ValueError('unsupported convolve_func_name: %r' % (convolve_func_name,))

    (model_configs, image_config_gen, model_hash, image_hash, task_list,
     perf_col, split_coll, split_fs, splitperf_coll, splitperf_fs) = prepare_extract_and_evaluate(ext_hash,
                                                                                                  image_certificate_file,
                                                                                                  model_certificate_file,
                                                                                                  task)

    jobids = []
    for m in model_configs:
        print('Evaluating model',m)
        for current_task in task_list:
            print('task',current_task)
            splits = generate_splits(current_task,image_hash,'images')
            for (ind,split) in enumerate(splits):
                # The split is stored first; the job is only handed its index.
                put_in_split(split,image_config_gen,m,current_task,ext_hash,ind,split_fs)
                jobid = qsub(extract_and_evaluate_parallel_core,(image_config_gen,m,current_task,ext_hash,ind,convolve_func_name),opstring=opstring)
                jobids.append(jobid)

    print(jobids)
    # Called for its waiting effect; the returned statuses are not inspected.
    wait_and_get_statuses(jobids)

    for m in model_configs:
        print('Evaluating model',m)
        for current_task in task_list:
            split_results = get_most_recent_files(splitperf_coll,{'__hash__':ext_hash,'task':son_escape(current_task),'model':m['config']['model'],'images':son_escape(image_config_gen['images'])})
            put_in_performance(split_results,image_config_gen,m,model_hash,image_hash,perf_col,current_task,ext_hash)

    createCertificateDict(outfile,{'image_file':image_certificate_file,'models_file':model_certificate_file})
Ejemplo n.º 10
0
def extract_and_evaluate_parallel_core(image_config_gen,m,task,ext_hash,split_id,convolve_func_name,cache_port=None):
    """Evaluate one stored split and store its result.

    Looks up the split record matching ``ext_hash``, ``split_id``, the task,
    the model config and the image config in the ``'splits'`` GridFS, unpickles
    its ``'split'`` entry, runs ``extract_and_evaluate_core`` on it and writes
    the outcome to the ``'split_performance'`` GridFS via
    ``put_in_split_result``.

    ``cache_port`` defaults to ``NETWORK_CACHE_PORT`` when ``None``.  An
    ``IndexError`` is raised when no matching split record exists.
    """
    if cache_port is None:
        cache_port = NETWORK_CACHE_PORT

    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]
    split_col = db['splits.files']
    split_fs = gridfs.GridFS(db,'splits')

    # 'task' is part of the lookup because split ids restart at 0 for every
    # task; without it the first match could belong to a different task.
    split_query = {'__hash__':ext_hash,
                   'split_id':split_id,
                   'task':son_escape(task),
                   'model':m['config']['model'],
                   'images':son_escape(image_config_gen['images'])}
    splitconf = get_most_recent_files(split_col,split_query)[0]
    split = cPickle.loads(split_fs.get_version(splitconf['filename']).read())['split']
    res = extract_and_evaluate_core(split,m,convolve_func_name,task,cache_port)
    splitperf_fs = gridfs.GridFS(db,'split_performance')
    put_in_split_result(res,image_config_gen,m,task,ext_hash,split_id,splitperf_fs)
Ejemplo n.º 11
0
def evaluate(outfile,feature_certificate,cpath,task,ext_hash):
    """Train and test a classifier on stored features for every model and task.

    Existing ``'performance'`` entries for ``ext_hash`` are removed first.
    For each model selected by the certificate's model hash and each task
    (``task`` may be one task or a list), every split from
    ``generate_splits`` is classified with ``svm.classify`` on features loaded
    through ``load_features``.  The statistics named in ``STATS`` are averaged
    over the splits and stored, together with the pickled split results and
    splits, in the ``'performance'`` GridFS.  A certificate is written to
    ``outfile`` at the end.

    Side effect: each task is modified in place - its ``'universe'`` entry is
    created if missing and its ``'model'`` key is set to the current model's
    config.

    NOTE(review): a task that produces no splits fails at
    ``split_results[0]`` with an IndexError - confirm this cannot happen.
    """
    conn = pm.Connection(document_class=bson.SON)
    db = conn[DB_NAME]

    perf_fs = gridfs.GridFS(db,'performance')
    perf_coll = db['performance.files']

    remove_existing(perf_coll,perf_fs,ext_hash)

    # Close the certificate file deterministically instead of leaking the handle.
    with open(feature_certificate) as cert_file:
        feature_certdict = cPickle.load(cert_file)
    feature_hash = feature_certdict['feature_hash']
    image_hash = feature_certdict['image_hash']
    model_hash = feature_certdict['model_hash']
    image_config_gen = feature_certdict['args']['images']
    model_col = db['models.files']
    feature_fs = gridfs.GridFS(db,'features')

    if isinstance(task,list):
        task_list = task
    else:
        task_list = [task]

    model_configs = get_most_recent_files(model_col,{'__hash__':model_hash})

    for m in model_configs:
        print('Evaluating model',m)
        for task in task_list:
            # Restrict the task's universe to the current model.
            task['universe'] = task.get('universe',SON([]))
            task['universe']['model'] = m['config']['model']
            print('task', task)
            classifier_kwargs = task.get('classifier_kwargs',{})
            split_results = []
            splits = generate_splits(task,feature_hash,'features')
            for (ind,split) in enumerate(splits):
                print('split', ind)
                train_data = split['train_data']
                test_data = split['test_data']

                # Train and test sets must not share any file.
                train_filenames = [t['filename'] for t in train_data]
                test_filenames = [t['filename'] for t in test_data]
                assert not set(train_filenames).intersection(test_filenames)

                print('train feature extraction ...')
                train_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in train_data])
                print('test feature extraction ...')
                test_features = sp.row_stack([load_features(f['filename'],feature_fs,m,task) for f in test_data])
                train_labels = split['train_labels']
                test_labels = split['test_labels']

                print('classifier ...')
                res = svm.classify(train_features,train_labels,test_features,test_labels,classifier_kwargs)
                print('Split test accuracy', res['test_accuracy'])
                split_results.append(res)

            # The first split decides which statistics are averaged.
            model_results = SON([])
            for stat in STATS:
                # Identity test against None: `!= None` delegates to the
                # value's own comparison operator.
                if stat in split_results[0] and split_results[0][stat] is not None:
                    model_results[stat] = sp.array([split_result[stat] for split_result in split_results]).mean()

            out_record = SON([('model',m['config']['model']),
                              ('model_hash',model_hash),
                              ('model_filename',m['filename']),
                              ('images',son_escape(image_config_gen)),
                              ('image_hash',image_hash),
                              ('task',son_escape(task)),
                         ])

            # The filename is derived before the bookkeeping fields and the
            # statistics are merged in.
            filename = get_filename(out_record)
            out_record['filename'] = filename
            out_record['config_path'] = cpath
            out_record['__hash__'] = ext_hash
            out_record.update(model_results)
            print('dump out ...')
            out_data = cPickle.dumps(SON([('split_results',split_results),('splits',splits)]))

            perf_fs.put(out_data,**out_record)

    createCertificateDict(outfile,{'feature_file':feature_certificate})