def store_consistency(behavior_name, consistency_type):
    """Compute human-vs-model consistency and insert it into ``benchmark_db``.

    Human and model behavior tables are selected according to
    ``consistency_type``, passed to ``trial_split_consistency`` together with
    a fixed set of keyword arguments, and the result is inserted into the
    ``benchmark_db[behavior_name]`` collection under the key
    ``'hvm_<consistency_type>_two_way'``.

    Args:
        behavior_name: name of the model behavior; also used as the name of
            the target collection in ``benchmark_db``.
        consistency_type: one of ``'subordinate'``, ``'basic'`` or ``'all'``
            (``'all'`` row-stacks the basic and subordinate tables).

    Raises:
        ValueError: if ``consistency_type`` is not one of the values above.
    """
    # Validate up front so a bad argument fails before any data is loaded,
    # and carry the explanation in the exception instead of printing it.
    if consistency_type not in ('subordinate', 'basic', 'all'):
        raise ValueError(
            "%s Not recognized as a consistency type" % consistency_type)

    results_coll = benchmark_db[behavior_name]
    results_key_name = '_'.join(['hvm', consistency_type, 'two_way'])

    if consistency_type == 'all':
        human_data = tb.tab_rowstack(
            [get_basic_human_data(),
             get_subordinate_human_data()])
        model_data = tb.tab_rowstack([
            g.get_model_behavior(behavior_name, 'basic'),
            g.get_model_behavior(behavior_name, 'subordinate')
        ])
    else:
        if consistency_type == 'subordinate':
            human_data = get_subordinate_human_data()
        else:
            human_data = get_basic_human_data()
        model_data = g.get_model_behavior(behavior_name, consistency_type)

    consistency_kwargs = {
        'metric': 'dp_standard',
        'kwargs': None,
        'split_field': 'two_way_type',
        'image_property': 'task_category',
        'response_property': 'Response',
        'bstrapiter': 3
    }
    # The settings are stored next to the result they produced.
    results = {'consistency_kwargs': consistency_kwargs}
    results[results_key_name] = trial_split_consistency(
        human_data, model_data, **consistency_kwargs)
    results_coll.insert(utils.SONify(results))
def get_trials(fs, type_tag):
    """Return one row-stacked trial table for all results tagged ``type_tag``.

    Every file record whose ``'type_tag'`` field equals ``type_tag`` is
    looked up, the last stored version of its payload is unpickled, turned
    into a trial table by ``trials_from_results_dic`` and the tables are
    row-stacked.
    """
    matching = list(fs._GridFS__files.find({'type_tag': type_tag}))
    tables = []
    for record in matching:
        payload = fs.get_last_version(_id=record['_id']).read()
        unpickled = cPickle.loads(payload)
        tables.append(
            trials_from_results_dic(unpickled, record['two_way_type']))
    return tb.tab_rowstack(tables)
Ejemplo n.º 3
0
def specific_config_gen(IC,args):
    """Populate ``IC`` from ``args`` and build one image config per frame.

    Annotation CSVs from ``args['annotate_dir']`` are loaded (one table per
    file, tagged with a ``clip_num`` column taken from the file name), joined
    with the ground-truth CSVs from ``args['groundtruth_dir']`` and with the
    frame-correspondence table, and each resulting row is converted into a
    ``SON`` image description.

    Args:
        IC: object that receives the attributes ``base_dir``,
            ``annotate_dir``, ``groundtruth_dir``, ``correspondence``,
            ``size``, ``prefix`` and ``current_frame_path``.
        args: mapping with the keys ``base_dir``, ``annotate_dir``,
            ``groundtruth_dir``, ``frame_correspondence``, ``size`` and,
            optionally, ``image_extension`` (default ``'.jpg'``).

    Returns:
        A list of ``SON([('image', p)])`` entries, one per joined row.
    """
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile=args['frame_correspondence'])
    IC.size = args['size']
    IC.prefix = args.get('image_extension', '.jpg')
    IC.current_frame_path = None

    # Annotation tables, in sorted file-name order; the clip number is the
    # part of the file name before the first dot.
    annotation_csvs = sorted(
        x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv'))
    Xs = []
    for name in annotation_csvs:
        table = tb.tabarray(SVfile=os.path.join(IC.annotate_dir, name))
        clip = name.split('.')[0]
        Xs.append(table.addcols([clip] * len(table), names=['clip_num']))

    # Ground-truth tables are paired with the annotation tables by position
    # in the sorted listings.
    truth_csvs = sorted(
        x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv'))
    Gs = []
    fields = ['clip_num', 'Frame'] + xfields + yfields
    for ind, name in enumerate(truth_csvs):
        try:
            g = tb.tabarray(SVfile=os.path.join(IC.groundtruth_dir, name))
        except Exception:
            # Unreadable ground truth: keep the annotations and mark every
            # row with Correctness == -1.  (Was a bare ``except:``, which
            # also swallowed KeyboardInterrupt / SystemExit.)
            x = Xs[ind].addcols([-1] * len(Xs[ind]), names=['Correctness'])
        else:
            g = g.addcols([name.split('.')[0]] * len(g), names=['clip_num'])
            g = g[fields + ['Confidence']]
            g.renamecol('Confidence', 'Correctness')
            x = Xs[ind].join(g, keycols=fields)
        Gs.append(x)
    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num', 'Frame'])

    # Keep only the correspondence rows whose (clip_num, Frame) occurs in X,
    # then attach them to X.
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num', 'Frame']], X[['clip_num', 'Frame']])
    Y = Y[F]
    X = X.join(Y, keycols=['clip_num', 'Frame'])

    params = []
    for t in X:
        print(t)
        cn = t['clip_num']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc, yc = bb.center
        # (1920, 1080) is presumably the source frame resolution -- TODO confirm.
        center = correct_center((xc, yc), IC.size, (1920, 1080))
        bb_new = bbox.BoundingBox(center=center, width=IC.size[0], height=IC.size[1])
        p = SON([('size', IC.size),
                 ('bounding_box', SON([('xfields', list(bb_new.xs)), ('yfields', list(bb_new.ys))])),
                 ('original_bounding_box', SON([('xfields', list(bb.xs)), ('yfields', list(bb.ys))])),
                 ('clip_num', cn),
                 ('Frame', int(t['Original'])),
                 ('base_dir', IC.base_dir),
                 ('correctness', int(t['Correctness']))])
        p.update(box)
        # The annotated type is always kept as the guess; it is exposed as
        # the object type only when the row is marked correct.
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image', p)]))
    return params
Ejemplo n.º 4
0
def get_trials(fs, type_tag):
    """Gather the trials of all stored results carrying ``type_tag``.

    For each file record whose ``'type_tag'`` equals ``type_tag`` the last
    stored version is read and unpickled, converted with
    ``trials_from_results_dic`` (using the record's ``'two_way_type'``), and
    the per-record tables are row-stacked into the returned table.
    """
    recs = list(fs._GridFS__files.find({'type_tag': type_tag}))
    return tb.tab_rowstack([
        trials_from_results_dic(
            cPickle.loads(fs.get_last_version(_id=rec['_id']).read()),
            rec['two_way_type'])
        for rec in recs
    ])
def trials_from_results_dic(results_dic, two_way_type, label_field=None):
    """Turn a results dictionary into one row-stacked table of trials.

    For every split in ``results_dic['splits'][0]`` the metadata rows of the
    split's test indices are taken from ``dataset.meta`` and extended with
    four columns: ``correct`` (test error equals 0), ``Response`` (the test
    prediction), ``two_way_type`` and ``WorkerId`` (the split index).

    Args:
        results_dic: dictionary with ``'splits'`` and ``'split_results'``.
        two_way_type: value written to the ``two_way_type`` column.
        label_field: unused; kept for backward compatibility.  It defaults
            to ``None`` so that two-argument callers do not raise TypeError.

    Returns:
        The row-stacked table of all splits.
    """
    trials = []
    for i, split in enumerate(results_dic['splits'][0]):
        split_results = results_dic['split_results'][i]
        correct = np.array(split_results['test_errors'][0]) == 0
        Response = np.array(split_results['test_prediction'])
        meta = dataset.meta[split['test']]
        n_rows = meta.shape[0]
        t_type = np.array([two_way_type] * n_rows)
        worker_ids = np.array([i] * n_rows)  # Modeling subjects as splits
        meta = meta.addcols(
            [correct, Response, t_type, worker_ids],
            names=['correct', 'Response', 'two_way_type', 'WorkerId'])
        trials.append(meta)
    return tb.tab_rowstack(trials)
def store_consistency(behavior_name, consistency_type):
    """Store the human-vs-model two-way consistency for one behavior.

    Selects the human and model tables for ``consistency_type``, runs
    ``trial_split_consistency`` on them with a fixed set of keyword
    arguments and inserts the result, together with those keyword arguments,
    into ``benchmark_db[behavior_name]``.  The result is stored under the key
    ``'hvm_<consistency_type>_two_way'``.

    Args:
        behavior_name: model behavior name and target collection name.
        consistency_type: ``'subordinate'``, ``'basic'`` or ``'all'``; the
            latter row-stacks the basic and subordinate tables.

    Raises:
        ValueError: for any other ``consistency_type``.
    """
    # Reject unknown types before touching the database or loading data; the
    # message travels with the exception rather than being printed.
    if consistency_type not in ('subordinate', 'basic', 'all'):
        raise ValueError(
            "%s Not recognized as a consistency type" % consistency_type)

    results_coll = benchmark_db[behavior_name]
    results_key_name = '_'.join(['hvm', consistency_type, 'two_way'])

    if consistency_type == 'all':
        human_data = tb.tab_rowstack([get_basic_human_data(),
                                      get_subordinate_human_data()])
        model_data = tb.tab_rowstack([g.get_model_behavior(behavior_name, 'basic'),
                                      g.get_model_behavior(behavior_name, 'subordinate')])
    else:
        if consistency_type == 'subordinate':
            human_data = get_subordinate_human_data()
        else:
            human_data = get_basic_human_data()
        model_data = g.get_model_behavior(behavior_name, consistency_type)

    consistency_kwargs = {'metric': 'dp_standard', 'kwargs': None, 'split_field': 'two_way_type',
                          'image_property': 'task_category', 'response_property': 'Response', 'bstrapiter': 3}
    results = {'consistency_kwargs': consistency_kwargs}
    results[results_key_name] = trial_split_consistency(human_data, model_data, **consistency_kwargs)
    results_coll.insert(utils.SONify(results))
Ejemplo n.º 7
0
def trials_from_results_dic(results_dic, two_way_type, label_field=None):
    """Build a single trial table from the splits of a results dictionary.

    Each split listed in ``results_dic['splits'][0]`` contributes the rows of
    ``dataset.meta`` selected by its ``'test'`` indices, extended with the
    columns ``correct``, ``Response``, ``two_way_type`` and ``WorkerId``.

    Args:
        results_dic: dictionary holding ``'splits'`` and ``'split_results'``.
        two_way_type: value repeated in the ``two_way_type`` column.
        label_field: not used by this function; it now defaults to ``None``
            so callers passing only two arguments keep working.

    Returns:
        All per-split tables row-stacked into one.
    """
    trials = []
    for i, split in enumerate(results_dic['splits'][0]):
        split_results = results_dic['split_results'][i]
        # A trial counts as correct when its test error is exactly 0.
        correct = np.array(split_results['test_errors'][0]) == 0
        Response = np.array(split_results['test_prediction'])
        meta = dataset.meta[split['test']]
        n_rows = meta.shape[0]
        t_type = np.array([two_way_type] * n_rows)
        worker_ids = np.array([i] * n_rows)  # Modeling subjects as splits
        meta = meta.addcols(
            [correct, Response, t_type, worker_ids],
            names=['correct', 'Response', 'two_way_type', 'WorkerId'])
        trials.append(meta)
    return tb.tab_rowstack(trials)
Ejemplo n.º 8
0
def applyparser(oldmanifestpath,oldtotallinkpath,datadir,newmanifestpath,newtotallinkpath,F,splitfunc,prefixlist,round):
    """Apply parser ``F`` to every manifest entry and write the new manifest.

    Args:
        oldmanifestpath: path of the manifest table to read.
        oldtotallinkpath: path of the existing total-link table; only read
            when ``newtotallinkpath`` is truthy.
        datadir: string prepended to every source path handed to ``F``.
        newmanifestpath: path the resulting manifest is saved to.
        newtotallinkpath: if truthy, links already listed in the old
            total-link table are dropped from the result and an updated
            total-link table is saved here.
        F: callable invoked as ``F(path, manifest_row)``; in the
            ``'Categories'`` case a ``None`` result is skipped.
        splitfunc: optional callable mapping a result row to a prefix.
        prefixlist: optional collection of allowed prefixes.
        round: value written to the ``'Round'`` column of the total-link
            table (the name shadows the builtin; kept for compatibility).

    Raises:
        AssertionError: if link tracking is requested but the result has no
            ``'Download'`` coloring, or if ``splitfunc`` yields a prefix that
            contains ``'/'`` or is not in ``prefixlist``.
    """
    M = tb.tabarray(SVfile=oldmanifestpath)

    is_prefix = 'Prefix' in M.dtype.names
    if 'Extension' in M.dtype.names:
        Extensions = M['Extension']
    else:
        Extensions = ['html'] * len(M)
    has_categories = 'Categories' in M.coloring.keys()

    def source_path(i):
        # Path of the i-th manifest entry: datadir [+ prefix/] + name + ext.
        prefix = M['Prefix'][i] + '/' if is_prefix else ''
        if has_categories:
            name = pathprocessor(M['Categories'][i])
        else:
            name = pathprocessor(())
        return datadir + prefix + name + '.' + Extensions[i]

    Results = [F(source_path(i), M[i]) for i in range(len(M))]
    if has_categories:
        RResults = [r for r in Results if r is not None]
        lens = [len(r) if r is not None else 0 for r in Results]
        # Repeat each category row once per parsed record so the columns
        # line up with the stacked results.
        NM = M['Categories'].repeat(lens).colstack(tb.tab_rowstack(RResults), mode='rename')
    else:
        NM = tb.tab_rowstack(Results)

    if 'Download' not in NM.coloring.keys() and 'URL' in NM.dtype.names:
        NM.coloring['Download'] = ['URL']

    if newtotallinkpath:
        assert 'Download' in NM.coloring.keys()
        T = tb.tabarray(SVfile=oldtotallinkpath)
        # Each link is identified by the string form of its (name, value) pairs.
        DD = np.array([str([(o, x[o]) for o in x.dtype.names]) for x in NM['Download']])
        NewLinks = np.invert(tb.fast.isin(DD, T['Download']))
        NM = NM[NewLinks]
        # NOTE(review): every link of this round is appended, including those
        # already present in T -- confirm that duplicates are intended.
        NT = T.rowstack(tb.tabarray(records=zip([round] * len(DD), DD), names=['Round', 'Download']))
        NT.saveSV(newtotallinkpath, metadata=True)

    # Identity checks: ``!= None`` is ambiguous for array-like arguments.
    if splitfunc is not None and prefixlist is not None:
        Prefixes = [splitfunc(x) for x in NM]
        badprefixes = [y for y in Prefixes if '/' in y or y not in prefixlist]
        assert len(badprefixes) == 0, 'Given the splitter prefix list ' + str(prefixlist) + ', the following bad prefixes occured:' + str(badprefixes)
        NM = NM.addcols(Prefixes, names=['Prefix'])

    NM.saveSV(newmanifestpath, metadata=True)
def generate_data(categories, ext, wBack):
    """Write train/test image lists and label files for ``categories``.

    The metadata of each category is permuted with a fixed seed and capped
    (51200 rows per category when ``'LESS'`` occurs in ``ext``, otherwise
    uncapped), the categories are stacked and, if there is more than one,
    permuted again.  Image-path lists, front-view image-path lists and HDF5
    label files are then written for the train and test indices returned by
    ``generate_index``.

    Args:
        categories: non-empty sequence of category identifiers.
        ext: suffix used in the generated list and label file names.
        wBack: forwarded to ``get_im_front_pth``.
    """
    def get_nCAT(ext, len_meta):
        # Per-category row cap.
        return 51200 if 'LESS' in ext else len_meta

    def image_paths(cat, count):
        # Paths are numbered 0..count-1 inside the category's directory.
        base = get_im_pth(cat)
        return [base + str(i) + '.jpeg' for i in range(count)]

    meta = get_meta(categories[0])
    perm = np.random.RandomState(seed=0).permutation(len(meta))
    n_cat = get_nCAT(ext, len(meta))
    meta = meta[perm][:n_cat]
    im_pth = image_paths(categories[0], len(meta))
    if len(categories) > 1:
        for cat in categories[1:]:
            meta_cat = get_meta(cat)
            perm = np.random.RandomState(seed=0).permutation(len(meta_cat))
            # The cap is derived from this category's own size and shared by
            # both slices.  Previously it was computed from the accumulated
            # ``len(meta)``, which changed between the two statements and
            # could leave ``meta`` and ``im_pth`` with different lengths.
            n_cat = get_nCAT(ext, len(meta_cat))
            meta = tabular.tab_rowstack((meta, meta_cat[perm][:n_cat]))
            im_pth = im_pth + image_paths(cat, len(meta_cat))[:n_cat]
        perm = np.random.RandomState(seed=0).permutation(len(meta))
        meta_p = meta[perm]
        im_pth = np.array(im_pth)[perm]
    else:
        meta_p = meta
        im_pth = np.array(im_pth)

    # get index
    index_train, index_test = generate_index(meta_p, ext)

    # generate data list
    generate_list(im_pth[index_train], ext, 'train')
    generate_list(im_pth[index_test], ext, 'test')

    # generate front data list
    im_front_pth = np.array([get_im_front_pth(obj, wBack) for obj in meta_p['obj']])
    generate_list(im_front_pth[index_train], ext+'_front', 'train')
    generate_list(im_front_pth[index_test], ext+'_front', 'test')

    # generate labels: one row per image holding (ryz1, rxz1, rxy1)
    n_rows = len(meta_p)
    A = np.concatenate((np.array(meta_p['ryz1']).reshape(n_rows, 1),
                        np.array(meta_p['rxz1']).reshape(n_rows, 1),
                        np.array(meta_p['rxy1']).reshape(n_rows, 1)), axis=1)
    A = A.astype('float32')
    label = generate_label(A)
    with h5py.File('data/train_label_'+ext+'.h5', 'w') as f:
        f['label'] = label[index_train]
    with h5py.File('data/test_label_'+ext+'.h5', 'w') as f:
        f['label'] = label[index_test]
    # The .txt files hold the absolute path of the matching .h5 file.
    with open('data/train_label_'+ext+'.txt', 'w') as f:
        f.write('/om/user/hyo/caffe/quatdiff/data/train_label_'+ext+'.h5\n')
    with open('data/test_label_'+ext+'.txt', 'w') as f:
        f.write('/om/user/hyo/caffe/quatdiff/data/test_label_'+ext+'.h5\n')
Ejemplo n.º 10
0
def get_class_pages():
    """Download the USPTO class index and write ``catlevels.tsv``.

    The index page is fetched with ``wget``; every ``<p align="left">``
    entry's first link is downloaded to ``<entry text>.html``, the table
    following its ``h1.page-title`` heading is parsed, and all tables are
    stacked.  The ``CLASS`` / ``CLASS TITLE`` columns, aggregated on
    ``CLASS`` (first title wins), are saved to ``catlevels.tsv``.
    """
    import subprocess

    def fetch(url, destination):
        # Argument-list form: URLs and file names scraped from the remote
        # page are never interpreted by a shell.
        subprocess.call(['wget', url, '-O', destination])

    def parse(path):
        # Parse inside ``with`` so the file handle is closed afterwards.
        with open(path) as handle:
            return BeautifulSoup.BeautifulSoup(handle)

    fetch('http://www.uspto.gov/patents/resources/classification/numeric/can.jsp',
          'class_pages.html')
    Soup = parse('class_pages.html')
    P = Soup.findAll('p', align='left')
    A = ['http://uspto.gov/' + str(dict(p.findAll('a')[0].attrs)['href']) for p in P]
    Xlist = []
    for (a, p) in zip(A, P):
        filename = Contents(p).strip().replace(' ', '_') + '.html'
        fetch(a, filename)
        CSoup = parse(filename)
        T = CSoup.findAll('h1', 'page-title')[0].findNext('table')
        TR = T.findAll('tr')
        headers = [Contents(t) for t in TR[0].findAll('th')]
        # Data rows start at the third <tr>; spaces are stripped from cells.
        rows = [[Contents(td).replace(' ', '') for td in tr.findAll('td')] for tr in TR[2:]]
        # Drop rows without cells or with an empty first cell.
        rows = [r for r in rows if r and r[0]]
        X = tb.tabarray(records=rows, names=headers)
        Xlist.append(X)
    Y = tb.tab_rowstack(Xlist)
    Cat = Y[['CLASS', 'CLASS TITLE']].aggregate(On=['CLASS'], AggFunc=lambda x: x[0])
    Cat.saveSV('catlevels.tsv')
def get_exp(category, sandbox=True, dummy_upload=True):
    """Build the subordinate-identification experiment for one category.

    Images of ``category`` are taken from ``hvm.HvMWithDiscfade``, published
    through ``dataset.publish_images`` and used to create a
    ``MatchToSampleFromDLDataExperiment``.  After ``createTrials`` the trial
    lists are edited in place: quality-estimation repeats are inserted into
    every HIT, practice trials are put at the start of every HIT, and the
    test choices are permuted per HIT.

    Args:
        category: value matched against ``meta['category']``.
        sandbox: forwarded to ``MatchToSampleFromDLDataExperiment``.
        dummy_upload: forwarded to ``dataset.publish_images``.

    Returns:
        The tuple ``(exp, html_data)``.

    Raises:
        AssertionError: if any of the trial-count sanity checks fails.

    Note:
        Written for Python 2: it uses ``print`` statements and ``xrange``,
        and ``n_hits_floor`` relies on ``/`` being integer division.
    """

    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]
    objs = np.unique(meta['obj'])
    combs = [objs]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds, preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)


    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # One response image per object: the first image id found for that object.
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{'obj': obj, 'category': category} for obj in objs],
        'labels': objs}]

    # Every image is used ``mult`` times.
    mult = 2
    html_data = {
            'response_images': response_images,
            'combs': combs,
            'num_trials': 90 * 8 * mult,
            'meta_field': 'obj',
            'meta': tb.tab_rowstack([meta] * mult),
            'urls': urls * mult,
            'shuffle_test': False,
    }

    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                        'new':  str(LEARNING_PERIOD)},
                       {'old': 'OBJTYPE',
                        'new': category}]

    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
            htmlsrc='hvm_subordinate.html',
            htmldst='hvm_subordinate_' + category + '_n%05d.html',
            tmpdir='tmp_subordinate_%s' % category,
            sandbox=sandbox,
            title='Object recognition --- report what you see',
            reward=0.35,
            duration=1500,
            keywords=['neuroscience', 'psychology', 'experiment', 'object recognition'],  # noqa
            description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
            comment="hvm subordinate identification",  # noqa
            collection_name = 'hvm_subordinate_identification_%s' % category,
            max_assignments=1,
            bucket_name='hvm_subordinate_identification_test',
            trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
            html_data=html_data,
            frame_height_pix=1200,
            othersrc = ['objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js', '../../lib/dltkrsvp.js'],
            additionalrules=additionalrules,
            log_prefix='subordinate_' + category + '_'
            )

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data

    # Repeat indices for this category, shuffled with a fixed seed; the
    # corresponding trials are deep-copied from every list in exp._trials.
    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
            for e in exp._trials}

    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]

    # Practice trials: for every list in exp._trials, the first trial of each
    # sample whose id is in goodids (``got`` tracks ids already taken).
    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample']['id'] in goodids and exp._trials['imgData'][_ind]['Sample']['id'] not in got :
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(trials_lrn['imgData'])

    # Descending, roughly evenly spaced insertion positions within one HIT.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
            ).round().astype('int')

    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # HITs are processed from the last one to the first -- presumably so that
    # insertions do not shift the positions of HITs still to be processed.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
            -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    # Insert the practice trials at the start of every HIT.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    #shuffle test on a per-hit basis
    # The same permutation of the 8 choices (seeded with the HIT index) is
    # applied to the test files, the test metadata and the labels.
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(trials_per_hit * j, min(trials_per_hit * (j+1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]


    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
            {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor, n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)),  len(exp._trials['imgFiles'])
    # The string literal below is a disabled extra check; it is never executed.
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit), range(trials_per_hit, 720 + 4 * 32 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
        np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
        for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """

    return exp, html_data
Ejemplo n.º 12
0
def get_exp(category, sandbox=True, dummy_upload=True):
    """Create the subordinate-identification experiment for ``category``.

    The images of ``category`` in ``hvm.HvMWithDiscfade`` are published via
    ``dataset.publish_images`` and wrapped in a
    ``MatchToSampleFromDLDataExperiment``.  Once ``createTrials`` has run,
    the trial lists are modified in place: quality-estimation repeats are
    inserted into each HIT, practice trials are placed at the start of each
    HIT, and the test choices are permuted per HIT.

    Args:
        category: value matched against ``meta['category']``.
        sandbox: forwarded to ``MatchToSampleFromDLDataExperiment``.
        dummy_upload: forwarded to ``dataset.publish_images``.

    Returns:
        The tuple ``(exp, html_data)``.

    Raises:
        AssertionError: if one of the trial-count sanity checks fails.

    Note:
        Python 2 code: ``print`` statements, ``xrange`` and integer ``/``
        (needed for ``n_hits_floor``) are used.
    """

    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]
    objs = np.unique(meta['obj'])
    combs = [objs]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds,
                                  preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # The response image of an object is the first image id found for it.
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{
            'obj': obj,
            'category': category
        } for obj in objs],
        'labels':
        objs
    }]

    # Every image is used ``mult`` times.
    mult = 2
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 8 * mult,
        'meta_field': 'obj',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }

    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }, {
        'old': 'OBJTYPE',
        'new': category
    }]

    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_subordinate.html',
        htmldst='hvm_subordinate_' + category + '_n%05d.html',
        tmpdir='tmp_subordinate_%s' % category,
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm subordinate identification",  # noqa
        collection_name='hvm_subordinate_identification_%s' % category,
        max_assignments=1,
        bucket_name='hvm_subordinate_identification_test',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            'objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='subordinate_' + category + '_')

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data

    # Repeat indices of this category, shuffled with a fixed seed; the trials
    # at those indices are deep-copied from every list in exp._trials.
    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }

    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]

    # Practice trials: for each list in exp._trials, the first trial of every
    # sample whose id is in goodids (``got`` records ids already taken).
    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample'][
                    'id'] in goodids and exp._trials['imgData'][_ind][
                        'Sample']['id'] not in got:
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(
        trials_lrn['imgData'])

    # Descending, roughly evenly spaced insertion positions within one HIT.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')

    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # HITs are handled from the last one to the first -- presumably so that
    # insertions leave the positions of not-yet-processed HITs untouched.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT, -1,
                                -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    # Insert the practice trials at the start of every HIT.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    #shuffle test on a per-hit basis
    # One permutation of the 8 choices per HIT (seeded with the HIT index) is
    # applied to the test files, the test metadata and the labels alike.
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(
                trials_per_hit * j,
                min(trials_per_hit * (j + 1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
            {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor,
                                                        n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)), len(
        exp._trials['imgFiles'])
    # The string literal below is a disabled extra check; it is never executed.
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit), range(trials_per_hit, 720 + 4 * 32 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
        np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
        for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """

    return exp, html_data
def get_exp(sandbox=True, dummy_upload=True):
    """Build the Freiwald-Tsao face-identification experiment.

    All images described by the module-level ``meta`` / ``dataset`` are
    published via ``dataset.publish_images`` and wrapped in a
    ``MatchToSampleFromDLDataExperiment``.  After ``createTrials`` the trial
    lists are edited in place: quality-estimation repeats are inserted into
    every HIT, practice trials are put at the start of every HIT, and the
    test choices are permuted per HIT.  Progress counts are printed along
    the way.

    Args:
        sandbox: forwarded to ``MatchToSampleFromDLDataExperiment``.
        dummy_upload: forwarded to ``dataset.publish_images``.

    Returns:
        The tuple ``(exp, html_data)``.

    Raises:
        AssertionError: if any of the trial-count sanity checks fails.

    Note:
        Written for Python 2: it uses ``print`` statements and ``xrange``,
        and the HIT counts rely on ``/`` being integer division.
    """

    n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    categories = np.unique(dataset.meta['identity'])
    combs = [categories]

    inds = np.arange(len(meta))
    preproc = dataset.default_preproc
    #preproc['flip_tb'] = True
    image_bucket_name = 'freiwald_tsao_2010'
    urls = dataset.publish_images(inds, preproc,
                                  image_bucket_name,
                                  dummy_upload=dummy_upload)

    base_url = 'https://canonical_images.s3.amazonaws.com/'
    # Response images and their metadata are numbered 1 through 28.
    response_images = [{
        'urls': [base_url + 'freiwald_tsao_2010_face%.2da.png' % i for i in range(1, 29)],
        'meta': [{'identity': i} for i in range(1, 29)],
        'labels': categories}]

    # Every image is used ``mult`` times.
    mult = 10
    html_data = {
            'response_images': response_images,
            'combs': combs,
            'num_trials': 220 * mult,
            'meta_field': 'identity',
            'meta': tb.tab_rowstack([meta] * mult),
            'urls': urls * mult,
            'shuffle_test': False,
    }

    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                        'new':  str(LEARNING_PERIOD)}]

    # Regular trials plus quality-estimation repeats plus practice trials.
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + REPEATS_PER_QE_IMG * len(repeat_inds) + LEARNING_PERIOD
    exp = MatchToSampleFromDLDataExperiment(
            htmlsrc='ft_identity.html',
            htmldst='ft_identity_n%05d.html',
            tmpdir='tmp_ft_identity',
            sandbox=sandbox,
            title='Face recognition --- report what you see',
            reward=0.35,
            duration=1500,
            keywords=['neuroscience', 'psychology', 'experiment', 'object recognition'],  # noqa
            description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes.  By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
            comment="freiwald_tsao_identification",  # noqa
            collection_name = 'freiwald_tsao_identification',
            max_assignments=1,
            bucket_name='freiwald_tsao_identification',
            trials_per_hit=trials_per_hit,
            html_data=html_data,
            frame_height_pix=1200,
            othersrc = ['../../mturkutils/lib/dltk.js', '../../mturkutils/lib/dltkexpr.js', '../../mturkutils/lib/dltkrsvp.js'],
            additionalrules=additionalrules,
            log_prefix='freiwald_tsao_identification_'
            )

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 220 * mult, n_total_trials

    # -- in each HIT, the followings will be repeated 4 times to
    # estimate "quality" of data

    # Repeat indices, shuffled with a fixed seed; the trials at those indices
    # are deep-copied from every list in exp._trials.
    ind_repeats = repeat_inds * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
            for e in exp._trials}

    ind_learn = practice_inds
    goodids = [meta[i]['id'] for i in ind_learn]

    # Practice trials: for every list in exp._trials, the first trial of each
    # sample whose id is in goodids (``got`` tracks ids already taken).
    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample']['id'] in goodids and exp._trials['imgData'][_ind]['Sample']['id'] not in got :
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(trials_lrn['imgData'])

    # Descending, roughly evenly spaced insertion positions within one HIT.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 1, -1, -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
            ).round().astype('int')

    print(len(offsets), offsets)

    print('a', len(exp._trials['imgFiles']))
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # HITs are processed from the last one to the first -- presumably so that
    # insertions do not shift the positions of HITs still to be processed.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
            -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    print('b', len(exp._trials['imgFiles']))
    # Insert the practice trials at the start of every HIT.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    print('c', len(exp._trials['imgFiles']))

    #shuffle test on a per-hit basis
    # The same permutation of the 28 choices (seeded with the HIT index) is
    # applied to the test files, the test metadata and the labels.
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(28)
        for i in range(trials_per_hit * j, min(trials_per_hit * (j+1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]


    print('d', len(exp._trials['imgFiles']))

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
            {e: len(exp._trials[e]) for e in exp._trials}


    # Number of extra (practice + repeat) trials added to each HIT.
    _K = LEARNING_PERIOD + REPEATS_PER_QE_IMG * len(repeat_inds)
    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * n_hits_from_data, (n_total_trials, ACTUAL_TRIALS_PER_HIT, n_hits_floor, n_applied_hits, mult, n_hits_from_data)
    assert len(exp._trials['imgFiles']) == mult * (len(meta) + n_hits_from_data * _K),  (len(exp._trials['imgFiles']), mult,  (len(meta) + n_hits_from_data * _K), len(meta), n_hits_from_data, _K)

    return exp, html_data