def store_consistency(behavior_name, consistency_type):
    """Compute human-vs-model consistency and insert it into the benchmark DB.

    behavior_name: selects the collection in ``benchmark_db`` that receives
        the result and is forwarded to ``g.get_model_behavior``.
    consistency_type: 'subordinate', 'basic' or 'all'.  'all' row-stacks the
        basic and the subordinate data (humans and model alike).

    Raises ValueError when ``consistency_type`` is none of the three values.
    """
    # Reject a bad type before anything is loaded, and put the explanation on
    # the exception itself rather than printing it next to a bare ValueError.
    if consistency_type not in ('subordinate', 'basic', 'all'):
        raise ValueError(
            "%s Not recognized as a consistency type" % consistency_type)

    results_coll = benchmark_db[behavior_name]
    results_key_name = '_'.join(['hvm', consistency_type, 'two_way'])

    if consistency_type == 'subordinate':
        human_data = get_subordinate_human_data()
        model_data = g.get_model_behavior(behavior_name, consistency_type)
    elif consistency_type == 'basic':
        human_data = get_basic_human_data()
        model_data = g.get_model_behavior(behavior_name, consistency_type)
    else:  # 'all' -- the only value left after the check above
        human_data = tb.tab_rowstack(
            [get_basic_human_data(), get_subordinate_human_data()])
        model_data = tb.tab_rowstack([
            g.get_model_behavior(behavior_name, 'basic'),
            g.get_model_behavior(behavior_name, 'subordinate')
        ])

    consistency_kwargs = {
        'metric': 'dp_standard',
        'kwargs': None,
        'split_field': 'two_way_type',
        'image_property': 'task_category',
        'response_property': 'Response',
        'bstrapiter': 3
    }
    # The settings are stored next to the result they produced.
    results = {'consistency_kwargs': consistency_kwargs}
    results[results_key_name] = trial_split_consistency(
        human_data, model_data, **consistency_kwargs)
    results_coll.insert(utils.SONify(results))
def get_trials(fs, type_tag):
    """Return the row-stacked trials of every stored result tagged `type_tag`.

    Records are looked up by their 'type_tag' field through the files
    collection of `fs`; for each one the last stored version is read,
    unpickled and converted with `trials_from_results_dic`.
    """
    # Drain the query first; file contents are only read afterwards.
    # (`_GridFS__files` looks like a name-mangled private attribute of `fs`.)
    matching = list(fs._GridFS__files.find({'type_tag': type_tag}))

    def _as_trials(record):
        # NOTE(review): cPickle.loads fully trusts the stored bytes -- only
        # safe as long as the store is written by trusted code.
        payload = fs.get_last_version(_id=record['_id']).read()
        # NOTE(review): two arguments are passed here; verify this matches
        # the signature of trials_from_results_dic.
        return trials_from_results_dic(cPickle.loads(payload),
                                       record['two_way_type'])

    return tb.tab_rowstack([_as_trials(record) for record in matching])
def specific_config_gen(IC,args):
    """Build one image config per annotated frame and return them as a list.

    IC: config object; the entries of `args` are copied onto it as attributes
        (base_dir, annotate_dir, groundtruth_dir, correspondence, size,
        prefix, current_frame_path).
    args: dict with keys 'base_dir', 'annotate_dir', 'groundtruth_dir',
        'frame_correspondence', 'size' and optionally 'image_extension'
        (default '.jpg').

    Returns a list of SON documents, each of the form {'image': params}.
    """
    IC.base_dir = args['base_dir']
    IC.annotate_dir = args['annotate_dir']
    IC.groundtruth_dir = args['groundtruth_dir']
    IC.correspondence = tb.tabarray(SVfile = args['frame_correspondence'])
    IC.size = args['size']
    IC.prefix = args.get('image_extension','.jpg')
    IC.current_frame_path = None

    # Annotation tables, one per csv, each tagged with the clip number taken
    # from the file name (the part before the first '.').
    csvs = [x for x in os.listdir(IC.annotate_dir) if x.endswith('.csv')]
    csvs.sort()
    Xs = [tb.tabarray(SVfile = os.path.join(IC.annotate_dir,name)) for name in csvs]
    cns = [name.split('.')[0] for name in csvs]
    cns = [[cn]*len(X) for (cn,X) in zip(cns,Xs)]
    Xs = [X.addcols(cn,names=['clip_num']) for (cn,X) in zip(cns,Xs)]

    # Ground-truth tables are matched to the annotation tables by position in
    # the two sorted listings.
    # NOTE(review): that assumes both directories list the same clips in the
    # same order -- confirm.
    csvs = [x for x in os.listdir(IC.groundtruth_dir) if x.endswith('.csv')]
    csvs.sort()
    Gs = []
    fields = ['clip_num','Frame'] + xfields + yfields
    for ind,name in enumerate(csvs):
        try:
            g = tb.tabarray(SVfile = os.path.join(IC.groundtruth_dir,name))
        except Exception:
            # Ground truth could not be loaded: mark every row with -1.
            # `except Exception` (not a bare except) so that Ctrl-C and
            # SystemExit are no longer swallowed here.
            x = Xs[ind].addcols([-1]*len(Xs[ind]),names=['Correctness'])
        else:
            g = g.addcols([name.split('.')[0]]*len(g),names = ['clip_num'])
            g = g[fields + ['Confidence']]
            g.renamecol('Confidence','Correctness')
            x = Xs[ind].join(g,keycols=fields)
        Gs.append(x)

    X = tb.tab_rowstack(Gs)
    X.sort(order=['clip_num','Frame'])

    # Keep only correspondence rows whose (clip_num, Frame) occurs in X, then
    # join them onto X.
    Y = IC.correspondence
    F = tb.fast.recarrayisin(Y[['clip_num','Frame']],X[['clip_num','Frame']])
    Y = Y[F]
    X = X.join(Y,keycols=['clip_num','Frame'])

    params = []
    for t in X:
        print(t)
        cn = t['clip_num']
        box = get_darpa_box(t)
        bb = box.pop('box')
        xc,yc = bb.center
        # (1920,1080) is passed as the third argument of correct_center;
        # presumably the source frame size -- confirm.
        center = correct_center((xc,yc),IC.size,(1920,1080))
        bb_new = bbox.BoundingBox(center = center,width = IC.size[0], height = IC.size[1])
        p = SON([('size',IC.size),
                 ('bounding_box',SON([('xfields',list(bb_new.xs)),('yfields',list(bb_new.ys))])),
                 ('original_bounding_box',SON([('xfields',list(bb.xs)),('yfields',list(bb.ys))])),
                 ('clip_num',cn),
                 ('Frame',int(t['Original'])),
                 ('base_dir',IC.base_dir),
                 ('correctness',int(t['Correctness']))])
        p.update(box)
        # The annotated type is always kept as the guess; it only counts as
        # the object type when the row is marked correct (Correctness == 1).
        p['GuessObjectType'] = p['ObjectType']
        p['ObjectType'] = p['ObjectType'] if t['Correctness'] == 1 else ''
        params.append(SON([('image',p)]))

    return params
def trials_from_results_dic(results_dic, two_way_type, label_field=None):
    """Turn a classifier results dictionary into one stacked trial table.

    results_dic: dict with 'splits' (only element 0 is iterated) and
        'split_results' (indexed in parallel with those splits).
    two_way_type: value written into the 'two_way_type' column of every row.
    label_field: not used by this function.  It now defaults to None so that
        two-argument calls (as in `get_trials`) no longer raise TypeError;
        three-argument calls keep working.

    Returns the row-stack of the per-split tables.
    """
    trials = []
    for i, split in enumerate(results_dic['splits'][0]):
        split_results = results_dic['split_results'][i]
        # A test error of 0 is recorded as a correct trial.
        correct = np.array(split_results['test_errors'][0]) == 0
        Response = np.array(split_results['test_prediction'])
        # `dataset` is a module-level name; its meta rows for this split's
        # test indices become the trial rows.
        meta = dataset.meta[split['test']]
        n_rows = meta.shape[0]
        t_type = np.array([two_way_type] * n_rows)
        worker_ids = np.array([i] * n_rows)  # Modeling subjects as splits
        meta = meta.addcols([correct, Response, t_type, worker_ids],
                            names=['correct', 'Response', 'two_way_type', 'WorkerId'])
        trials.append(meta)
    return tb.tab_rowstack(trials)
def store_consistency(behavior_name, consistency_type):
    """Store the human-vs-model consistency for one behavior.

    behavior_name: name of the collection in ``benchmark_db`` the result is
        inserted into; also passed to ``g.get_model_behavior``.
    consistency_type: 'subordinate', 'basic', or 'all' (basic and subordinate
        data row-stacked together).

    Raises ValueError if ``consistency_type`` is not one of those values.
    """
    known_types = ('subordinate', 'basic', 'all')
    if consistency_type not in known_types:
        # The message travels with the exception instead of being printed,
        # and the check runs before any data is fetched.
        raise ValueError(
            "%s Not recognized as a consistency type" % consistency_type)

    results_coll = benchmark_db[behavior_name]
    results_key_name = '_'.join(['hvm', consistency_type, 'two_way'])

    if consistency_type == 'all':
        human_data = tb.tab_rowstack([get_basic_human_data(),
                                      get_subordinate_human_data()])
        model_data = tb.tab_rowstack([g.get_model_behavior(behavior_name, 'basic'),
                                      g.get_model_behavior(behavior_name, 'subordinate')])
    else:
        if consistency_type == 'subordinate':
            human_data = get_subordinate_human_data()
        else:  # 'basic'
            human_data = get_basic_human_data()
        model_data = g.get_model_behavior(behavior_name, consistency_type)

    consistency_kwargs = {'metric': 'dp_standard',
                          'kwargs': None,
                          'split_field': 'two_way_type',
                          'image_property': 'task_category',
                          'response_property': 'Response',
                          'bstrapiter': 3}
    # Keep the settings alongside the value they produced.
    results = {'consistency_kwargs': consistency_kwargs}
    results[results_key_name] = trial_split_consistency(human_data,
                                                        model_data,
                                                        **consistency_kwargs)
    results_coll.insert(utils.SONify(results))
def trials_from_results_dic(results_dic, two_way_type, label_field=None):
    """Build a stacked table of trials from a results dictionary.

    results_dic: dict providing 'splits' (element 0 is iterated) and
        'split_results' (indexed by the same position).
    two_way_type: constant stored in the 'two_way_type' column.
    label_field: unused here; defaults to None so callers that pass only two
        arguments (see `get_trials`) do not fail with TypeError.

    Returns the per-split tables row-stacked into one.
    """
    trials = []
    for i, split in enumerate(results_dic['splits'][0]):
        split_results = results_dic['split_results'][i]
        # Error value 0 means the prediction was correct.
        correct = np.array(split_results['test_errors'][0]) == 0
        Response = np.array(split_results['test_prediction'])
        # `dataset` is resolved at module level.
        meta = dataset.meta[split['test']]
        row_count = meta.shape[0]
        t_type = np.array([two_way_type] * row_count)
        # Modeling subjects as splits
        worker_ids = np.array([i] * row_count)
        meta = meta.addcols(
            [correct, Response, t_type, worker_ids],
            names=['correct', 'Response', 'two_way_type', 'WorkerId'])
        trials.append(meta)
    return tb.tab_rowstack(trials)
def applyparser(oldmanifestpath,oldtotallinkpath,datadir,newmanifestpath,newtotallinkpath,F,splitfunc,prefixlist,round):
    """Apply parser `F` to every entry of a manifest and write a new manifest.

    oldmanifestpath: manifest file read as a tabarray.
    oldtotallinkpath: table of already-known links (read only when
        `newtotallinkpath` is truthy).
    datadir: directory prefix of the files handed to `F`.
    newmanifestpath: where the resulting manifest is saved.
    newtotallinkpath: if truthy, links already present in the old total-link
        table are dropped and an updated table is saved here.
    F: callable ``F(path, manifest_row)``.
    splitfunc, prefixlist: when both are not None, `splitfunc` is applied to
        every new row and the results are stored in a 'Prefix' column; each
        must be in `prefixlist` and must not contain '/'.
    round: value written to the 'Round' column of the total-link table.

    Raises AssertionError if the 'Download' coloring is missing while
    `newtotallinkpath` is set, or if a bad prefix is produced.
    """
    M = tb.tabarray(SVfile = oldmanifestpath)
    is_prefix = 'Prefix' in M.dtype.names
    if 'Extension' in M.dtype.names:
        Extensions = M['Extension']
    else:
        Extensions = ['html']*len(M)
    has_categories = 'Categories' in M.coloring.keys()

    def source_path(i):
        # datadir [+ Prefix/] + processed categories + '.' + extension
        prefix = M['Prefix'][i] + '/' if is_prefix else ''
        categories = M['Categories'][i] if has_categories else ()
        return datadir + prefix + pathprocessor(categories) + '.' + Extensions[i]

    Results = [F(source_path(i),M[i]) for i in range(len(M))]
    if has_categories:
        # Entries for which F returned None contribute zero rows.
        RResults = [r for r in Results if r is not None]
        lens = [len(r) if r is not None else 0 for r in Results]
        NM = M['Categories'].repeat(lens).colstack(tb.tab_rowstack(RResults),mode='rename')
    else:
        NM = tb.tab_rowstack(Results)

    if 'Download' not in NM.coloring.keys() and 'URL' in NM.dtype.names:
        NM.coloring['Download'] = ['URL']

    if newtotallinkpath:
        assert 'Download' in NM.coloring.keys()
        T = tb.tabarray(SVfile = oldtotallinkpath)
        # Each row's Download columns are serialized to one string so they
        # can be compared with the stored 'Download' column.
        DD = np.array([str([(o,x[o]) for o in x.dtype.names]) for x in NM['Download']])
        NewLinks = np.invert(tb.fast.isin(DD,T['Download']))
        NM = NM[NewLinks]
        # NOTE(review): all of DD (not only DD[NewLinks]) is appended, so
        # links already in T are stored again under this round -- confirm
        # this is intended.
        NT = T.rowstack(tb.tabarray(records = zip([round]*len(DD),DD),names = ['Round','Download']))
        NT.saveSV(newtotallinkpath,metadata=True)

    # `is not None` rather than `!= None`: the latter compares element-wise
    # when an array-like prefixlist is passed.
    if splitfunc is not None and prefixlist is not None:
        Prefixes = [splitfunc(x) for x in NM]
        badprefixes = [y for y in Prefixes if '/' in y or y not in prefixlist]
        assert len(badprefixes) == 0, 'Given the splitter prefix list ' + str(prefixlist) + ', the following bad prefixes occured:' + str(badprefixes)
        NM = NM.addcols(Prefixes,names=['Prefix'])

    NM.saveSV(newmanifestpath,metadata=True)
def generate_data(categories, ext, wBack):
    """Write the train/test image lists and label files for `categories`.

    The metadata of each category is shuffled with a fixed seed, optionally
    capped (see `_cap`), split by `generate_index`, and the image paths,
    front-image paths and labels of both splits are written out.
    """
    def _cap(row_count):
        # 51200 rows when 'LESS' occurs in ext, otherwise the given count.
        if 'LESS' in ext:
            return 51200
        return row_count

    meta = get_meta(categories[0])
    order = np.random.RandomState(seed=0).permutation(len(meta))
    meta = meta[order][:_cap(len(meta))]
    im_pth = [get_im_pth(categories[0]) + str(i) + '.jpeg'
              for i in range(len(meta))]
    im_pth = im_pth[:_cap(len(meta))]

    if len(categories) <= 1:
        meta_p = meta
        im_pth = np.array(im_pth)
    else:
        for cat in categories[1:]:
            meta_cat = get_meta(cat)
            order = np.random.RandomState(seed=0).permutation(len(meta_cat))
            # NOTE(review): the cap is computed from len(meta), the rows
            # accumulated so far, not from len(meta_cat) -- confirm intent.
            shuffled_cat = meta_cat[order]
            meta = tabular.tab_rowstack((meta, shuffled_cat[:_cap(len(meta))]))
            # NOTE(review): meta has already grown at this point, so this
            # cap can differ from the one applied to the rows just above.
            cat_paths = [get_im_pth(cat) + str(i) + '.jpeg'
                         for i in range(len(meta_cat))]
            im_pth = im_pth + cat_paths[:_cap(len(meta))]
        # One joint, fixed-seed shuffle across all categories.
        order = np.random.RandomState(seed=0).permutation(len(meta))
        meta_p = meta[order]
        im_pth = np.array(im_pth)[order]

    # get index
    index_train, index_test = generate_index(meta_p, ext)

    # generate data list
    generate_list(im_pth[index_train], ext, 'train')
    generate_list(im_pth[index_test], ext, 'test')

    # generate front data list
    im_front_pth = np.array(
        [get_im_front_pth(obj, wBack) for obj in meta_p['obj']])
    generate_list(im_front_pth[index_train], ext+'_front', 'train')
    generate_list(im_front_pth[index_test], ext+'_front', 'test')

    # generate labels: one column per rotation field, in this order
    columns = tuple(np.array(meta_p[key]).reshape(len(meta), 1)
                    for key in ('ryz1', 'rxz1', 'rxy1'))
    A = np.concatenate(columns, axis=1).astype('float32')
    label = generate_label(A)

    label_files = (('data/train_label_'+ext+'.h5', index_train),
                   ('data/test_label_'+ext+'.h5', index_test))
    for h5_path, index in label_files:
        with h5py.File(h5_path, 'w') as f:
            f['label'] = label[index]

    # Each .txt file holds the absolute path of the matching .h5 file.
    list_files = (('data/train_label_'+ext+'.txt',
                   '/om/user/hyo/caffe/quatdiff/data/train_label_'+ext+'.h5\n'),
                  ('data/test_label_'+ext+'.txt',
                   '/om/user/hyo/caffe/quatdiff/data/test_label_'+ext+'.h5\n'))
    for txt_path, content in list_files:
        with open(txt_path, 'w') as f:
            f.write(content)
def get_class_pages():
    """Download the USPTO class pages and save class/title pairs to a TSV.

    Fetches the numeric class index with wget, follows the first link of
    every left-aligned paragraph, reads the table that follows the
    'page-title' heading on each linked page, and writes one row per CLASS
    (its first CLASS TITLE) to 'catlevels.tsv'.  The downloaded pages are
    left in the current directory.
    """
    # Local import so the block brings its own dependency into scope.
    import subprocess

    # wget is called with an argument list (no shell): the URLs and file
    # names below come from the scraped page and must not be interpreted by
    # a shell.
    subprocess.call(['wget',
                     'http://www.uspto.gov/patents/resources/classification/numeric/can.jsp',
                     '-O', 'class_pages.html'])
    # The markup is parsed while the soup is constructed, so the handle can
    # be closed right after.
    with open('class_pages.html') as index_file:
        Soup = BeautifulSoup.BeautifulSoup(index_file)

    P = Soup.findAll('p',align='left')
    A = ['http://uspto.gov/' + str(dict(p.findAll('a')[0].attrs)['href']) for p in P]
    Xlist = []
    for (a,p) in zip(A,P):
        filename = Contents(p).strip().replace(' ','_') + '.html'
        subprocess.call(['wget', a, '-O', filename])
        with open(filename) as class_file:
            CSoup = BeautifulSoup.BeautifulSoup(class_file)
        T = CSoup.findAll('h1','page-title')[0].findNext('table')
        TR = T.findAll('tr')
        # Row 0 carries the headers; data rows are read from row 2 onwards.
        headers = [Contents(t) for t in TR[0].findAll('th')]
        rows = [[Contents(td).replace(' ','') for td in tr.findAll('td')] for tr in TR[2:]]
        # Drop rows with an empty first cell; `r and` also skips rows that
        # have no <td> cells at all instead of failing with IndexError.
        rows = [r for r in rows if r and r[0]]
        X = tb.tabarray(records = rows, names = headers)
        Xlist.append(X)

    Y = tb.tab_rowstack(Xlist)
    # One row per CLASS, keeping the first title seen for it.
    Cat = Y[['CLASS','CLASS TITLE']].aggregate(On=['CLASS'],AggFunc=lambda x : x[0])
    Cat.saveSV('catlevels.tsv')
def get_exp(category, sandbox=True, dummy_upload=True):
    """Build the subordinate-identification experiment for one category.

    category: value matched against meta['category'] to select the images.
    sandbox: forwarded to MatchToSampleFromDLDataExperiment.
    dummy_upload: forwarded to dataset.publish_images.

    Returns the tuple (exp, html_data).  Besides creating the base trials,
    the function inserts repeated quality-estimation trials and learning
    trials into exp._trials and permutes the test choices of every trial.

    Raises AssertionError if any of the trial-count sanity checks fails.
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    # Restrict the metadata to the rows of the requested category.
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]
    objs = np.unique(meta['obj'])
    combs = [objs]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)

    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # One response image per object: the id of its first row in meta.
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{'obj': obj, 'category': category} for obj in objs],
        'labels': objs}]

    # meta and urls are repeated `mult` times below.
    mult = 2
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 8 * mult,
        'meta_field': 'obj',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }

    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                        'new': str(LEARNING_PERIOD)},
                       {'old': 'OBJTYPE',
                        'new': category}]
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_subordinate.html',
        htmldst='hvm_subordinate_' + category + '_n%05d.html',
        tmpdir='tmp_subordinate_%s' % category,
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=['neuroscience', 'psychology', 'experiment', 'object recognition'],  # noqa
        description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm subordinate identification",  # noqa
        collection_name = 'hvm_subordinate_identification_%s' % category,
        max_assignments=1,
        bucket_name='hvm_subordinate_identification_test',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc = ['objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js', '../../lib/dltkrsvp.js'],
        additionalrules=additionalrules,
        log_prefix='subordinate_' + category + '_'
    )

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the following trials will be repeated 4 times to
    #    estimate "quality" of data
    # (presumably repeat_inds[category] is a list, so `*` repeats it -- confirm)
    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    # Deep copies of the trials at those indices, for every key of _trials.
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
                 for e in exp._trials}

    # -- learning trials: for every key of _trials, a deep copy of the first
    #    trial whose sample id is one of the practice ids
    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]

    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []  # sample ids already collected for this key
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample']['id'] in goodids and exp._trials['imgData'][_ind]['Sample']['id'] not in got :
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(trials_lrn['imgData'])

    # Insertion positions within a HIT, descending from
    # ACTUAL_TRIALS_PER_HIT - 3, one per repeated trial.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
                        ).round().astype('int')

    # NOTE(review): the result feeds xrange below, so this relies on `/`
    # being integer division (Python 2 with int operands) -- confirm.
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # Walk the HITs from the last one to the first; insertions are therefore
    # made at positions at or after the start of the HIT being processed.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
                                -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    # Put the learning trials at the start of every HIT.  Each one is
    # inserted at the same index, so they end up in reverse insertion order.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    # shuffle test on a per-hit basis: one permutation of the 8 test entries
    # per HIT (seeded with the HIT number), applied to the image files, the
    # 'Test' data and the labels alike
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(trials_per_hit * j, min(trials_per_hit * (j+1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor, n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)), len(exp._trials['imgFiles'])
    # The string literal below is a disabled check; it is never executed.
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit),
               range(trials_per_hit, 720 + 4 * 32 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
                     np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
                for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """
    return exp, html_data
def get_exp(category, sandbox=True, dummy_upload=True):
    """Create the subordinate-identification experiment for `category`.

    category: compared with meta['category'] to pick the images used.
    sandbox: passed on to MatchToSampleFromDLDataExperiment.
    dummy_upload: passed on to dataset.publish_images.

    Returns (exp, html_data).  After the base trials are created, repeated
    quality-estimation trials and learning trials are inserted into
    exp._trials and the test choices of every trial are permuted.

    Raises AssertionError when a trial-count sanity check does not hold.
    """
    dataset = hvm.HvMWithDiscfade()
    meta = dataset.meta
    # Keep only the metadata rows that belong to the category.
    inds = (meta['category'] == category).nonzero()[0]
    meta = meta[inds]
    objs = np.unique(meta['obj'])
    combs = [objs]
    preproc = None
    image_bucket_name = 'hvm_timing'
    urls = dataset.publish_images(
        inds, preproc, image_bucket_name, dummy_upload=dummy_upload)

    base_url = 'https://s3.amazonaws.com/hvm_timing/'
    # The response image of an object is the first id listed for it.
    obj_resp_ids = [meta[meta['obj'] == o]['id'][0] for o in objs]
    response_images = [{
        'urls': [base_url + obj_id + '.png' for obj_id in obj_resp_ids],
        'meta': [{
            'obj': obj,
            'category': category
        } for obj in objs],
        'labels': objs
    }]

    # Number of times meta and urls are repeated.
    mult = 2
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 90 * 8 * mult,
        'meta_field': 'obj',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }

    additionalrules = [{
        'old': 'LEARNINGPERIODNUMBER',
        'new': str(LEARNING_PERIOD)
    }, {
        'old': 'OBJTYPE',
        'new': category
    }]
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + 32 + 16
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='hvm_subordinate.html',
        htmldst='hvm_subordinate_' + category + '_n%05d.html',
        tmpdir='tmp_subordinate_%s' % category,
        sandbox=sandbox,
        title='Object recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=[
            'neuroscience', 'psychology', 'experiment', 'object recognition'
        ],  # noqa
        description=
        "***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="hvm subordinate identification",  # noqa
        collection_name='hvm_subordinate_identification_%s' % category,
        max_assignments=1,
        bucket_name='hvm_subordinate_identification_test',
        trials_per_hit=trials_per_hit,  # 144 + 8x4 repeats + 16 training
        html_data=html_data,
        frame_height_pix=1200,
        othersrc=[
            'objnames.js', '../../lib/dltk.js', '../../lib/dltkexpr.js',
            '../../lib/dltkrsvp.js'
        ],
        additionalrules=additionalrules,
        log_prefix='subordinate_' + category + '_')

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 90 * 8 * mult, n_total_trials

    # -- in each HIT, the following trials will be repeated 4 times to
    #    estimate "quality" of data
    # (assumes repeat_inds[category] is a list that `*` repeats -- confirm)
    ind_repeats = repeat_inds[category] * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    # For every key of _trials: deep copies of the trials at those indices.
    trials_qe = {
        e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
        for e in exp._trials
    }

    # -- learning trials: per key of _trials, a deep copy of the first trial
    #    found for each practice sample id
    ind_learn = practice_inds[category]
    goodids = [meta[i]['id'] for i in ind_learn]

    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []  # ids already taken for this key
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample'][
                    'id'] in goodids and exp._trials['imgData'][_ind][
                        'Sample']['id'] not in got:
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(
        trials_lrn['imgData'])

    # Descending in-HIT positions (starting at ACTUAL_TRIALS_PER_HIT - 3)
    # at which the repeated trials are inserted.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 3, -1, -ACTUAL_TRIALS_PER_HIT /
                        float(len(ind_repeats))).round().astype('int')

    # NOTE(review): used as an xrange bound below, so `/` must yield an int
    # here (Python 2 integer division with int operands) -- confirm.
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # HITs are visited last to first, so each insertion lands at or after
    # the start index of the HIT currently being filled.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT, -1,
                                -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    # Learning trials go to the front of each HIT; inserting them all at the
    # same index leaves them in reverse insertion order.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    # shuffle test on a per-hit basis: the same seeded permutation of the 8
    # test entries is applied to image files, 'Test' data and labels
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(8)
        for i in range(
                trials_per_hit * j,
                min(trials_per_hit * (j + 1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * 5, (n_hits_floor,
                                                        n_applied_hits)
    assert len(exp._trials['imgFiles']) == mult * (720 + 5 * (32 + 16)), len(
        exp._trials['imgFiles'])
    # Disabled check, kept as an inert string literal.
    """
    s_ref_labels = set([tuple(e) for e in trials_qe['labels']])
    print(s_ref_labels)
    offsets2 = np.arange(8 * 4)[::-1] + offsets

    ibie = zip(range(0, 720 + 4 * 32, trials_per_hit),
               range(trials_per_hit, 720 + 4 * 32 + trials_per_hit, trials_per_hit))
    assert all([set([tuple(e) for e in
                     np.array(exp._trials['labels'][ib:ie])[offsets2]]) == s_ref_labels
                for ib, ie in ibie[:-1]])
    print '** Finished creating trials.'
    """
    return exp, html_data
def get_exp(sandbox=True, dummy_upload=True):
    """Build the face-identity experiment from the module-level dataset.

    sandbox: forwarded to MatchToSampleFromDLDataExperiment.
    dummy_upload: forwarded to dataset.publish_images.

    Reads the module-level names `meta`, `dataset`, `repeat_inds`,
    `practice_inds`, ACTUAL_TRIALS_PER_HIT, REPEATS_PER_QE_IMG and
    LEARNING_PERIOD.  Returns the tuple (exp, html_data) after inserting
    repeated quality-estimation trials and learning trials into exp._trials
    and permuting the test choices of every trial.

    Raises AssertionError if any of the trial-count sanity checks fails.
    """
    # NOTE(review): compared with integer counts in the final checks, so this
    # relies on `/` being integer division (Python 2, int operands).
    n_hits_from_data = len(meta) / ACTUAL_TRIALS_PER_HIT
    categories = np.unique(dataset.meta['identity'])
    combs = [categories]

    # Every image of the dataset is published.
    inds = np.arange(len(meta))
    preproc = dataset.default_preproc
    #preproc['flip_tb'] = True
    image_bucket_name = 'freiwald_tsao_2010'
    urls = dataset.publish_images(inds, preproc, image_bucket_name,
                                  dummy_upload=dummy_upload)

    base_url = 'https://canonical_images.s3.amazonaws.com/'
    # Response images and their identities are numbered 1..28.
    response_images = [{
        'urls': [base_url + 'freiwald_tsao_2010_face%.2da.png' % i for i in range(1, 29)],
        'meta': [{'identity': i} for i in range(1, 29)],
        'labels': categories}]

    # meta and urls are repeated `mult` times below.
    mult = 10
    html_data = {
        'response_images': response_images,
        'combs': combs,
        'num_trials': 220 * mult,
        'meta_field': 'identity',
        'meta': tb.tab_rowstack([meta] * mult),
        'urls': urls * mult,
        'shuffle_test': False,
    }

    additionalrules = [{'old': 'LEARNINGPERIODNUMBER',
                        'new': str(LEARNING_PERIOD)}]

    # Regular trials + repeated quality-estimation trials + learning trials.
    trials_per_hit = ACTUAL_TRIALS_PER_HIT + REPEATS_PER_QE_IMG * len(repeat_inds) + LEARNING_PERIOD
    exp = MatchToSampleFromDLDataExperiment(
        htmlsrc='ft_identity.html',
        htmldst='ft_identity_n%05d.html',
        tmpdir='tmp_ft_identity',
        sandbox=sandbox,
        title='Face recognition --- report what you see',
        reward=0.35,
        duration=1500,
        keywords=['neuroscience', 'psychology', 'experiment', 'object recognition'],  # noqa
        description="***You may complete as many HITs in this group as you want*** Complete a visual object recognition task where you report the identity of objects you see. We expect this HIT to take about 10 minutes or less, though you must finish in under 25 minutes. By completing this HIT, you understand that you are participating in an experiment for the Massachusetts Institute of Technology (MIT) Department of Brain and Cognitive Sciences. You may quit at any time, and you will remain anonymous. Contact the requester with questions or concerns about this experiment.",  # noqa
        comment="freiwald_tsao_identification",  # noqa
        collection_name = 'freiwald_tsao_identification',
        max_assignments=1,
        bucket_name='freiwald_tsao_identification',
        trials_per_hit=trials_per_hit,
        html_data=html_data,
        frame_height_pix=1200,
        othersrc = ['../../mturkutils/lib/dltk.js', '../../mturkutils/lib/dltkexpr.js', '../../mturkutils/lib/dltkrsvp.js'],
        additionalrules=additionalrules,
        log_prefix='freiwald_tsao_identification_'
    )

    # -- create trials
    exp.createTrials(sampling='without-replacement', verbose=1)
    n_total_trials = len(exp._trials['imgFiles'])
    assert n_total_trials == 220 * mult, n_total_trials

    # -- in each HIT, the following trials will be repeated 4 times to
    #    estimate "quality" of data
    # (presumably repeat_inds is a list, so `*` repeats it -- confirm)
    ind_repeats = repeat_inds * REPEATS_PER_QE_IMG
    rng = np.random.RandomState(0)
    rng.shuffle(ind_repeats)
    # Deep copies of the trials at those indices, for every key of _trials.
    trials_qe = {e: [copy.deepcopy(exp._trials[e][r]) for r in ind_repeats]
                 for e in exp._trials}

    # -- learning trials: for every key of _trials, a deep copy of the first
    #    trial whose sample id is one of the practice ids
    ind_learn = practice_inds
    goodids = [meta[i]['id'] for i in ind_learn]

    trials_lrn = {}
    for e in exp._trials:
        trials_lrn[e] = []
        got = []  # sample ids already collected for this key
        for _ind, r in enumerate(exp._trials[e]):
            if exp._trials['imgData'][_ind]['Sample']['id'] in goodids and exp._trials['imgData'][_ind]['Sample']['id'] not in got :
                trials_lrn[e].append(copy.deepcopy(r))
                got.append(exp._trials['imgData'][_ind]['Sample']['id'])
    assert len(trials_lrn['imgData']) == len(goodids), len(trials_lrn['imgData'])

    # Insertion positions within a HIT, descending from
    # ACTUAL_TRIALS_PER_HIT - 1, one per repeated trial.
    offsets = np.arange(ACTUAL_TRIALS_PER_HIT - 1, -1, -ACTUAL_TRIALS_PER_HIT / float(len(ind_repeats))
                        ).round().astype('int')
    # The print calls tagged 'a'..'d' trace the trial count between stages.
    print(len(offsets), offsets)

    print('a', len(exp._trials['imgFiles']))
    # NOTE(review): used as an xrange bound, so `/` must be integer division
    # here (Python 2 with int operands) -- confirm.
    n_hits_floor = n_total_trials / ACTUAL_TRIALS_PER_HIT
    n_applied_hits = 0
    # Walk the HITs from the last one to the first; insertions are therefore
    # made at positions at or after the start of the HIT being processed.
    for i_trial_begin in xrange((n_hits_floor - 1) * ACTUAL_TRIALS_PER_HIT,
                                -1, -ACTUAL_TRIALS_PER_HIT):
        for k in trials_qe:
            for i, offset in enumerate(offsets):
                exp._trials[k].insert(i_trial_begin + offset, trials_qe[k][i])
        n_applied_hits += 1

    print('b', len(exp._trials['imgFiles']))
    # Put the learning trials at the start of every HIT.  Each one is
    # inserted at the same index, so they end up in reverse insertion order.
    for j in range(n_applied_hits):
        for k in trials_lrn:
            for i in range(len(ind_learn)):
                exp._trials[k].insert(trials_per_hit * j, trials_lrn[k][i])

    print('c', len(exp._trials['imgFiles']))
    # shuffle test on a per-hit basis: one permutation of the 28 test entries
    # per HIT (seeded with the HIT number), applied to the image files, the
    # 'Test' data and the labels alike
    for j in range(n_applied_hits):
        rng = np.random.RandomState(seed=j)
        perm = rng.permutation(28)
        for i in range(trials_per_hit * j, min(trials_per_hit * (j+1), len(exp._trials['imgFiles']))):
            f = copy.deepcopy(exp._trials['imgFiles'][i])
            t = copy.deepcopy(exp._trials['imgData'][i])
            f[1] = [f[1][_j] for _j in perm]
            exp._trials['imgFiles'][i] = f
            t['Test'] = [t['Test'][_j] for _j in perm]
            exp._trials['imgData'][i] = t
            l = copy.deepcopy(exp._trials['labels'][i])
            exp._trials['labels'][i] = [l[_j] for _j in perm]

    print('d', len(exp._trials['imgFiles']))
    print '** n_applied_hits =', n_applied_hits
    print '** len for each in _trials =', \
        {e: len(exp._trials[e]) for e in exp._trials}

    # Extra (learning + repeated) trials added to each HIT.
    _K = LEARNING_PERIOD + REPEATS_PER_QE_IMG * len(repeat_inds)
    # -- sanity check
    assert n_hits_floor == n_applied_hits == mult * n_hits_from_data, (n_total_trials, ACTUAL_TRIALS_PER_HIT, n_hits_floor, n_applied_hits, mult, n_hits_from_data)
    assert len(exp._trials['imgFiles']) == mult * (len(meta) + n_hits_from_data * _K), (len(exp._trials['imgFiles']), mult, (len(meta) + n_hits_from_data * _K), len(meta), n_hits_from_data, _K)

    return exp, html_data