def createTrials(self):
    """Build the trial list for the experiment.

    Draws LEARNING_PERIOD learning images plus NUM_TRAIN training images
    (with REPEATS repeated trials) from the cats[0] dataset, and NUM_TEST
    testing images (plus repeats) from the cats[1] dataset, resolves their
    public URLs via publish_images, and stores the combined URL and
    metadata lists in self._trials.

    Relies on module-level globals: cats, NUM_TRAIN, NUM_TEST, REPEATS,
    LEARNING_PERIOD and inds_repeat (the last is not defined anywhere in
    the visible portion of this file -- TODO confirm it exists).
    """
    dummy_upload = True  # build URLs only; skip the actual S3 upload
    image_bucket_name = 'rosch_pose'
    seed = 0  # NOTE(review): unused -- the RandomState calls below hard-code 0
    # --- learning + training trials from the cats[0] dataset ---
    meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cats[0] + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl'
    meta = cPickle.load(open(meta_pth))
    perm = np.random.RandomState(0).permutation(len(meta))
    meta_p = meta[perm]
    pinv = fast.perminverse(perm)
    # decide learning_inds
    learning_inds = np.arange(LEARNING_PERIOD)
    # decide query_inds
    objs = list(set(meta['obj']))
    # per-object quota; round() returns a float in Python 2 and is used as a
    # slice bound below -- presumably tolerated by numpy at the time, TODO confirm
    nObj = round(float(NUM_TRAIN) / len(objs)) + 1
    query_inds = np.array([], dtype=int)
    v = np.arange(len(meta))  # NOTE(review): unused
    for obj in objs:
        # take the last nObj permuted indices belonging to this object
        query_inds = np.append(query_inds, perm[meta_p['obj']==obj][-nObj:])
    query_inds = np.sort(query_inds)[:NUM_TRAIN]
    # repeat the final REPEATS training indices as repeat trials
    query_inds = np.append(query_inds, query_inds[-REPEATS:])
    query_inds = np.append(learning_inds, query_inds)
    assert len(query_inds) == NUM_TRAIN+REPEATS+LEARNING_PERIOD
    print query_inds
    urls = publish_images(cats[0], query_inds, image_bucket_name, dummy_upload=dummy_upload)
    # construct experiment info
    bmeta = meta[query_inds]
    imgs = urls
    imgData = [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]
    # --- testing trials from the cats[1] dataset (same selection scheme) ---
    meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cats[1] + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl'
    meta = cPickle.load(open(meta_pth))
    perm = np.random.RandomState(0).permutation(len(meta))
    meta_p = meta[perm]
    pinv = fast.perminverse(perm)
    # decide query_inds
    objs = list(set(meta['obj']))
    nObj = round(float(NUM_TEST) / len(objs)) + 1
    query_inds_here = np.array([], dtype=int)
    v = np.arange(len(meta))
    for obj in objs:
        query_inds_here = np.append(query_inds_here, perm[meta_p['obj']==obj][-nObj:])
    query_inds_here = np.sort(query_inds_here)[:NUM_TEST]
    # NOTE(review): unlike the training half (which repeats the last REPEATS
    # entries), this indexes with a global inds_repeat -- confirm intended
    query_inds_here = np.append(query_inds_here, query_inds_here[inds_repeat])
    assert len(query_inds_here) == NUM_TEST+REPEATS
    print query_inds_here
    # publish images if needed
    urls = publish_images(cats[1], query_inds_here, image_bucket_name, dummy_upload=dummy_upload)
    # construct experiment info
    bmeta = meta[query_inds_here]
    imgs = imgs + urls
    imgData = imgData + [{df: bm[df] for df in meta.dtype.names} for bm in bmeta]
    self._trials = {'imgFiles': imgs, 'imgData': imgData}
def get_metacol_base(self, ma, perm=None):
    """Fetch a metadata column, optionally permuted/subsliced, as labels.

    Parameters
    ----------
    ma : str
        Column name; looked up in self.dset.meta's fields first, then as
        an attribute of self.dset.
    perm : ndarray or None
        Optional permutation applied to the column (must match its shape).

    Returns
    -------
    (metacol, labels_unique) : tuple
        For numeric columns, the (possibly permuted/subsliced) values and
        None. For non-numeric columns, integer codes into the sorted
        unique values, and those unique values.
    """
    assert isinstance(ma, str), ma
    if ma in self.dset.meta.dtype.names:
        metacol = self.dset.meta[ma][:]
    else:
        metacol = getattr(self.dset, ma)[:]
    if perm is not None:
        assert perm.shape == metacol.shape
        metacol = metacol[perm]
    # an optional subslice attribute restricts the rows considered
    if hasattr(self, 'subslice'):
        metacol = metacol[self.subslice]
    try:
        # numeric columns are returned as-is (addition succeeding is the probe)
        metacol + 1
        labels_unique = None
    except TypeError:
        # non-numeric: factorize to integer codes into the sorted uniques
        # (equivalent to the previous hand-rolled argsort/segment approach)
        labels_unique, metacol = n.unique(metacol, return_inverse=True)
    return metacol, labels_unique
def publish_images(cat, query_inds, bucket_name, dummy_upload=True): im_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cat + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/cache/568ce4d00d2c7901515e71c0f90628db084f9dc6/jpeg/' meta_pth = '/home/hyo/.skdata/genthor/RoschDataset3_' + cat + '_6eef6648406c333a4035cd5e60d0bf2ecf2606d7/meta.pkl' meta = cPickle.load(open(meta_pth)) if 'filename' not in meta.dtype.names: # save filenames perm = np.random.RandomState(0).permutation(len(meta)) pinv = fast.perminverse(perm) filename = [im_pth + str(pinv[i]) + '.jpeg' for i in range(len(meta))] meta = meta.addcols(filename, names='filename') with open(meta_pth, 'w') as f: cPickle.dump(meta, f) # publish to s3 conn = boto.connect_s3() b = conn.create_bucket(bucket_name) urls = [ ] for count, ind in enumerate(query_inds): name = cat + '_' + str(meta['id'][ind]) + '.jpeg' url = 'https://s3.amazonaws.com/' + bucket_name + '/' + name urls.append(url) if not dummy_upload: if count % 100 == 0: print str(count) + ' of ' + str(len(query_inds)) k = b.new_key(name) k.set_contents_from_filename(meta['filename'][ind], policy='public-read') return urls
def preprocess_lables(larray):
    """Factorize a label array into int64 codes.

    Each distinct label is mapped to its index in the sorted unique
    values, preserving element order. (Name typo 'lables' kept for
    backward compatibility with existing callers.)

    Parameters
    ----------
    larray : array-like
        Label array (any dtype with a total order).

    Returns
    -------
    numpy.ndarray of int64
        Integer code per element, same length as larray.
    """
    larray = larray[:]
    # np.unique's return_inverse is exactly the previous hand-rolled
    # argsort/segment-count factorization (codes into the sorted uniques)
    _, codes = np.unique(larray, return_inverse=True)
    return codes.astype(np.int64)
def main():
    """Load pickled features (optionally two sets, column-appended),
    un-shuffle them with the canonical RandomState(0) permutation, then
    compute hvm performance and/or neural fit and pickle the results.

    Command line: -f feature file path(s), comma-separated; -n optional
    feature subsample count; -m metric string containing 'p' and/or 'n'.
    """
    print "parsing options..."
    parser = OptionParser()
    parser.add_option("-f", "--feature-dir", dest="feature_dir")
    parser.add_option("-n", "--Nsubsample", type="int", dest="Nsub", default=None)
    parser.add_option("-m", "--metric", dest="metric", default='pn')
    (options, args) = parser.parse_args()
    feature_dir = options.feature_dir
    Nsub = options.Nsub
    metric = options.metric
    fds = feature_dir.split(',')
    features = cPickle.load(open(fds[0]))
    #features = assemble_feature_batches(fds[0])
    # undo the canonical row shuffle so rows line up with dataset meta
    perm = np.random.RandomState(0).permutation(features.shape[0])
    pinv = fast.perminverse(perm)
    features = features[pinv]
    if len(fds) > 1:
        # second feature set: same un-shuffle, then append as extra columns;
        # NOTE(review): perm/pinv are rebound to the second set's row count here
        features2 = cPickle.load(open(fds[1]))
        # features2 = assemble_feature_batches(fds[1])
        perm = np.random.RandomState(0).permutation(features2.shape[0])
        pinv = fast.perminverse(perm)
        features = np.append( features, features2[pinv], axis=1 )
    if Nsub is not None:
        # NOTE(review): pinv re-permutes rows that were already un-shuffled,
        # and perm (a ROW permutation) indexes COLUMNS here -- looks
        # suspicious; confirm this is the intended subsampling scheme
        features = features[pinv][:,perm[:Nsub]]
    RR = {}
    if 'p' in metric:
        print "Compute hvm performance..."
        RR['p'] = compute_perf(features, var_levels=['V6'])
    if 'n' in metric:
        print "Compute hvm neural fit..."
        RR['n'] = compute_nfit(features)
    cPickle.dump( RR, open(fds[0] + "result_hvm_" + str(Nsub) + ".p", "wb") )
def assemble_feature_batches(feat_dir, N=None, seed=0, batch_range=None, shape_reduce=None, perm=None):
    """Load and row-stack feature batches from feat_dir.

    Parameters
    ----------
    feat_dir : str
        Directory containing 'data_batch_<i>' pickle files.
    N : int or None
        If given, deterministically subsample N feature columns per batch.
    seed : int
        Seed for the column-subsampling RandomState.
    batch_range : (int, int) or None
        Half-open slice over the ordered batch-number list.
    shape_reduce : int or None
        If given, reinterpret each batch as (rows, shape_reduce, s, s)
        feature maps and reduce to channel means plus spatial sums.
    perm : {'random', None}
        'random' applies the inverse of the canonical RandomState(0) row
        permutation to the stacked result; None leaves rows in batch order.

    Returns
    -------
    numpy.ndarray
        Stacked (and optionally permuted/subsampled) feature matrix.

    Raises
    ------
    ValueError
        If perm is neither 'random' nor None.
    """
    bns = get_batches(feat_dir)
    if batch_range is not None:
        bns = bns[batch_range[0]: batch_range[1]]
    data = []
    for x in bns:
        ft = unpickle(os.path.join(feat_dir, 'data_batch_%d' % x))['data']
        if shape_reduce is not None:
            nf0, nf1 = ft.shape
            # infer the square spatial side from columns = shape_reduce * s * s
            s = int(np.sqrt(nf1 / float(shape_reduce)))
            ft = ft.reshape((nf0, shape_reduce, s, s))
            ft1 = ft.mean(2).mean(2)               # per-channel spatial mean
            ft2 = ft.sum(1).reshape((nf0, s * s))  # per-location channel sum
            if N is not None:
                # split the column budget between the two reduced views
                nf1 = ft1.shape[1]
                nf2 = ft2.shape[1]
                rng = np.random.RandomState(seed=seed)
                units1 = rng.permutation(nf1)[: N/2]
                units2 = rng.permutation(nf2)[: N - len(units1)]
                ft = np.column_stack([ft1[:, units1], ft2[:, units2]])
            else:
                ft = np.column_stack([ft1, ft2])
        elif N is not None:
            print('subsetting batch %d' % x)
            ft = ft[:, np.random.RandomState(seed=seed).permutation(ft.shape[1])[:N]]
        data.append(ft)
    data = np.row_stack(data)
    if perm == 'random':
        _perm = np.random.RandomState(0).permutation(len(data))
        pinv = fast.perminverse(_perm)
        data = data[pinv]
    elif perm is None:  # fixed: was `perm == None` (identity check is correct)
        pass
    else:
        raise ValueError("unknown perm type: %s" % perm)
    return data
# Load a feature matrix from a pickle or HDF5 file, flatten conv maps,
# optionally subsample columns, then un-shuffle rows with a precomputed
# permutation. NOTE(review): `feature_file`, `perm`, and `nfeat` must be
# defined earlier in the file (outside this visible chunk) -- confirm.
if '.p' in feature_file:
    features = cPickle.load(open(feature_file))
elif '.h5' in feature_file:
    # HDF5 features are stored as 23 batch datasets 'bn0'..'bn22'
    f_feat = h5py.File(feature_file, 'r')
    bns = ['bn'+str(i) for i in range(23)]
    F = f_feat[bns[0]][:].copy()
    for bn in bns[1:]:
        Fbn = f_feat[bn][:].copy()
        F = np.append(F, Fbn, axis=0)
    if 'hvm' in feature_file:
        # hvm has exactly 5760 stimuli; drop any padding rows
        F = F[:5760]
    features = F
else:
    print "Incorrect feature file extension!"
# presumably `perm` is the canonical row shuffle applied upstream; its
# inverse restores dataset order at the end -- TODO confirm against caller
pinv = fast.perminverse(perm)
if len(features.shape) > 2:
    # flatten (rows, c, h, w) conv features to (rows, c*h*w)
    print "reshaping..."
    features = np.reshape(features, (features.shape[0], features.shape[1]*features.shape[2]*features.shape[3]))
if (nfeat is not None) and (features.shape[1] > nfeat):
    print "Select " + str(nfeat) + " features..."
    perm = np.random.RandomState(0).permutation(features.shape[1])
    features = features[:,perm[:nfeat]]
if features.shape[1] > 5760:
    # cap the feature count at 5760 by random column selection
    print "random select..."
    perm = np.random.RandomState(0).permutation(features.shape[1])
    features = features[:,perm[:5760]]
features = features[pinv]
## hvm tasks
#print "Computing for hvm tasks..."
from rosch_analysis import rosch_analysis
import numpy as np
import yamutils.fast as fast
import cPickle
import dldata.stimulus_sets.synthetic.synthetic_datasets as sd

# Run the rosch analysis on cached fc7 features for the
# RoschDataset3_simple74testFewer stimulus set.
dataset = sd.RoschDataset3_simple74testFewer()
features = cPickle.load(open("/om/user/hyo/caffe/features/catInet_roschSimple74testFewer_fc7.p"))
# undo the canonical RandomState(0) row shuffle so rows line up with meta
perm = np.random.RandomState(0).permutation(len(features))
pinv = fast.perminverse(perm)
features = features[pinv]
# fixed: the second argument was `hvm_dataset`, which is never defined in
# this script (NameError); the dataset constructed above is what's analyzed
R = rosch_analysis(features, dataset, ntest=5, do_centroids=False, var_levels=["V6"])