def do_all(runname='brownseds_highz', outfolder=None, regenerate=False, regenerate_stack=False, **opts):

    if outfolder is None:
        outfolder = os.getenv('APPS') + '/prospector_alpha/plots/' + runname + '/pcomp/'
        if not os.path.isdir(outfolder):
            os.makedirs(outfolder)
            os.makedirs(outfolder + 'data/')

    stack_opts = {
        'sigma_sf': 0.3,                  # scatter in the star-forming sequence, in dex
        'nbins_horizontal': 3,            # number of bins in horizontal stack
        'nbins_vertical': 4,              # number of bins in vertical stack
        'horizontal_bin_colors': ['#45ADA8', '#FC913A', '#FF4E50'],
        'vertical_bin_colors': ['red', '#FC913A', '#45ADA8', '#323299'],
        'low_mass_cutoff': 9.5,           # log(M) where we stop stacking and plotting
        'high_mass_cutoff': 11.5,
        'ylim_horizontal_sfr': (-0.8, 3),
        'ylim_horizontal_ssfr': (1e-13, 1e-9),
        'ylim_vertical_sfr': (-3, 3),
        'ylim_vertical_ssfr': (1e-13, 1e-9),
        'xlim_t': (1e7, 1.4e10),
        'show_disp': [0.16, 0.84]         # percentile of population distribution to show on plot
    }

    filename = outfolder + 'data/single_sfh_stack.h5'
    if os.path.isfile(filename) and regenerate_stack == False:
        with open(filename, "r") as f:
            stack = hickle.load(f)
    else:
        data = collate_data(runname, filename=outfolder + 'data/stacksfh.h5', regenerate=regenerate, **opts)
        stack = stack_sfh(data, regenerate_stack=regenerate_stack, **stack_opts)
        hickle.dump(stack, open(filename, "w"))

    plot_stacked_sfh(stack, outfolder, **stack_opts)
def process_data():
    splits = {s: [] for s in ['train', 'test', 'val']}
    splits['val'] = val_recordings
    splits['test'] = test_recordings
    not_train = splits['val'] + splits['test']
    for c in categories:  # Randomly assign recordings to training and testing. Cross-validation done across entire recordings.
        c_dir = os.path.join(DATA_DIR, 'raw', c + '/')
        _, folders, _ = os.walk(c_dir).next()
        splits['train'] += [(c, f) for f in folders if (c, f) not in not_train]

    for split in splits:
        im_list = []
        source_list = []  # corresponds to recording that image came from
        for category, folder in splits[split]:
            im_dir = os.path.join(DATA_DIR, 'raw/', category, folder, folder[:10], folder, 'image_03/data/')
            _, _, files = os.walk(im_dir).next()
            im_list += [im_dir + f for f in sorted(files)]
            source_list += [category + '-' + folder] * len(files)

        print 'Creating ' + split + ' data: ' + str(len(im_list)) + ' images'
        X = np.zeros((len(im_list),) + desired_im_sz + (3,), np.uint8)
        for i, im_file in enumerate(im_list):
            im = imread(im_file)
            X[i] = process_im(im, desired_im_sz)

        hkl.dump(X, os.path.join(DATA_DIR, 'X_' + split + '.hkl'))
        hkl.dump(source_list, os.path.join(DATA_DIR, 'sources_' + split + '.hkl'))
def save_hkl_file(filename, data):
    hkl_filename = filename + '.hkl'
    try:
        hkl.dump(data, hkl_filename, mode="w")
        return True
    except Exception:
        # a failed dump can leave a partial file behind; remove it and report failure
        print 'remove %s' % hkl_filename
        if os.path.exists(hkl_filename):
            os.remove(hkl_filename)
        return False
def save_hkl_file(filename, data):
    hkl_filename = filename + '.hkl'
    try:
        hkl.dump(data, hkl_filename, mode="w")
        return True
    except Exception:
        os.remove(hkl_filename)
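A minimal usage sketch for the helper above (my addition, not from the snippets): it round-trips a NumPy array through `save_hkl_file` and reads it back directly with hickle.

```python
# Hedged example: assumes hickle is installed; the file name is arbitrary.
import hickle as hkl
import numpy as np

features = np.random.rand(100, 4096).astype(np.float32)
if save_hkl_file('features_cache', features):     # writes features_cache.hkl
    restored = hkl.load('features_cache.hkl')     # round-trip read
    assert np.allclose(features, restored)
```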
def process_video(filename):
    print filename
    video = extract_frame(filename)
    video = vid_batch(video)
    print 'Video Loaded!'

    net, transformer = caffenet()
    feats = np.zeros((4096, 0), dtype=np.float32)
    net.blobs['data'].reshape(15, 3, 227, 227)
    for x in xrange(video.nbatch):
        frames = video.video[..., 15*x: 15*(x+1)]
        #cur_frames = np.zeros((227, 227, 3, 0), dtype=np.uint8)
        for i in xrange(frames.shape[-1]):
            cur_frame = frames[..., i]
            net.blobs['data'].data[i] = transformer.preprocess('data', cur_frame)
        out = net.forward()
        cur_data = net.blobs['fc7'].data.T
        if x == video.nbatch - 1:
            if video.padded != 0:
                cur_data = cur_data[..., :(15 - video.padded)]
        feats = np.concatenate((feats, cur_data), axis=1)
        print feats.shape

    out_file = filename.replace('E001', 'E001_fc7')
    out_file = out_file.replace('mp4', 'hkl')
    hkl.dump({'feats': feats}, out_file)
    return
def save(filepath, data, svL=1, fmt='pkl'):
    """
    Save data as a pickle-format file.

    Input
      filepath - file name
      data     - data
      svL      - save level, 0 | {1} | 2
                 0: do not write to pathDst
                 1: write to pathDst even if it exists
                 2: do not write to pathDst if it exists
      fmt      - format, {'pkl'} | 'hkl' | 'h5'
    """
    if svL == 0 or filepath is None:
        return

    # create folder if it does not exist
    foldPath = os.path.dirname(filepath)
    mkDir(foldPath)

    if fmt == 'pkl':
        # use pickle
        import cPickle
        with open(filepath, "w") as fo:
            cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL)
    elif fmt == 'hkl':
        # use hickle, which is faster for large-scale data
        # https://github.com/telegraphic/hickle
        import hickle
        with open(filepath, "w") as fo:
            hickle.dump(data, fo)
    else:
        raise Exception('unknown fmt: {}'.format(fmt))
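A short usage sketch for `save` (an assumption on my part: `mkDir` wraps directory creation and the function is importable as-is); it writes the same dictionary once via cPickle and once via hickle, and shows the svL=0 skip path.

```python
# Hedged example; paths and the mkDir helper are assumptions, not from the snippet.
import numpy as np

blob = {'W': np.ones((3, 3)), 'label': 'demo'}
save('out/demo.pkl', blob, svL=1, fmt='pkl')   # cPickle path
save('out/demo.hkl', blob, svL=1, fmt='hkl')   # hickle/HDF5 path
save('out/demo.pkl', blob, svL=0)              # svL=0: nothing is written
```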
def test_astropy_time_array():
    times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']
    t1 = Time(times, format='isot', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    for ii in range(len(t1)):
        assert t1.value[ii] == t2.value[ii]
    assert t1.format == t2.format
    assert t1.scale == t2.scale

    times = [58264, 58265, 58266]
    t1 = Time(times, format='mjd', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    assert np.allclose(t1.value, t2.value)
    assert t1.format == t2.format
    assert t1.scale == t2.scale
def config_loop():
    global pts
    active, image = cam_feed.read()
    while active:
        cv2.imshow('Perspective', image)
        key = cv2.waitKey(1) & 0xFF
        corres_pts = np.array([(0, 0), (image.shape[1], 0),
                               (image.shape[1], image.shape[0]), (0, image.shape[0])])
        if len(pts) == 4:
            pts = np.array(pts)
            for (x, y) in pts:
                cv2.circle(image, (x, y), 5, (0, 255, 0), -1)
            h, _ = cv2.findHomography(pts.astype('float32'), corres_pts.astype('float32'))
            print 'Transformation Matrix : ', h

            # save to config file
            config = {'h': h}
            config['threshold'] = 15
            config['dilate'] = 10
            hkl.dump(config, '.config')

            # warp image
            warped = cv2.warpPerspective(image, h, (image.shape[1], image.shape[0]))
            cv2.imshow('Perspective', image)
            cv2.imshow('warped', warped)

            # pause
            cv2.waitKey(-1)
            break
def test():
    #im_file = caffe_root+'examples/images/cat.jpg'
    im_file = '/home/bill/Dropbox/Cox_Lab/Illusions/images/T_illusion.jpg'
    layer1 = 'pool2'
    layer2 = 'conv2'
    save_file = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_feats2_'+layer1+'_notoversample.hkl'
    save_file2 = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_recon_'+layer1+'_0-2tran.jpg'

    if not os.path.isfile(save_file):
        feats = get_features(im_file, layer1)
        hkl.dump(feats, open(save_file, 'w'))
    else:
        feats = hkl.load(open(save_file))

    #feats = feats.reshape((10,256,6,6))
    recon_im = get_recon(feats, layer2)
    # img = Image.fromarray(recon_im, 'RGB')
    pdb.set_trace()

    #recon_im[recon_im<0] = 0
    #recon_im = recon_im/255
    plt.imshow(recon_im)  #, cmap='Greys_r')
    plt.show(block=False)
    plt.savefig(save_file2)
    pdb.set_trace()
def pickle_dataset(input_pkl, output_pkl, img_path, id_label, PIXELS):
    data = pd.read_pickle(input_pkl)
    dataset = {}

    iter_images = iter(data[id_label])
    first_image = next(iter_images)
    im = Image.open(img_path + first_image + '.jpg', 'r')
    im = ImageOps.fit(im, (PIXELS, PIXELS), Image.ANTIALIAS)
    im = (np.array(im))
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()
    img_list = np.array(list(r) + list(g) + list(b), dtype='uint8')
    img_list = img_list[np.newaxis, :]

    for img_name in iter_images:
        im = Image.open(img_path + img_name + '.jpg', 'r')
        im = ImageOps.fit(im, (PIXELS, PIXELS), Image.ANTIALIAS)
        im = (np.array(im))
        r = im[:, :, 0].flatten()
        g = im[:, :, 1].flatten()
        b = im[:, :, 2].flatten()
        img = np.array(list(r) + list(g) + list(b), dtype='uint8')
        img_list = np.vstack((img_list, img[np.newaxis, :]))

    hkl.dump(img_list, output_pkl + '_data.hpy', mode='w', compression='gzip')
    hkl.dump(data['label'], output_pkl + '_labels.hpy', mode='w')

    del img_list
    del data
def SaveBigDict(filename, root):
    if filename[-4:] != ".hkl":
        filename += ".hkl"
    if "GammaWStatis" in root.keys():
        gammaw = root["GammaWStatis"]
        gammaw["WeightAccu"] = array(gammaw["WeightAccu"], dtype=complex64)
    hkl.dump(root, "_" + filename, mode='w', compression='gzip')
    os.rename("_" + filename, filename)
def test_astropy_quantity_array():
    a = Quantity([1, 2, 3], unit='m')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")

    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
def test_astropy_angle_array():
    a = Angle([1, 2, 3], unit='degree')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")

    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
def safe_store_h(path, o):
    print 'storing hkl:' + path
    directory = path[:path.rfind('/')]
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(path, "w") as f:
        hkl.dump(o, f)
        f.close()
def test_astropy_angle():
    for uu in ['radian', 'degree']:
        a = Angle(1.02, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")

        assert a == b
        assert a.unit == b.unit
def save_weights(self, f_weights):
    ## previously saved as :: ca.W.get_value(borrow=True)
    to_hickle = dict(
        W=self.W.get_value(borrow=True),
        b=self.b.get_value(borrow=True),
        b_prime=self.b_prime.get_value(borrow=True),
    )
    hickle.dump(to_hickle, f_weights, mode='w', compression='gzip')
def saveToHickle(array, name):
    """Save a numpy array to a hickle/HDF5 format binary file."""
    try:
        import hickle
    except:
        raise Exception("### The Hickle package is required!")

    output = open(name, 'w')
    hickle.dump(array, output, protocol=2)
    output.close()
def collate_data(runname, filename=None, regenerate=False, **opts):
    """ pull out all of the necessary information from the individual data files
    this takes awhile, so this data is saved to disk.
    """

    # if it's already made, load it and give it back
    # else, start with the making!
    if os.path.isfile(filename) and regenerate == False:
        print 'loading all data'
        with open(filename, "r") as f:
            outdict = hickle.load(f)
        return outdict

    # define output containers
    outvar = ['stellar_mass', 'sfr_30', 'sfr_100', 'half_time']
    outdict = {q: {f: [] for f in ['q50', 'q84', 'q16']} for q in outvar}
    for f in ['objname', 'agebins', 'weights', 'z_fraction']:
        outdict[f] = []

    # we want MASS, SFR_100, Z_FRACTION CHAIN, and AGEBINS for each galaxy
    pfile.run_params['zred'] = None  # make sure this is reset
    basenames = find_all_prospector_results(runname)
    for i, name in enumerate(basenames):

        # load output from fit
        try:
            res, _, model, prosp = load_prospector_data(name)
        except:
            print name.split('/')[-1] + ' failed to load. skipping.'
            continue
        if (res is None) or (prosp is None):
            continue

        outdict['objname'] += [name.split('/')[-1]]
        print 'loaded ' + outdict['objname'][-1]

        # agebins (and generate model)
        pfile.run_params['objname'] = outdict['objname'][-1]
        model = pfile.load_model(**pfile.run_params)
        outdict['agebins'] += [model.params['agebins']]

        # zfraction
        zidx = model.theta_index['z_fraction']
        outdict['z_fraction'] += [res['chain'][prosp['sample_idx'], zidx]]
        outdict['weights'] += [prosp['weights']]

        # extra variables
        for v in outvar:
            for f in ['q50', 'q84', 'q16']:
                outdict[v][f] += [prosp['extras'][v][f]]

    # dump files and return
    hickle.dump(outdict, open(filename, "w"))
    return outdict
def create_moving_line(nt, line_len, nx, x0, y0, speed):
    X = np.zeros((nt, nx, nx)).astype(np.float32)
    for i in range(nt):
        xt = x0 + i * speed
        X[i, y0:y0 + line_len, xt] = 1

    file_name = 'line.hkl'
    hkl.dump(X, open(file_name, 'w'))
    X = hkl.load(open(file_name))
def test_embedded_array():
    """ See https://github.com/telegraphic/hickle/issues/24 """

    d_orig = [[np.array([10., 20.]), np.array([10, 20, 30])],
              [np.array([10, 2]), np.array([1.])]]
    hickle.dump(d_orig, 'test.h5')
    d_hkl = hickle.load('test.h5')

    for ii, xx in enumerate(d_orig):
        for jj, yy in enumerate(xx):
            assert np.allclose(d_orig[ii][jj], d_hkl[ii][jj])

    print d_hkl
    print d_orig
def scanImage(img, numStages, Npos):
    return_coordinate_list = []
    integral_image_list, coordinate_list = getIntegralImages(1280, 1600)
    for idx, I in enumerate(integral_image_list):
        is_face = cascade(I, Npos, numStages)
        k, j = coordinate_list[idx]
        if is_face == 1:
            #cv2.rectangle(img, (k,j), (k+64, j+64), (255,0,0), 3)
            return_coordinate_list.append((k, j))
        print idx
    hkl.dump(return_coordinate_list, 'return_coordinate_list' + str(numStages) + '.hkl')
    return return_coordinate_list
def test_list_order():
    """ https://github.com/telegraphic/hickle/issues/26 """
    d = [np.arange(n + 1) for n in range(20)]
    hickle.dump(d, 'test.h5')
    d_hkl = hickle.load('test.h5')

    try:
        for ii, xx in enumerate(d):
            assert d[ii].shape == d_hkl[ii].shape
        for ii, xx in enumerate(d):
            assert np.allclose(d[ii], d_hkl[ii])
    except AssertionError:
        print d[ii], d_hkl[ii]
        raise
def save_to_internal(self, data):
    """save """
    if self.filetype == "pickle":
        pickle.dump(data, open(self.location_internal, "wb"))
    elif self.filetype == "hickle":
        import hickle
        hickle.dump(data, open(self.location_internal, "wb"))
    else:
        raise ValueError(
            "Invalid filetype {} (must be {} or {})".format(
                self.filetype, "pickle", "hickle"
            )
        )
def test_astropy_skycoord():
    ra = Angle(['1d20m', '1d21m'], unit='degree')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)

    ra = Angle(['1d20m', '1d21m'], unit='hourangle')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)
def test_astropy_quantity():
    for uu in ['m^3', 'm^3 / s', 'kg/pc']:
        a = Quantity(7, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")

        assert a == b
        assert a.unit == b.unit

        a *= a
        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")
        assert a == b
        assert a.unit == b.unit
def reweigh(Npos):
    total_images = Npos * 2
    features = hkl.load('features' + str(Npos) + '.hkl')
    label = np.zeros((total_images, 1))
    label[:, 0] = [1] * Npos + [0] * Npos
    weight = np.ones((total_images, 1)) / total_images

    feature_index_list = []
    alpha_list = []
    theta_list = []
    polarity_list = []
    best_result_list = []

    for t in xrange(wcnum):
        currentMin, theta, polarity, featureIdx, bestResult = getWeakClassifier(features, weight, label, Npos)
        alpha = log((1 - currentMin) / currentMin) / 2.0
        Z = 2.0 * sqrt(currentMin * (1.0 - currentMin))

        feature_index_list.append(featureIdx)
        alpha_list.append(alpha)
        theta_list.append(theta)
        polarity_list.append(polarity)
        best_result_list.append(bestResult)

        print "---"
        print "t", t
        print "featureIdx", featureIdx

        for i in xrange(total_images):
            weight[i, 0] = (weight[i, 0] * exp(-1 * alpha * label[i] * bestResult[i])) / Z

    hkl.dump(feature_index_list, 'feature_index_list' + str(Npos) + ".hkl")
    hkl.dump(alpha_list, 'alpha_list' + str(Npos) + ".hkl")
    hkl.dump(theta_list, 'theta_list' + str(Npos) + ".hkl")
    hkl.dump(polarity_list, 'polarity_list' + str(Npos) + ".hkl")
    hkl.dump(best_result_list, 'best_result_list' + str(Npos) + ".hkl")
def test_astropy_table():
    t = Table([[1, 2], [3, 4]], names=('a', 'b'), meta={'name': 'test_thing'})

    hkl.dump({'a': t}, "test_ap.h5")
    t2 = hkl.load("test_ap.h5")['a']

    print(t)
    print(t.meta)
    print(t2)
    print(t2.meta)

    print(t.dtype, t2.dtype)
    assert t.meta == t2.meta
    assert t.dtype == t2.dtype

    assert np.allclose(t['a'].astype('float32'), t2['a'].astype('float32'))
    assert np.allclose(t['b'].astype('float32'), t2['b'].astype('float32'))
def dump_names(ent_feats_dir):
    st = ['mean', 'var', 'median', 'max', 'min', 'max-min']
    n = []
    n.extend(['ent_q_diffs_' + str(x) for x in range(21)])
    n.extend(['ent_q_diffs_' + x for x in st])
    n.extend(['ent_q_diff_diffs_' + str(x) for x in range(21)])
    n.extend(['ent_q_diff_diffs_' + x for x in st])
    for i in range(4):
        n.extend(['ent_q_diff_block_' + str(i) + '_' + str(x) for x in range(21)])
        n.extend(['ent_q_diff_diffs_' + str(i) + '_' + x for x in st])
    n.extend(['ent_p_' + str(x) for x in range(20)])
    n.extend(['ent_p_diffs_' + str(x) for x in range(20)])
    hickle.dump(n, os.path.join(ent_feats_dir, 'ent_feats_names'))
def parse_data_to_internal(self, data=None):
    """ Parse data and save to pickle/hickle """
    if data is None:
        data = parse.getdata(open(self.location_dat, "rb"),
                             argnum=self.argnum, close=True)

    if self.filetype == "pickle":
        pickle.dump(data, open(self.location_internal, "wb"))
    elif self.filetype == "hickle":
        import hickle
        hickle.dump(data, open(self.location_internal, "wb"))
    else:
        raise ValueError(
            "Invalid filetype {} (must be {} or {})".format(
                self.filetype, "pickle", "hickle"
            )
        )
def parse_data_to_internal(self, data=None):
    """Use numpy loadtxt """
    if data is None:
        kwargs = self.kwargs
        data = np.loadtxt(
            open(self.location_dat, "rb"), **kwargs
        )

    if self.filetype == "pickle":
        pickle.dump(data, open(self.location_internal, "wb"))
    elif self.filetype == "hickle":
        import hickle
        hickle.dump(data, open(self.location_internal, "wb"))
    else:
        raise ValueError(
            "Invalid filetype {} (must be {} or {})".format(
                self.filetype, "pickle", "hickle"
            )
        )
def main(args): cur_dir = os.path.dirname(os.path.realpath(__file__)) root_dir = os.path.dirname(os.path.dirname(cur_dir)) data_dir = os.path.join(root_dir, 'data', 'slxrobot') anno_path = os.path.join(data_dir, 'Annotations') image_path = os.path.join(data_dir, 'Images') mask_path = os.path.join(data_dir, 'Masks') mask_dest_path = os.path.join(root_dir, 'data', 'cache', 'slxrobot', 'Masks') if not os.path.exists(mask_dest_path): os.makedirs(mask_dest_path) classes = [ '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush', 'floor' ] cls_to_id = dict(zip(classes, range(len(classes)))) id_to_cls = dict(zip(range(len(classes)), classes)) image_list = os.listdir(image_path) gt_sdsdb = [] for image in image_list: image_idx = image.split('.')[0] try: image_i = int(image_idx) except: continue anno_file = os.path.join(anno_path, '%s.xml' % image_idx) if not os.path.exists(anno_file): continue tree = ET.ElementTree(file=anno_file) bool_masks = [] boxes = [] gt_classes = [] height = int(tree.find('imagesize').find('nrows').text) width = int(tree.find('imagesize').find('ncols').text) for object in tree.iter('object'): seg = object.find('segm') deleted = object.find('deleted') if int(deleted.text) == 1 or seg is None: continue cls = object.find('name').text if cls in cls_to_id: gt_classes.append(cls_to_id[cls]) else: print( 'Class [%s] is not in the training classes, ignoring it ...' 
% cls) continue box = seg.find('box') x1 = int(box.find('xmin').text) y1 = int(box.find('ymin').text) x2 = int(box.find('xmax').text) y2 = int(box.find('ymax').text) x1 = np.max((0, x1)) y1 = np.max((0, y1)) x2 = np.min((width - 1, x2)) y2 = np.min((height - 1, y2)) mask = seg.find('mask') mask_text = mask.text mask_file = os.path.join(mask_path, mask_text) mask_im = cv2.imread(mask_file) gray_mask = cv2.cvtColor(mask_im, cv2.COLOR_BGR2GRAY) bool_mask = np.full(gray_mask.shape, False, dtype=bool) mask_row, mask_col = np.where(gray_mask > 1) bool_mask[mask_row, mask_col] = True bool_masks.append(bool_mask) boxes.append([x1, y1, x2, y2]) gt_classes = np.asarray(gt_classes) bool_masks = np.asarray(bool_masks) boxes = np.asarray(boxes) gt_overlaps = np.zeros((gt_classes.size, len(classes)), dtype=np.float32) gt_overlaps[np.arange(gt_overlaps.shape[0]), gt_classes] = 1 max_overlaps = gt_overlaps.max(axis=1) gt_mask_file = os.path.join(mask_dest_path, '%s.hkl' % image_idx) gt_mask_flip_file = os.path.join(mask_dest_path, '%s_flip.hkl' % image_idx) if not os.path.exists(gt_mask_file): print('Saving %s' % gt_mask_file) hkl.dump(bool_masks.astype('bool'), gt_mask_file, mode='w', compression='gzip') if not os.path.exists(gt_mask_flip_file): print('Saving %s' % gt_mask_flip_file) hkl.dump(bool_masks[:, :, ::-1].astype('bool'), gt_mask_flip_file, mode='w', compression='gzip') sdb = { 'boxes': boxes, 'cache_seg_inst': '%s/%s.hkl' % (os.path.relpath(mask_dest_path, cur_dir), image_idx), 'flipped': False, 'gt_classes': gt_classes, 'gt_overlaps': gt_overlaps, 'height': height, 'width': width, 'image': '%s/%s' % (os.path.relpath(image_path, cur_dir), image), 'max_classes': gt_overlaps.argmax(axis=1), 'max_overlaps': max_overlaps, } gt_sdsdb.append(sdb) if args.flip: sdb = { 'boxes': boxes, 'cache_seg_inst': '%s/%s.hkl' % (os.path.relpath(mask_dest_path, cur_dir), image_idx), 'flipped': True, 'gt_classes': gt_classes, 'gt_overlaps': gt_overlaps, 'height': height, 'width': width, 'image': '%s/%s' % (os.path.relpath(image_path, cur_dir), image), 'max_classes': gt_overlaps.argmax(axis=1), 'max_overlaps': max_overlaps, } gt_sdsdb.append(sdb) gt_sdsdb_file = os.path.join(mask_dest_path, 'gt_sdsdb.pkl') with open(gt_sdsdb_file, 'wb') as f: print('Length of gt_sdsdb:', len(gt_sdsdb)) pkl.dump(gt_sdsdb, f, protocol=pkl.HIGHEST_PROTOCOL)
def cache(n_apparent, n_true, inl_stats, R_errs, t_errs):
    hkl.dump([n_apparent, n_true, inl_stats, R_errs, t_errs], open(path(), 'w'))
def main(): for fn in sorted(os.listdir(DATA_DIR)): print fn if (fn[-3:] == 'hkl'): OUTPUT_DIR_IMAGES = OUTPUT_DIR + fn[0:-4] + '/' if not os.path.exists(OUTPUT_DIR_IMAGES): os.makedirs(OUTPUT_DIR_IMAGES) shape = (128, 128) [ grids, gridglobal_x, gridglobal_y, transforms, vel_east, vel_north, acc_x, acc_y, adjust_indices ] = hkl.load(DATA_DIR + fn) grids = np.array(grids) grids = crop_center(grids, shape[0]) do_plot = True # Toggle me for DOGMA plots! # PARAMETERS p_B = 0.02 # birth probability Vb = 2 * 10**4 # number of new born particles V = 2 * 10**5 # number of consistent particles state_size = 4 # number of states: p,v: 4 alpha = 0.9 # information ageing (discount factor) p_A = 1.0 # association probability: only relevant for Doppler measurements T = 0.1 # measurement frequency (10 Hz) p_S = 0.99 # particle persistence probability # velocity, acceleration variance initialization scale_vel = 12. scale_acc = 2. # position, velocity, acceleration process noise process_pos = 0.06 process_vel = 2.4 process_acc = 0.2 # print debug values verbose = False # for plotting thresholds mS = 3. # static threshold epsilon = 10. # vel mag threshold epsilon_occ = 0.75 # occ mag threshold # initialize a grid start = time.time() grid_cell_array = GridCellArray(shape, p_A) end = time.time() print "grid_cell_array initialization took", end - start # initialize a particle array start = time.time() particle_array = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc) end = time.time() print "particle_array initialization took", end - start # data: [N x 2 x W x D] # second dimension is masses {0: m_free, 1: m_occ} # in original grid: 0: unknown, 1: occupied, 2: free (raw data) data = create_DST_grids(grids) # number of measurements in the run N = data.shape[0] # list of 4x256x256 grids with position, velocity information DOGMA = [] var_x_vel = [] var_y_vel = [] covar_xy_vel = [] var_x_acc = [] var_y_acc = [] covar_xy_acc = [] # run particle filter iterations for i in range(N): start = time.time() # initializes a measurement cell array meas_free = data[i, 0, :, :].flatten() meas_occ = data[i, 1, :, :].flatten() meas_cell_array = MeasCellArray(meas_free, meas_occ, grid_cell_array.get_shape(), pseudoG=1.) 
# algorithm 1: ParticlePrediction (stored in particle_array) ParticlePrediction(particle_array, grid_cell_array, res=1.0) # algorithm 2: ParticleAssignment (stored in particle_array) ParticleAssignment(particle_array, grid_cell_array) # algorithm 3: OccupancyPredictionUpdate (stored in grid_cell_array) OccupancyPredictionUpdate(meas_cell_array, grid_cell_array, particle_array, p_B, alpha, check_values=verbose) # algorithm 4: PersistentParticleUpdate (stored in particle_array) PersistentParticleUpdate(particle_array, grid_cell_array, meas_cell_array, check_values=verbose) # algorithm 5: NewParticleInitialization if p_B == 0: empty_array = True else: empty_array = False birth_particle_array = ParticleArray( Vb, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc, birth=True, empty_array=empty_array) NewParticleInitialization(Vb, grid_cell_array, meas_cell_array, birth_particle_array, check_values=verbose) # algorithm 6: StatisticMoments (stored in grid_cell_array) StatisticMoments(particle_array, grid_cell_array) if state_size == 4: newDOGMA, new_var_x_vel, new_var_y_vel, new_covar_xy_vel = get_dogma( grid_cell_array, grids, state_size, grids[i, :, :], shape) var_x_vel.append(new_var_x_vel) var_y_vel.append(new_var_y_vel) covar_xy_vel.append(new_covar_xy_vel) # save the velocities at this timestep: no real occupancy grid computed here; we will just use the measurement grid for now DOGMA.append(newDOGMA) # algorithm 7: Resample # skips particle initialization for particle_array_next because all particles will be copied in particle_array_next = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, \ scale_vel, scale_acc, process_pos, process_vel, process_acc, empty_array = True) Resample(particle_array, birth_particle_array, particle_array_next, check_values=verbose) # switch to new particle array particle_array = particle_array_next particle_array_next = None end = time.time() print "Time per iteration: ", end - start # Plotting: The environment is stored in grids[i] (matrix of values (0,1,2)) # The DOGMA is stored in DOGMA[i] if (do_plot): head_grid = dogma2head_grid(DOGMA[i], var_x_vel[i], var_y_vel[i], covar_xy_vel[i], mS, epsilon, epsilon_occ) occ_grid = grids[i, :, :] title = "DOGMa Iteration %d" % i colorwheel_plot(head_grid, occ_grid=occ_grid, m_occ_grid=DOGMA[i][0, :, :], title=os.path.join(OUTPUT_DIR_IMAGES, title), show=True, save=True) if (((i + 1) % 50 == 0) or (i == N - 1)): hkl.dump([DOGMA, var_x_vel, var_y_vel, covar_xy_vel], os.path.join(OUTPUT_DIR, fn), mode='w') print "DOGMA written to hickle file." print "Iteration ", i, " complete" return
def save_model_params(self, filename):
    """Pickles the parameters within a Lasagne model."""
    data = lasagne.layers.get_all_param_values(self._network)
    filename = os.path.join('./', filename)
    with open(filename, 'w') as f:
        hickle.dump(data, f)
max_lr_contact = max([nb_lr_contacts[item] for item in nb_lr_contacts.keys()])

# normalization
print("> extract normalized Hi-C data... ")
hr_contacts_norm_dict = {item: np.log2(hr_contacts_dict[item]*max_hr_contact/sum(sum(hr_contacts_dict[item]))+1)
                         for item in hr_contacts_dict.keys()}
lr_contacts_norm_dict = {item: np.log2(lr_contacts_dict[item]*max_lr_contact/sum(sum(lr_contacts_dict[item]))+1)
                         for item in lr_contacts_dict.keys()}

max_hr_contact_norm = {item: hr_contacts_norm_dict[item].max() for item in hr_contacts_dict.keys()}
max_lr_contact_norm = {item: lr_contacts_norm_dict[item].max() for item in lr_contacts_dict.keys()}

# WRITE NB CONTACT FILES
nb_hr_contactsFile = os.path.join(out_dir, out_dir + "_nb_hr_contacts.hkl")
hkl.dump(nb_hr_contacts, nb_hr_contactsFile)
print("... written: " + nb_hr_contactsFile)

nb_lr_contactsFile = os.path.join(out_dir, out_dir + "_nb_lr_contacts.hkl")
hkl.dump(nb_lr_contacts, nb_lr_contactsFile)
print("... written: " + nb_lr_contactsFile)

# WRITE MAX CONTACT FILES
max_hr_contact_normFile = os.path.join(out_dir, out_dir + "_max_hr_contact_norm.hkl")
hkl.dump(max_hr_contact_norm, max_hr_contact_normFile)
print("... written: " + max_hr_contact_normFile)

max_lr_contact_normFile = os.path.join(out_dir, out_dir + "_max_lr_contact_norm.hkl")
hkl.dump(max_lr_contact_norm, max_lr_contact_normFile)
print("... written: " + max_lr_contact_normFile)
# NORMALIZATION NOT DONE HERE
print("> extract normalized Hi-C data... ")
#coexpr_contacts_norm_dict = {item:np.log2(coexpr_contacts_dict[item]*max_coexpr_contact/sum(sum(coexpr_contacts_dict[item]))+1) for item in coexpr_contacts_dict.keys()}
#hic_contacts_norm_dict = {item:np.log2(hic_contacts_dict[item]*max_hic_contact/sum(sum(hic_contacts_dict[item]))+1) for item in hic_contacts_dict.keys()}
#max_coexpr_contact_norm={item:coexpr_contacts_norm_dict[item].max() for item in coexpr_contacts_dict.keys()}
#max_hic_contact_norm={item:hic_contacts_norm_dict[item].max() for item in hic_contacts_dict.keys()}

# STILL SET THE VARIABLES BECAUSE USED IN THE FUNCTIONS
coexpr_contacts_norm_dict = coexpr_contacts_dict
hic_contacts_norm_dict = hic_contacts_dict
max_coexpr_contact = {item: coexpr_contacts_dict[item].max() for item in coexpr_contacts_dict.keys()}
max_hic_contact = {item: hic_contacts_dict[item].max() for item in hic_contacts_dict.keys()}

# WRITE NB CONTACT FILES
nb_coexpr_contactsFile = os.path.join(out_dir, out_dir + "_nb_coexpr_contacts.hkl")
hkl.dump(nb_coexpr_contacts, nb_coexpr_contactsFile)
print("... written: " + nb_coexpr_contactsFile)

nb_hic_contactsFile = os.path.join(out_dir, out_dir + "_nb_hic_contacts.hkl")
hkl.dump(nb_hic_contacts, nb_hic_contactsFile)
print("... written: " + nb_hic_contactsFile)

# WRITE MAX CONTACT FILES
#max_coexpr_contact_normFile = os.path.join(out_dir, out_dir + "_max_coexpr_contact_norm.hkl")
#hkl.dump(max_coexpr_contact_norm,max_coexpr_contact_normFile)
#print("... written: " + max_coexpr_contact_normFile)

#max_hic_contact_normFile = os.path.join(out_dir, out_dir + "_max_hic_contact_norm.hkl")
#hkl.dump(max_hic_contact_norm,max_hic_contact_normFile)
#print("... written: " + max_hic_contact_normFile)
def save_hickle_file(filename, data):
    check_cache()
    filename = filename + '.hickle'
    with open(filename, 'w') as f:
        hickle.dump(data, f, mode='w')
for part in range(part_num):
    print "part", part, "of %s features" % split
    anno_path = '/home/jason6582/sfyc/attention-tensorflow/mscoco/cocodata/%s/%s.annotations_%s.pkl'\
                % (split, split, str(part))
    save_path = '/home/jason6582/sfyc/attention-tensorflow/mscoco/feature_2048/%s/%s.features_%s.hkl'\
                % (split, split, str(part))
    with open(anno_path, 'rb') as f:
        annotations = pickle.load(f)
    image_path = list(annotations['file_name'].unique())
    n_examples = len(image_path)

    all_feats = np.ndarray([n_examples, 2048], dtype=np.float32)
    for start, end in zip(range(0, n_examples, batch_size),
                          range(batch_size, n_examples + batch_size, batch_size)):
        image_batch_file = image_path[start:end]
        image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'),
                                   image_batch_file))
        image_batch = image_batch.astype(np.float32)
        image_batch = np.transpose(image_batch, (0, 3, 1, 2))
        image_batch = torch.Tensor(image_batch).cuda()
        image_var = Variable(image_batch, volatile=True).cuda()
        feats = resnet152(image_var)
        feats = np.reshape(feats.data.cpu().numpy(), [-1, 2048])
        # feats = np.transpose(feats, (0, 2, 1))
        all_feats[start:end, :] = feats
        print("Processed %d %s features.." % (end, split))

    # use hickle to save huge feature vectors
    hickle.dump(all_feats, save_path)
    print("Saved %s.." % (save_path))
def save_results(results_list: list, prefix: str): """Saves the results of a simulation run to disk. results_list is a list of tuples, where each tuple consists of a compressed, pickled dict and a dict of the shapes of the data in the compressed dict (metadata)""" # We use the first metadata to infer basic shape information current_shapes = results_list[0][1] replications = len(results_list) types = { "total_cash": np.float_, "total_excess_capital": np.float_, "total_profitslosses": np.float_, "total_contracts": np.int_, "total_operational": np.int_, "total_reincash": np.float_, "total_reinexcess_capital": np.float_, "total_reinprofitslosses": np.float_, "total_reincontracts": np.int_, "total_reinoperational": np.int_, "total_catbondsoperational": np.int_, "market_premium": np.float_, "market_reinpremium": np.float_, "cumulative_bankruptcies": np.int_, "cumulative_market_exits": np.int_, "cumulative_unrecovered_claims": np.float_, "cumulative_claims": np.float_, "cumulative_bought_firms": np.int_, "cumulative_nonregulation_firms": np.int_, "market_diffvar": np.float_, # Would store these two as an array of lists, but hdf5 can't do that "rc_event_schedule_initial": np.object, "rc_event_damage_initial": np.object, "number_riskmodels": np.int_, "unweighted_network_data": np.float_, "network_node_labels": np.float_, "network_edge_labels": np.float_, "number_of_agents": np.int_, "insurance_cumulative_dividends": np.float_, "reinsurance_cumulative_dividends": np.float_, # These are the big ones, so we need to pay attention to data types "insurance_firms_cash": np.float32, "reinsurance_firms_cash": np.float32, "insurance_contracts": np.uint16, "reinsurance_contracts": np.uint16, } # bad_logs are the logs that don't have a consistent size between replications bad_logs = [ "rc_event_schedule_initial", "rc_event_damage_initial", "insurance_contracts", "insurance_firms_cash", "reinsurance_contracts", "reinsurance_firms_cash", ] event_info_names = ["rc_event_schedule_initial", "rc_event_damage_initial"] logs_found = current_shapes.keys() for name in logs_found: if name not in types: print(f"Warning: type of log {name} not known, assuming float") types[name] = np.float_ shapes = {} for name in logs_found: if name not in bad_logs: # These are mostly standard 1-d timeseries, but may also include stuff like no_riskmodels shapes[name] = (replications, ) + current_shapes[name] else: # We could probably do this for all of the data, but this is fine for now. # These are sets of timeseries: the sets have variable size (also the event schedules) # We use the uncompressed metadata found_shapes = [result[1][name] for result in results_list] # This only works because the shapes only vary in one dimension (tuple comparison is lexicographic) shapes[name] = (replications, ) + max(found_shapes) # Make a skeleton data structure so we only need to have one uncompressed log in memory at a time results_dict = { name: np.zeros(shape=shapes[name], dtype=types[name]) for name in current_shapes.keys() } # results_dict is a dictionary of numpy arrays, should be efficient to store. # The event schedules/damages are of differing lengths. 
Could pad them with NaNs, but probably # would be more trouble than it's worth for i, result_tuple in enumerate(results_list): result = pickle.loads(zlib.decompress(result_tuple[0])) for name in results_dict: if (name not in event_info_names) and hasattr( result[name], "__len__"): arr = np.asarray(result[name]) shape_slice = tuple([slice(i) for i in arr.shape]) results_dict[name][i][shape_slice] = result[name] else: results_dict[name][i] = result[name] # Need to do a little pre-processing for key in list(results_dict.keys()): if not isinstance(results_dict[key], np.ndarray): raise ValueError(f"Results_dict[{key}] is not an array") if results_dict[key].size == 0: del results_dict[key] continue if results_dict[key].dtype == np.object: results_dict[key] = results_dict[key].tolist() data = results_dict # data = (True, (results_dict, event_info)) # We store everything in one file(!) filename = "data/" + prefix + "_full_logs.hdf" if os.path.exists(filename): # Don't want to blindly overwrite, so make backups import time backupfilename = filename + "." + time.strftime("%Y-%m-%dT%H%M%S") os.rename(filename, backupfilename) # data is a tuple, first element indicating whether the logs are slim, second element being the data # TODO: Make everything else work with this new format # Import here so sandman never tries to import import hickle hickle.dump(data, filename, compression="gzip")
def savefile(history, path):
    # if not os.path.exists(path):
    #     os.makedirs(path)
    hkl.dump(history, path)
def main():
    for fn in sorted(os.listdir(DATA_DIR)):
        if (fn[-3:] == 'hkl'):
            OUTPUT_DIR_IMAGES = OUTPUT_DIR + fn[0:-4] + '/'
            if not os.path.exists(OUTPUT_DIR_IMAGES):
                os.makedirs(OUTPUT_DIR_IMAGES)
            print fn
            [DOGMA, var_x_vel, var_y_vel, covar_xy_vel] = hkl.load(os.path.join(DATA_DIR, fn))

            # posO,posF,velX,velY,meas_grid
            DOGMA = np.array(DOGMA)
            var_x_vel = np.array(var_x_vel)
            var_y_vel = np.array(var_y_vel)
            covar_xy_vel = np.array(covar_xy_vel)

            do_plot = True  # Toggle me for DOGMA plots!

            # velocity, acceleration variance initialization
            scale_vel = 12.
            scale_acc = 2.

            # position, velocity, acceleration process noise
            process_pos = 0.06
            process_vel = 2.4
            process_acc = 0.2

            # for plotting thresholds
            mS = 4.  # 3. # 4. static threshold
            epsilon = 10.  # vel mag threshold
            epsilon_occ = 0.95  # 0.75 # occ mag threshold

            # number of measurements in the run
            N = DOGMA.shape[0]

            newDOGMA = mahalanobis_filter(DOGMA, var_x_vel, var_y_vel, covar_xy_vel,
                                          mS, epsilon, epsilon_occ)
            print newDOGMA.shape

            if not os.path.exists(OUTPUT_DIR):
                os.makedirs(OUTPUT_DIR)
            hkl.dump(newDOGMA, os.path.join(OUTPUT_DIR + fn), mode="w")

            if do_plot:
                for i in range(N):
                    # Plotting: The environment is stored in grids[i] (matrix of values (0,1,2))
                    # The DOGMA is stored in DOGMA[i]
                    head_grid = dogma2head_grid(newDOGMA[i, :, :, :], DOGMA[i, 0, :, :],
                                                var_x_vel[i], var_y_vel[i], covar_xy_vel[i],
                                                mS, epsilon, epsilon_occ)
                    occ_grid = DOGMA[i, 4, :, :]
                    title = str(i)  # "DOGMa Sequence %s Iteration %d" % (fn[0:5], i)
                    colorwheel_plot(head_grid, occ_grid=occ_grid, m_occ_grid=DOGMA[i, 0, :, :],
                                    title=os.path.join(OUTPUT_DIR_IMAGES, title),
                                    show=True, save=True)
                    print "Iteration ", i, " complete"
    return
def process_data(): splits = {s: [] for s in ['val']} # 'train', 'test', splits['val'] = val_recordings splits['test'] = test_recordings not_train = splits['val'] + splits['test'] # for c in categories: # Randomly assign recordings to training and testing. Cross-validation done across entire recordings. c_dir = os.path.join(DATA_DIR, 'RAW') # no \ seq_clip_list = {} folders = os.listdir(c_dir) # list(os.walk(c_dir, topdown=False))[-1][-2] for folder in folders: if folder in excluded_list: continue filenames = sorted(glob.glob1(os.path.join(c_dir, folder), '*.jpg')) num_pat = re.compile("([0-9]+)\.") # extract the numbering of frame img_ids = [ int(num_pat.search(filename).group(1)) for filename in filenames ] start_id = min(img_ids) cur_id = start_id start_i = 0 fn_groups = [] groups = [] for i, img_id in enumerate(img_ids): if img_id == cur_id: cur_id += 1 if img_id == img_ids[-1]: fn_groups.append((start_id, cur_id - 1)) groups.append((start_i, i + 1)) else: # if there is discontinuity in frame number, start a new group fn_groups.append((start_id, cur_id - 1)) # frame number groups.append((start_i, i + 1)) # list number # (start_i, end_i + 1), (start_id, end_id) # filename[start_i:i+1] = ['start_id', ... 'end_id'] start_id = img_id start_i = i + 1 cur_id = img_id + 1 # predictive coding! seq_clip_list[folder] = (fn_groups, groups) if 'train' in splits: splits['train'] += [(folder, clip) for clip in fn_groups if (folder, clip) not in not_train] # TODO! for split in splits: t0 = time() im_list = [] source_list = [] # corresponds to recording that image came from for folder, clip in splits[split]: im_dir = os.path.join(DATA_DIR, 'RAW', folder) filenames = sorted(glob.glob1(os.path.join(c_dir, folder), '*.jpg')) fn_groups, groups = seq_clip_list[folder] id_clip = groups[fn_groups.index(clip)] for res in range(downsample_rate): index_rng = range(id_clip[0] + res, id_clip[1], downsample_rate) im_list += [ im_dir + '\\' + f for f in filenames[id_clip[0] + res:id_clip[1]:downsample_rate] ] source_list += [ folder + '-%d_%d-%d' % (clip[0], clip[1], res) ] * len(index_rng) print('Creating ' + split + ' data: ' + str(len(im_list)) + ' images') X = np.zeros((len(im_list), ) + desired_im_sz + (3, ), np.uint8) for i, im_file in enumerate(im_list): im = imread(im_file) X[i] = process_im(im, desired_im_sz) hkl.dump(X, os.path.join(DATA_DIR, 'X_' + split + '.hkl')) hkl.dump(source_list, os.path.join(DATA_DIR, 'sources_' + split + '.hkl')) print('Spent %.1f s.' % (time() - t0))
# Working with files
# MATLAB
# save myfile
# save myfile a b
# clear a b
# clear
# load myfile
#-----------------------------------------------------------
# PYTHON
import sys
import dill
import hickle

dill.dump_session('myfile1.pkl')    # saves all variables to the file myfile1
hickle.dump([A, B], 'myfile.pkl')   # saves A, B to the file myfile
del A, B                            # deletes A, B
dill.load_session('myfile1.pkl')    # loads the variables from the file myfile1

#for name in dir():
#    print(name)
#    if not name.startswith('_'):   # deleting variables
#        del globals()[name]

# Functions
# MATLAB
# function y = myfunction(x)
#     a = [-2 -1 0 1];
#     y = a + x;
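To mirror MATLAB's `load myfile` for the hickle file as well, a minimal sketch (my addition, assuming `myfile.pkl` was written by the `hickle.dump([A, B], 'myfile.pkl')` call above):

```python
# Hedged sketch: reads back the list saved with hickle above.
import hickle

A, B = hickle.load('myfile.pkl')   # counterpart of MATLAB's "load myfile"
print(A, B)
```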
                                               axis=-1)), axis=-1)
        if not found:
            evidential_all = evidential_all_current
            found = True
        else:
            evidential_all = np.concatenate(
                (evidential_all, evidential_all_current), axis=0)
        source_list += [name] * evidential_all_current.shape[0]

    if split == 'train':
        hkl.dump(
            evidential_all,
            os.path.join(master_save_folder_double,
                         'X_' + split + '_prefiltered' + '.hkl'))
        hkl.dump(
            source_list,
            os.path.join(master_save_folder_double,
                         'sources_' + split + '_prefiltered' + '.hkl'))
    else:
        hkl.dump(
            evidential_all,
            os.path.join(master_save_folder_double, 'X_' + split + '.hkl'))
        hkl.dump(
            source_list,
            os.path.join(master_save_folder_double,
                         'sources_' + split + '.hkl'))

    hkl.dump(
def main(): # batch size for extracting feature vectors from vggnet. batch_size = 100 # maximum length of caption(number of word). if caption is longer than max_length, deleted. max_length = 15 #15 # if word occurs less than word_count_threshold in training dataset, the word index is special unknown token. word_count_threshold = 1 # vgg model path vgg_model_path = './data/imagenet-vgg-verydeep-19.mat' #path to resized images i_fp = './image/2014_resized/' #n_images = 67691 #building dataset print 'Start processing caption data' train_dataset = get_caption_data(i_fp, max_length) print 'Finished processing caption data' #train, val, and test --> 70, 15, and 15 train_cutoff = int(0.70 * len(train_dataset)) val_cutoff = int(0.85 * len(train_dataset)) #path to data directory d_fp = './data' if not os.path.exists(d_fp + '/train'): os.makedirs(d_fp + '/train') if not os.path.exists(d_fp + '/val'): os.makedirs(d_fp + '/val') if not os.path.exists(d_fp + '/test'): os.makedirs(d_fp + '/test') save_pickle(train_dataset[:train_cutoff], d_fp + '/train/train.annotations.pkl') save_pickle(train_dataset[train_cutoff:val_cutoff].reset_index(drop=True), d_fp + '/val/val.annotations.pkl') save_pickle(train_dataset[val_cutoff + 1:].reset_index(drop=True), d_fp + '/test/test.annotations.pkl') ################# train, val, and test data saved ##################### for split in ['train', 'val', 'test']: annotations = load_pickle(d_fp + '/%s/%s.annotations.pkl' % (split, split)) if split == 'train': word_to_idx = _build_vocab(annotations=annotations, threshold=word_count_threshold) save_pickle(word_to_idx, d_fp + '/%s/word_to_idx.pkl' % split) captions = _build_caption_vector(annotations=annotations, word_to_idx=word_to_idx, max_length=max_length) save_pickle(captions, d_fp + '/%s/%s.captions.pkl' % (split, split)) file_names, id_to_idx = _build_file_names(annotations) save_pickle(file_names, d_fp + '/%s/%s.file.names.pkl' % (split, split)) image_idxs = _build_image_idxs(annotations, id_to_idx) save_pickle(image_idxs, d_fp + '/%s/%s.image.idxs.pkl' % (split, split)) # prepare reference captions to compute bleu scores later image_ids = {} feature_to_captions = {} i = -1 for caption, image_id in zip(annotations['caption'], annotations['image_id']): if not image_id in image_ids: image_ids[image_id] = 0 i += 1 feature_to_captions[i] = [] feature_to_captions[i].append(caption.lower() + ' .') save_pickle(feature_to_captions, d_fp + '/%s/%s.references.pkl' % (split, split)) print "Finished building %s caption dataset" % split #extract conv5_3 feature vectors vggnet = Vgg19(vgg_model_path) vggnet.build() with tf.Session() as sess: tf.initialize_all_variables().run() for split in ['train', 'val', 'test']: anno_path = d_fp + '/%s/%s.annotations.pkl' % (split, split) save_path = d_fp + '/%s/%s.features.hkl' % (split, split) annotations = load_pickle(anno_path) image_path = list(annotations['file_name'].unique()) n_examples = len(image_path) all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32) for start, end in zip( range(0, n_examples, batch_size), range(batch_size, n_examples + batch_size, batch_size)): print start, '-', end image_batch_file = image_path[start:end] image_batch = np.array( map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(np.float32) feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch}) all_feats[start:end, :] = feats print("Processed %d %s features.." 
% (end, split)) # use hickle to save huge feature vectors hickle.dump(all_feats, save_path) print("Saved %s.." % (save_path))
def main(): with open(os.path.join(DATA_DIR, 'simulation.pickle'), 'rb') as f: start = time.time() # load sensor grid data (list of arrays) [grids, global_x_grid, global_y_grid] = pickle.load(f) # convert to numpy array grids = np.array(grids) end = time.time() print "Loading simulation datatook", end - start, len(grids), grids[0].shape # crop grids to the desired shape shape = (128,128) grids = np.array(grids) grids = crop_center(grids, shape[0]) print grids.shape do_plot = True # Toggle me for DOGMA plots! # PARAMETERS p_B = 0.02 # birth probability Vb = 2*10**4 # number of new born particles V = 2*10**5 # number of consistent particles state_size = 4 # number of states: p,v: 4 alpha = 0.9 # information ageing (discount factor) p_A = 1.0 # association probability: only relevant for Doppler measurements T = 0.1 # measurement frequency (10 Hz) p_S = 0.99 # particle persistence probability res = 1. # resolution of the grid cells # velocity, acceleration variance initialization scale_vel = 12. scale_acc = 2. # position, velocity, acceleration process noise process_pos = 0.06 process_vel = 2.4 process_acc = 0.2 # print debug values verbose = False # for plotting thresholds mS = 3. epsilon = 10. epsilon_occ = 0.75 # index where PF was interrupted index_stopped = 0 # initialize a grid start = time.time() grid_cell_array = GridCellArray(shape, p_A) end = time.time() print "grid_cell_array initialization took", end - start # initialize a particle array start = time.time() particle_array = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc) end = time.time() print "particle_array initialization took", end - start # data: [N x 2 x W x D] # second dimension is masses {0: m_free, 1: m_occ} # in original grid: 0: unknown, 1: occupied, 2: free (raw data) data = create_DST_grids(grids) # number of measurements in the run N = data.shape[0] # list of 4x128x128 grids with position, velocity information DOGMA = [] var_x_vel = [] var_y_vel = [] covar_xy_vel = [] var_x_acc = [] var_y_acc = [] covar_xy_acc = [] # run particle filter iterations for i in range(N): start = time.time() # initializes a measurement cell array meas_free = data[i,0,:,:].flatten() meas_occ = data[i,1,:,:].flatten() meas_cell_array = MeasCellArray(meas_free, meas_occ, grid_cell_array.get_shape(), pseudoG = 1.) 
# algorithm 1: ParticlePrediction (stored in particle_array) ParticlePrediction(particle_array, grid_cell_array, res=res) # algorithm 2: ParticleAssignment (stored in particle_array) ParticleAssignment(particle_array, grid_cell_array) # algorithm 3: OccupancyPredictionUpdate (stored in grid_cell_array) OccupancyPredictionUpdate(meas_cell_array, grid_cell_array, particle_array, p_B, alpha, check_values = verbose) # algorithm 4: PersistentParticleUpdate (stored in particle_array) PersistentParticleUpdate(particle_array, grid_cell_array, meas_cell_array, check_values = verbose) # algorithm 5: NewParticleInitialization if p_B == 0: empty_array = True else: empty_array = False birth_particle_array = ParticleArray(Vb, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc, birth = True, empty_array = empty_array) NewParticleInitialization(Vb, grid_cell_array, meas_cell_array, birth_particle_array, check_values = verbose) # algorithm 6: StatisticMoments (stored in grid_cell_array) StatisticMoments(particle_array, grid_cell_array) if (i + 1) > index_stopped: newDOGMA, new_var_x_vel, new_var_y_vel, new_covar_xy_vel = get_dogma(grid_cell_array, grids, state_size, grids[i,:,:], shape) var_x_vel.append(new_var_x_vel) var_y_vel.append(new_var_y_vel) covar_xy_vel.append(new_covar_xy_vel) # save the DOGMA at this timestep: before we had occupancy, free, but this is actually not the real occupancy plot # so we will just use the measurement grid for now if (i+1) > index_stopped: DOGMA.append(newDOGMA) # algorithm 7: Resample # skips particle initialization for particle_array_next because all particles will be copied in particle_array_next = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, \ scale_vel, scale_acc, process_pos, process_vel, process_acc, empty_array = True) Resample(particle_array, birth_particle_array, particle_array_next, check_values = verbose) # switch to new particle array particle_array = particle_array_next particle_array_next = None end = time.time() print "Time per iteration: ", end - start # Plotting: The environment is stored in grids[i] (matrix of values (0,1,2)) # The DOGMA is stored in DOGMA[i] if (do_plot): head_grid = dogma2head_grid(DOGMA[i], var_x_vel[i], var_y_vel[i], covar_xy_vel[i], mS, epsilon, epsilon_occ) occ_grid = grids[i,:,:] title = "DOGMa Iteration %d" % i colorwheel_plot(head_grid, occ_grid=occ_grid, m_occ_grid = DOGMA[i][0,:,:], title=os.path.join(OUTPUT_DIR, title), show=True, save=True) print "Iteration ", i, " complete" hkl.dump([DOGMA, var_x_vel, var_y_vel, covar_xy_vel], os.path.join(OUTPUT_DIR, 'DOGMA.hkl'), mode='w') print "DOGMA written to hickle file." return
def preprocess(p): p.load() #p = EEG.EEG('Patient_2', 'interictal', 17) print p #p.normalize_channels() #p.normalize_overall() print np.shape(p.data), p.data.nbytes # == (16, ~240k) data = p.data #eeg = np.rollaxis(data, 1) #print p.data[0:1, 0:20] global bin_fft, signal_duration, sample_length if bin_fft is None: pow2 = np.log2(p.sample_rate_in_hz * signal_duration_min) sample_length = int(2.0**(int(pow2) + 1)) # in samples, rounds up signal_duration = sample_length / p.sample_rate_in_hz print "Pow2: ", pow2 print "Signal duration : %6.2fsec = %d samples " % ( signal_duration, sample_length, ) ## Matrix that gathers FFT entries into buckets ## Want buckets to be [0 - 0.5 - 1.5 - 2.5 - 3.5 - ... - 49.5] Hz bin_array = np.linspace(0., 49., num=50) ## http://docs.scipy.org/doc/numpy/reference/routines.fft.html#module-numpy.fft #freq = fftpack.rfftfreq(n=sample_length, d=1./p.sample_rate_in_hz) freq = np.fft.rfftfreq(n=sample_length, d=1. / p.sample_rate_in_hz) #print freq[0:100] bin_fft = np.zeros((len(freq), len(bin_array))) for i, bn in enumerate(bin_array): bn_lower = (bin_array[i - 1] + bin_array[i + 0]) / 2. if i > 0 else bn - 0.5 bn_upper = (bin_array[i + 0] + bin_array[i + 1] ) / 2. if i < len(bin_array) - 1 else bn + 0.5 a = np.where((freq > bn_lower) & (freq <= bn_upper), 1, 0) bin_fft[:, i] = a #print bn_lower, bn, bn_upper #print bin_fft[0:20, 0:5] ## Now, take whole period, and find the start times in seconds signal_period_starts = np.arange(start=0, stop=p.length_in_sec - signal_duration, step=signal_period_step) #print signal_period_starts param_length = p.n_channels * np.shape(bin_fft)[1] # len(bin_array) all_params = np.zeros((len(signal_period_starts), param_length), dtype=np.complex64) for i, start_period in enumerate(signal_period_starts): sample_start = int(p.sample_rate_in_hz * start_period) # start time in seconds #z = fftpack.rfft(p.data[:, sample_start:], n=sample_length, axis=1) fft_raw = np.fft.rfft(p.data[:, sample_start:], n=sample_length, axis=1) #print np.shape(fft_raw) #print fft_raw[0:1, 0:20] binned = np.dot(fft_raw, bin_fft) #print np.shape(binned) #print binned[0:1, :] #print binned[0, 0] # Check that first bin is equal to first sums... #print np.sum(fft_raw[0,0:6]) # Works! params = np.log(binned.ravel()) all_params[i, :] = params print np.shape(all_params), all_params.nbytes to_hickle = dict( features=all_params, signal_period_starts=signal_period_starts, ) # Dump data, with compression f = "data/feat/%s/%s_%s_segment_%04d.hickle" % (p.subject, p.subject, p.desc, p.num) hickle.dump(to_hickle, f, mode='w', compression='gzip')
X_test[0, :, :, :, :] = loadmat("../vim2/preprocessed/test.mat")['d'].transpose((0, 3, 1, 2))
print X_test.shape

frame = 10
for i in range(nbat):
    if time.time() - starttime > 72000:
        break
    if frame + fperbat >= X_test.shape[1]:
        frame = X_test.shape[1] - fperbat
    test_errors = test_model.predict(X_test[:, frame-file_overlap:frame+fperbat, :, :, :], 1)
    outfile = RESULTS_SAVE_DIR + "/testerr" + str(i) + ".hkl"
    print outfile
    print frame
    frame += fperbat
    hkl.dump(test_errors[0, file_overlap:], outfile)

# hkl.dump(errs1[:,9,:], RESULTS_SAVE_DIR
#          + "errors_frame" + str(b*batch_size+9+6) + "_" + str((b+1)*batch_size+9+6)
#          + ".hkl")
#X_hat = test_model.predict(X_test[1], batch_size)
#test_model._make_predict_function()
#f = test_model.predict_function
#errs1 = f(X_test[0])
#
f_meta = "data/feat/%s/%s_meta_input.hickle" % (
    _subject,
    _subject,
)

if True or train_data:  # PREVIOUSLY :: produce meta-data only from training set
    per_feature_min = np.min(all_features, axis=0)
    per_feature_max = np.max(all_features, axis=0)

    to_hickle = dict(
        signal_period_starts=signal_period_starts,
        per_feature_min=per_feature_min,
        per_feature_max=per_feature_max,
    )
    hickle.dump(to_hickle, f_meta, mode='w', compression='gzip')

#else:
#    from_hickle_meta = hickle.load(f_meta)
#    per_feature_min = from_hickle_meta['per_feature_min']
#    per_feature_max = from_hickle_meta['per_feature_max']

norm_features = (all_features - per_feature_min) / (per_feature_max - per_feature_min)
to_hickle = dict(features=norm_features, )

#f_out = "data/feat/%s/%s_%s_input.hickle" % (_subject, _subject, ("train" if train_data else "test"), )
f_out = "data/feat/%s/%s_input.hickle" % (
    _subject,
    _subject,
# tensorboard
tensorboard = TensorBoard(log_dir="logs_dogma/{}".format(time()), histogram_freq=1,
                          write_graph=False, write_grads=False, write_images=True)
callbacks.append(tensorboard)

history = model.fit_generator(train_generator, samples_per_epoch / batch_size, nb_epoch, callbacks=callbacks,
                              validation_data=val_generator, validation_steps=N_seq_val / batch_size)

# summarize history for loss
print(history.history.keys())
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.show()
plt.savefig('loss_full_kitti_dogma_t_1.png')

# save history in a hickle file
hkl.dump(history.history, 'history_full_kitti_dogma_t_1.hkl', mode='w')

if save_model:
    json_string = model.to_json()
    with open(json_file, "w") as f:
        f.write(json_string)
                    type=bool, default=True,
                    help='Save test labels and predicted labels')
args = parser.parse_args()
seed = args.seed
outdir = args.outdir

# Load data and create label
train_mat = hkl.load(args.train_data).astype(int)
test_mat = hkl.load(args.test_data).astype(int)
train_label = [1] * 1600 + [0] * 1600
test_label = [1] * 400 + [0] * 400
train_data, train_label = shuffle(train_mat, train_label, random_state=0)
test_data, test_label = shuffle(test_mat, test_label, random_state=0)

# Model
seed = args.seed
model = SVC(probability=True)

# Train and predict
model.fit(train_data, train_label)
predict_label = model.predict(test_data)
test_posterior_probability = model.predict_proba(test_data)
test_label = np.array(test_label)

# Save the result
if args.save_result:
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    hkl.dump(predict_label, outdir + 'predict_label.hkl')
    hkl.dump(test_posterior_probability, outdir + 'test_posterior_probability.hkl')
    hkl.dump(test_label, outdir + 'test_label.hkl')
def main(): # batch size for extracting feature vectors from vggnet. batch_size = 100 # maximum length of caption(number of word). if caption is longer than max_length, deleted. max_length = 15 # if word occurs less than word_count_threshold in training dataset, the word index is special unknown token. word_count_threshold = 1 # vgg model path vgg_model_path = './data/imagenet-vgg-verydeep-19.mat' ##### vgg model-> wget http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat -P data/ caption_file = '/home/most12lee/downloads/data/token_3000imgs.json' image_dir = '/home/most12lee/downloads/data/%s_resized/' # about 80000 images and 400000 captions for train dataset # -> ME: about 2100 images and 10500 captions for train datasets train_dataset = _process_caption_data( caption_file= '/home/most12lee/downloads/data/token_3000imgs_train.json', #### DONT FORGET TO CHANGE CSV INTO JSON FILE!!!!!! image_dir='/home/most12lee/downloads/data/train_resized/', max_length=max_length) # about 40000 images and 200000 captions # -> ME: about 900 images and 4500 captions for val datasets val_dataset = _process_caption_data( caption_file= '/home/most12lee/downloads/data/token_3000imgs_val.json', #### DONT FORGET TO CHANGE CSV INTO JSON FILE!!!!!! image_dir='/home/most12lee/downloads/data/val_resized/', max_length=max_length) # about 4000 images and 20000 captions for val / test dataset # -> ME: about 90 images and 450 captions for val / test datasets val_cutoff = int(0.1 * len(val_dataset)) test_cutoff = int(0.2 * len(val_dataset)) print 'Finished processing caption data' save_pickle(train_dataset, 'data/train/train.annotations.pkl') save_pickle(val_dataset[:val_cutoff], 'data/val/val.annotations.pkl') save_pickle(val_dataset[val_cutoff:test_cutoff].reset_index(drop=True), 'data/test/test.annotations.pkl') for split in ['train', 'val', 'test']: annotations = load_pickle('./data/%s/%s.annotations.pkl' % (split, split)) if split == 'train': word_to_idx = _build_vocab(annotations=annotations, threshold=word_count_threshold) save_pickle(word_to_idx, './data/%s/word_to_idx.pkl' % split) captions = _build_caption_vector(annotations=annotations, word_to_idx=word_to_idx, max_length=max_length) save_pickle(captions, './data/%s/%s.captions.pkl' % (split, split)) file_names, id_to_idx = _build_file_names(annotations) save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split)) image_idxs = _build_image_idxs(annotations, id_to_idx) save_pickle(image_idxs, './data/%s/%s.image.idxs.pkl' % (split, split)) # prepare reference captions to compute bleu scores later image_ids = {} feature_to_captions = {} i = -1 for caption, image_id in zip(annotations['caption'], annotations['image_id']): if not image_id in image_ids: image_ids[image_id] = 0 i += 1 feature_to_captions[i] = [] feature_to_captions[i].append(caption.lower() + ' .') save_pickle(feature_to_captions, './data/%s/%s.references.pkl' % (split, split)) print "Finished building %s caption dataset" % split # extract conv5_3 feature vectors vggnet = Vgg19(vgg_model_path) vggnet.build() with tf.Session() as sess: tf.initialize_all_variables().run() for split in ['train', 'val', 'test']: anno_path = './data/%s/%s.annotations.pkl' % (split, split) save_path = './data/%s/%s.features.hkl' % (split, split) annotations = load_pickle(anno_path) image_path = list(annotations['file_name'].unique()) n_examples = len(image_path) all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32) for start, end in zip( range(0, n_examples, batch_size), 
range(batch_size, n_examples + batch_size, batch_size)): image_batch_file = image_path[start:end] image_batch = np.array( map(lambda x: ndimage.imread(x, mode='RGB'), image_batch_file)).astype(np.float32) feats = sess.run(vggnet.features, feed_dict={vggnet.images: image_batch}) all_feats[start:end, :] = feats print("Processed %d %s features.." % (end, split)) # use hickle to save huge feature vectors hickle.dump(all_feats, save_path) print("Saved %s.." % (save_path))
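# Hedged sanity check, not part of the pipeline above: reload one split's saved
# features and caption vectors and confirm they line up. The paths mirror the
# save_path / save_pickle names used above; check_split is an invented helper.
import pickle
import hickle
import numpy as np

def check_split(split='train'):
    feats = hickle.load('./data/%s/%s.features.hkl' % (split, split))
    with open('./data/%s/%s.captions.pkl' % (split, split), 'rb') as f:
        captions = pickle.load(f)
    with open('./data/%s/%s.image.idxs.pkl' % (split, split), 'rb') as f:
        image_idxs = pickle.load(f)
    assert feats.shape[1:] == (196, 512)       # conv5_3 feature maps
    assert len(captions) == len(image_idxs)    # one image index per caption vector
    assert int(np.max(image_idxs)) < feats.shape[0]
    print('%s: %d captions over %d images' % (split, len(captions), feats.shape[0]))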
if not os.path.exists('gedm.hkl'):
    N = 256
    d_2001 = np.zeros([N, N])
    d_2016 = np.zeros([N, N])
    for ii in range(N):
        print("%i / %i" % (ii + 1, N))
        for jj in range(N):
            l = float(ii) / N * 360 - 180
            b = float(jj) / N * 90 - 45
            d_2001[ii, jj] = pyne2001.get_galactic_dm(l, b)
            dm, tau = pyymw16.dist_to_dm(l, b, 30000)
            d_2016[ii, jj] = dm.value
    # keep the freshly computed grids so plotting works on the first run too
    d = {'NE2001': d_2001, 'YMW16': d_2016}
    hkl.dump(d, 'gedm.hkl')
else:
    d = hkl.load('gedm.hkl')

plt.figure(figsize=(9, 9))
plt.subplot(3, 1, 1)
plot_gplane(d['NE2001'], 'NE2001')
plt.subplot(3, 1, 2)
plot_gplane(d['YMW16'], 'YMW16')
plt.subplot(3, 1, 3)
d_delta = d['YMW16'] - d['NE2001']
plot_gplane(d_delta, 'Difference')
plt.xlabel("gl [deg]")
plt.tight_layout()
plt.savefig('compare_to_ne2001.png')
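# The block above follows a compute-once/cache pattern: build the DM grids only if
# 'gedm.hkl' is missing, otherwise load them. A hedged, generic version of that
# pattern (the helper name and the build_dm_grids callable are invented) might be:
import os
import hickle as hkl

def cached(path, compute):
    """Return hkl-cached data, computing and dumping it on the first call."""
    if os.path.exists(path):
        return hkl.load(path)
    data = compute()
    hkl.dump(data, path)
    return data

# e.g. d = cached('gedm.hkl', build_dm_grids)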
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts, new_cal, sky=False, lfsm=False, emp=False): h5 = tb.open_file(filename) if new_cal: T_ant = apply_new_calibration(h5) else: T_ant = apply_calibration(h5) f_leda = T_ant['f'] ant_ids = ['252', '254', '255'] print("Plotting...") fig = plt.figure(figsize=(20, 20)) #plt.suptitle(h5.filename) lst_stamps = T_ant['lst'] indexes = np.arange(len(lst_stamps), dtype=np.int) if len(lst_stamps) == 0: raise RuntimeError("No LSTs in file") # Report discontinuities in time for i in range(1, len(lst_stamps)): if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0: # 1 minute print "Discontinuity at LST", lst_stamps[i], ( lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds" utc_stamps = T_ant['utc'] xlims = (f_leda[0], f_leda[-1]) #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1])) #hfmt = mdates.DateFormatter('%m/%d %H:%M') ylims = (T_ant['lst'][0], T_ant['lst'][-1]) # Work out altitude of Gal center and Sun. Use whichever is highest # and put that in the padding, which is the stripe. pad_length = 70 padding = np.full((len(lst_stamps), pad_length), 10000) timing = lst_timing.LST_Timing(lst_stamps, utc_stamps) border_bottom, night_bottom, night_top, border_top = timing.calc_night() padding[night_bottom:night_top, :] = 1000 #for ant in ant_ids: # lst_stamps, T_ant[ant+"A"] = timing.align(T_ant[ant+"A"]) # lst_stamps, T_ant[ant+"B"] = timing.align(T_ant[ant+"B"]) if night_bottom: print "Night", lst_stamps[night_bottom], "-", lst_stamps[night_top - 1] else: print "Night 0 - 0" # Use night only if not all_lsts: if not border_top: raise RuntimeError( "No LSTs available at night time (use --all_lsts to see all)") lst_stamps = lst_stamps[night_bottom:night_top] utc_stamps = utc_stamps[night_bottom:night_top] indexes = indexes[night_bottom:night_top] padding = padding[night_bottom:night_top] ylims = (lst_stamps[0], lst_stamps[-1]) print len(lst_stamps), "usable LSTs" else: print "Using all LSTs" if len(lst_stamps) == 0: raise RuntimeError( "There are no data to display (number of LSTs is 0)") yloc = [] ylabel = [] try: for i in range(0, len(lst_stamps), len(lst_stamps) / 7): yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i])) except: yloc.append(lst_stamps[0]), ylabel.append(("%.1f" % lst_stamps[0])) yloc.append(lst_stamps[-1]), ylabel.append(("%.1f" % lst_stamps[-1])) if all_lsts: new_x_high = xlims[1] + pad_length * (xlims[1] - xlims[0]) / len(f_leda) else: new_x_high = xlims[1] dump_data = {} if sky: if lfsm and emp: smdl = SkyModelLFSMEmp smlbl = 'LFSM+Emp' elif lfsm and not emp: smdl = SkyModelLFSM smlbl = 'LFSM' elif not lfsm and emp: smdl = SkyModelGSMEmp smlbl = 'GSM+Emp' else: smdl = SkyModelGSM smlbl = 'GSM' sy = smdl(pol='y') sx = smdl(pol='x') T_y_asm = sy.generate_tsky(lst_stamps, f_leda * 1e6) T_x_asm = sx.generate_tsky(lst_stamps, f_leda * 1e6) if flag and merge: # If we are going to merge the flags across antennas, we need to flag them all now for p in (0, 1): for ii, key in enumerate(ant_ids): ant = key + ("B" if p else "A") T_flagged = T_ant[ant] if not all_lsts: # Do flagging with a border around the data in time masks = rfi_flag(T_flagged[border_bottom:border_top], freqs=f_leda) new_mask = masks.combine(do_not_excise_dtv=True) new_mask = new_mask[night_bottom - border_bottom:night_top - border_bottom] # remove border else: masks = rfi_flag(T_flagged, freqs=f_leda) new_mask = masks.combine(do_not_excise_dtv=True) print ant, "Biggest DTV gap", lst_stamps[biggest_gap( masks.dtv_tms)[1]], "-", 
lst_stamps[biggest_gap( masks.dtv_tms)[0]], "waterfall" try: merged_mask |= new_mask except NameError: merged_mask = new_mask for p in [0, 1]: for ii, key in enumerate(ant_ids): if p == 0 and ii == 0: ax = fig.add_subplot(2, 3, 3 * p + ii + 1) origAX = ax else: ax = fig.add_subplot(2, 3, 3 * p + ii + 1, sharex=origAX, sharey=origAX) if p == 0: ant = key + "A" else: ant = key + "B" T_flagged = T_ant[ant] if not all_lsts: T_flagged = T_flagged[night_bottom:night_top] print "Max", np.max(T_flagged), "Min", np.min(T_flagged) masks = {} if flag: if merge: ## Already done T_flagged = np.ma.array(T_flagged, mask=merged_mask) else: ## Need to do it now - there's probably a way to deal with ## this all in one pass if not all_lsts: masks = rfi_flag(T_ant[ant][border_bottom:border_top], freqs=f_leda) T_flagged = masks.apply_as_mask( T_ant[ant][border_bottom:border_top], do_not_excise_dtv=True) T_flagged = T_flagged[night_bottom - border_bottom:night_top - border_bottom] # Remove border masks.chop(night_bottom - border_bottom, night_top - border_bottom) else: masks = rfi_flag(T_flagged, freqs=f_leda) T_flagged = masks.apply_as_mask(T_flagged, do_not_excise_dtv=True) print ant, "Biggest DTV gap", lst_stamps[biggest_gap( masks.dtv_tms)[1]], "-", lst_stamps[biggest_gap( masks.dtv_tms)[0]], "waterfall" print "After flagging", "Max", np.ma.max( T_flagged), "Min", np.ma.min(T_flagged) try: T_asm = T_y_asm if p == 0 else T_x_asm scale_offset_asm = robust.mean(T_asm / T_flagged) T_flagged = T_flagged - T_asm / scale_offset_asm except NameError: pass T_flagged = pad_data(T_flagged) # Up to 2400 channels if dump: if not all_lsts: if masks: dump_data[ant + "_flagged"] = masks.apply_as_nan( T_ant[ant][night_bottom:night_top]) dump_data[ant] = T_ant[ant][night_bottom:night_top] else: if masks: dump_data[ant + "_flagged"] = masks.apply_as_nan( T_ant[ant]) dump_data[ant] = T_ant[ant] dump_data[ant + "_rms"] = add_uncertainties(T_flagged) av = np.ma.average(T_flagged, axis=0) weighted = av / dump_data[ant + "_rms"]**2 dump_data[ant + "_weighted"] = weighted if masks: dump_data[ant + "_dtv_times"] = np.array(masks.dtv_tms) dump_data[ant + "_masks"] = masks.masks if flag: total = T_flagged.shape[0] * T_flagged.shape[1] num_in = np.ma.MaskedArray.count(T_flagged) print ant, ("%.1f%%" % (100 * float(total - num_in) / total) ), "flagged.", "Count:", total - num_in # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe. 
if all_lsts: T_flagged_plot = np.ma.concatenate((T_flagged, padding), axis=1) else: T_flagged_plot = T_flagged ax.set_yticks(yloc) ax.set_yticklabels(ylabel) ax.tick_params(axis='y', pad=2) if flatten: if type(T_flagged_plot) is np.ma.core.MaskedArray: abp = np.ma.median(T_flagged_plot.data, axis=0) else: abp = np.ma.median(T_flagged_plot, axis=0) abp /= np.ma.median(abp) T_flagged_plot /= abp try: clim = (percentile(T_flagged_plot.compressed(), 5), percentile(T_flagged_plot.compressed(), 95)) except AttributeError: clim = (percentile(T_flagged_plot, 5), percentile(T_flagged_plot, 95)) elif sky: clim = (-250, 500) else: clim = (1000, 10000) if ant != "252B": im = ax.imshow( T_flagged_plot, # / np.median(xx, axis=0), cmap="viridis", aspect='auto', interpolation='nearest', clim=clim, extent=(xlims[0], new_x_high, ylims[1], ylims[0])) ax.set_title(ant) if p == 1: ax.set_xlabel("Frequency [MHz]") if ii == 0: ax.set_ylabel("LST [hr]") #ax.yaxis_date() #ax.yaxis.set_major_formatter(hfmt) # if not flatten: fig.subplots_adjust(left=0.07) fig.subplots_adjust(right=0.875) cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75]) cbar = fig.colorbar(im, cax=cbar_ax) #plt.subplot(2,3,3) #cbar = plt.colorbar() if sky: cbar.set_label("Temperature - %s [K]" % smlbl) else: cbar.set_label("Temperature [K]") cbar.ax.tick_params(axis='y', pad=2) #plt.tight_layout() plt.text(0.005, 0.005, get_repo_fingerprint(), transform=fig.transFigure, size=8) if save: plt.savefig(os.path.basename(filename)[:-3] + ".png") if not no_show: plt.show() if dump: dump_data["lsts"] = lst_stamps dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps]) dump_data["indexes"] = indexes dump_data["frequencies"] = pad_frequencies(f_leda) dump_data["options"] = "Flag="+str(flag) \ + " Filename="+filename \ + " New cal="+str(new_cal) \ + " Merge="+str(merge) \ + " Flatten="+str(flatten) \ + " All LSTs="+str(all_lsts) \ + " Sky Model Substract="+str(sky) \ + " Use LFSM="+str(lfsm) \ + " Apply empirical gain correction="+str(emp) dump_data["fingerprint"] = get_repo_fingerprint() import json def jdefault(o): return o.__dict__ dump_data["params"] = json.dumps(params, default=jdefault) hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
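# Hedged example, not part of quicklook: read back the dictionary dumped above and
# list what it contains. The .hkl name mirrors os.path.basename(filename)[:-3] + ".hkl";
# the observation name used here is hypothetical.
import hickle

dumped = hickle.load('outriggers_observation.hkl')  # hypothetical file name
print(sorted(dumped.keys()))   # 'lsts', 'utcs', 'frequencies', 'options', per-antenna arrays, ...
print(dumped['options'])       # records the Flag/Merge/Flatten/... settings used
print('252A waterfall shape: %s' % (dumped['252A'].shape,))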
        if not np.isnan(snp):
            # dataset rows are zero by default; write the genotype value where one is present
            # genotypes are encoded as 0, 1 and 2 (minor homozygous, heterozygous, major homozygous)
            assert -1 < snp < 3
            dataset[row][offset] = snp
        else:
            dataset[row][offset] = 5  # substitute NaNs (missing genotypes) with 5
        offset += 1


if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)
    snps = Bed(args.snps, count_A1=False)  # count_A1 controls which allele (A1 or A2) is counted
    phenos = pd.read_csv(
        '/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/cleaned.csv',
        sep=',')[-25:]
    phenos = phenos.reset_index()
    iid_patients = phenos.loc[:, 'f.eid']
    data_on(ondisk=snps, patients_=iid_patients)
    print("making the geno file...")
    # pd.DataFrame(dataset).to_csv("/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/genos.csv", sep=' ', header=None, index=False)
    hkl.dump(
        dataset,
        "/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/bmi_val_25.hkl",
        mode='w')
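# Hedged check, not in the original script: reload the dumped genotype matrix and
# confirm it only contains the encodings used above (0/1/2 genotypes, 5 for missing).
import numpy as np
import hickle as hkl

genos = hkl.load(
    "/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/bmi_val_25.hkl")
values = np.unique(genos)
assert set(values.tolist()).issubset({0, 1, 2, 5}), values
print("genotype matrix %s, unique values %s" % (genos.shape, values))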
lr_contacts_norm_dict = {
    item: np.log2(lr_contacts_dict[item] * max_lr_contact /
                  sum(sum(lr_contacts_dict[item])) + 1)
    for item in lr_contacts_dict.keys()
}
max_hr_contact_norm = {
    item: hr_contacts_norm_dict[item].max()
    for item in hr_contacts_dict.keys()
}
max_lr_contact_norm = {
    item: lr_contacts_norm_dict[item].max()
    for item in lr_contacts_dict.keys()
}

hkl.dump(nb_hr_contacts, 'data/%s/nb_hr_contacts.hkl' % cell)
hkl.dump(nb_lr_contacts, 'data/%s/nb_lr_contacts.hkl' % cell)
hkl.dump(max_hr_contact_norm, 'data/%s/max_hr_contact_norm.hkl' % cell)
hkl.dump(max_lr_contact_norm, 'data/%s/max_lr_contact_norm.hkl' % cell)


def crop_hic_matrix_by_chrom(chrom, norm_type=0, size=40, thred=200):
    # thred = 2M / resolution
    # norm_type=0 --> raw count
    # norm_type=1 --> log transformation
    # norm_type=2 --> scaled to [-1, 1] after log transformation (default)
    # norm_type=3 --> scaled to [0, 1] after log transformation
    distance = []
    crop_mats_hr = []
    crop_mats_lr = []
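# Hedged illustration: the statistics dumped above can be reloaded later (e.g. by the
# training or prediction code) to undo the log/scale normalisation. 'GM12878' is a
# hypothetical cell-line name standing in for the same `cell` used when dumping.
import hickle as hkl

cell = 'GM12878'
nb_hr_contacts = hkl.load('data/%s/nb_hr_contacts.hkl' % cell)
max_hr_contact_norm = hkl.load('data/%s/max_hr_contact_norm.hkl' % cell)
for chrom in sorted(max_hr_contact_norm.keys()):
    print('%s: max normalised HR contact = %.3f' % (chrom, max_hr_contact_norm[chrom]))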
def save_batches(file_list, tar_dir, img_size=48, batch_size=256,
                 flag_avg=False, num_sub_batch=1):
    '''
    num_sub_batch is for parallelising over multiple GPUs; it should be 1, 2 or 4,
    and the file suffix is the reversed binary index of the sub-batch:
    when 2, the files end with _0.hkl and _1.hkl
    when 4, with _00.hkl, _10.hkl, _01.hkl and _11.hkl
    '''
    if not os.path.exists(tar_dir):
        os.makedirs(tar_dir)

    img_batch = np.zeros((3, img_size, img_size, batch_size), np.uint8)
    if flag_avg:
        img_sum = np.zeros((3, img_size, img_size))

    batch_count = 0
    count = 0
    for file_name in file_list:
        img_batch[:, :, :, count % batch_size] = \
            get_img(file_name, img_size=img_size, batch_size=batch_size)
        count += 1
        if count % batch_size == 0:
            batch_count += 1

            if flag_avg:
                img_sum += img_batch.mean(axis=3)

            if num_sub_batch == 1:
                save_name = '%04d' % (batch_count - 1) + '.hkl'
                hkl.dump(img_batch, os.path.join(tar_dir, save_name), mode='w')

            elif num_sub_batch == 2:
                half_size = batch_size // 2
                save_name = '%04d' % (batch_count - 1) + '_0.hkl'
                hkl.dump(img_batch[:, :, :, :half_size],
                         os.path.join(tar_dir, save_name), mode='w')
                save_name = '%04d' % (batch_count - 1) + '_1.hkl'
                hkl.dump(img_batch[:, :, :, half_size:],
                         os.path.join(tar_dir, save_name), mode='w')

            elif num_sub_batch == 4:
                q1 = batch_size // 4
                q2 = batch_size // 2
                q3 = batch_size // 4 * 3
                save_name = '%04d' % (batch_count - 1) + '_00.hkl'
                hkl.dump(img_batch[:, :, :, :q1],
                         os.path.join(tar_dir, save_name), mode='w')
                save_name = '%04d' % (batch_count - 1) + '_10.hkl'
                hkl.dump(img_batch[:, :, :, q1:q2],
                         os.path.join(tar_dir, save_name), mode='w')
                save_name = '%04d' % (batch_count - 1) + '_01.hkl'
                hkl.dump(img_batch[:, :, :, q2:q3],
                         os.path.join(tar_dir, save_name), mode='w')
                save_name = '%04d' % (batch_count - 1) + '_11.hkl'
                hkl.dump(img_batch[:, :, :, q3:],
                         os.path.join(tar_dir, save_name), mode='w')
            else:
                raise NotImplementedError("num_sub_batch has to be 1, 2, or 4")

    return img_sum / batch_count if flag_avg else None
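# Hedged companion sketch (load_batch is an invented name): reassemble one full batch
# from the files written by save_batches. Only the 1- and 2-way layouts are handled;
# the 4-way layout uses the reversed-binary suffixes listed in the docstring above.
import os
import numpy as np
import hickle as hkl

def load_batch(tar_dir, batch_idx, num_sub_batch=1):
    if num_sub_batch == 1:
        return hkl.load(os.path.join(tar_dir, '%04d.hkl' % batch_idx))
    if num_sub_batch == 2:
        parts = [hkl.load(os.path.join(tar_dir, '%04d_%d.hkl' % (batch_idx, k)))
                 for k in (0, 1)]
        return np.concatenate(parts, axis=3)  # images were split along the last axis
    raise NotImplementedError("only num_sub_batch 1 or 2 handled in this sketch")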
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts): h5 = tb.open_file(filename) T_ant = apply_calibration(h5) f_leda = T_ant['f'] ant_ids = ['252', '254', '255'] print("Plotting...") fig = plt.figure(figsize=(12, 12)) #plt.suptitle(h5.filename) lst_stamps = T_ant['lst'] if len(lst_stamps) == 0: print "No LSTS in file" exit(1) # Report discontinuities in time for i in range(1, len(lst_stamps)): if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0: # 1 minute print "Discontinuity at LST", lst_stamps[i], ( lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds" utc_stamps = T_ant['utc'] xlims = (f_leda[0], f_leda[-1]) #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1])) #hfmt = mdates.DateFormatter('%m/%d %H:%M') ylims = (T_ant['lst'][0], T_ant['lst'][-1]) # Work out altitude of Gal center and Sun. Use whichever is highest # and put that in the padding, which is the stripe. unusable_lsts = [] pad_length = 70 padding = np.zeros((len(lst_stamps), pad_length)) for i, d in enumerate(utc_stamps): ovro.date = d sun.compute(ovro) gal_center.compute(ovro) if sun.alt > -15 * np.pi / 180 or gal_center.alt > -15 * np.pi / 180: padding[i, :] = 10000 unusable_lsts.append(i) else: padding[i, :] = 1000 # Delete sun up LSTS if not all_lsts: print "Cutting out times when sun/galaxy up" padding = np.delete(padding, unusable_lsts, axis=0) lst_stamps = np.delete(lst_stamps, unusable_lsts, axis=0) utc_stamps = np.delete(utc_stamps, unusable_lsts, axis=0) if len(lst_stamps) == 0: print "No LSTs available at night time (use --all_lsts to see all)" exit(1) ylims = (lst_stamps[0], lst_stamps[-1]) print len(lst_stamps), "usable LSTs" else: print "Using all LSTs" if len(lst_stamps) == 0: print "There is no data to display (number of LSTs is 0)" exit(1) yloc = [] ylabel = [] for i in range(0, len(lst_stamps), len(lst_stamps) / 7): yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i])) if all_lsts: new_x_high = xlims[1] + pad_length * (xlims[1] - xlims[0]) / len(f_leda) else: new_x_high = xlims[1] dump_data = {} if flag and merge: # If we are going to merge the flags across antennas, we need to flag them all now for p in (0, 1): for ii, key in enumerate(ant_ids): ant = key + ("B" if p else "A") T_flagged = T_ant[ant] if not all_lsts: T_flagged = np.delete(T_flagged, unusable_lsts, axis=0) new_mask = rfi_flag(T_flagged, freqs=f_leda).mask try: merged_mask |= new_mask except NameError: merged_mask = new_mask for p in [0, 1]: for ii, key in enumerate(ant_ids): if p == 0 and ii == 0: ax = fig.add_subplot(2, 3, 3 * p + ii + 1) origAX = ax else: ax = fig.add_subplot(2, 3, 3 * p + ii + 1, sharex=origAX, sharey=origAX) if p == 0: ant = key + "A" else: ant = key + "B" T_flagged = T_ant[ant] if not all_lsts: T_flagged = np.delete(T_flagged, unusable_lsts, axis=0) print "Max", np.max(T_flagged), "Min", np.min(T_flagged) if flag: if merge: ## Already done T_flagged = np.ma.array(T_flagged, mask=merged_mask) else: ## Need to do it now - there's probably a way to deal with ## this all in one pass T_flagged = rfi_flag(T_flagged, freqs=f_leda) print "After flagging", "Max", np.ma.max( T_flagged), "Min", np.ma.min(T_flagged) if dump: dump_data[ant] = T_flagged dump_data[ant + "_rms"] = add_uncertainties(T_flagged) av = np.ma.average(T_flagged, axis=0) weighted = av / dump_data[ant + "_rms"]**2 dump_data[ant + "_weighted"] = weighted if flag: total = T_flagged.shape[0] * T_flagged.shape[1] num_in = np.ma.MaskedArray.count(T_flagged) print ant, ("%.1f%%" % (100 * (total - num_in) / total) ), 
"flagged.", "Count:", total - num_in # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe. if all_lsts: T_flagged_plot = np.ma.concatenate((T_flagged, padding), axis=1) else: T_flagged_plot = T_flagged ax.set_yticks(yloc) ax.set_yticklabels(ylabel) ax.tick_params(axis='y', pad=2) if flatten: if type(T_flagged_plot) is np.ma.core.MaskedArray: abp = np.ma.median(T_flagged_plot.data, axis=0) else: abp = np.ma.median(T_flagged_plot, axis=0) abp /= np.ma.median(abp) T_flagged_plot /= abp try: clim = (percentile(T_flagged_plot.compressed(), 5), percentile(T_flagged_plot.compressed(), 95)) except AttributeError: clim = (percentile(T_flagged_plot, 5), percentile(T_flagged_plot, 95)) else: clim = (1000, 10000) im = ax.imshow( T_flagged_plot, # / np.median(xx, axis=0), cmap='jet', aspect='auto', interpolation='nearest', clim=clim, extent=(xlims[0], new_x_high, ylims[1], ylims[0])) ax.set_title(ant) if p == 1: ax.set_xlabel("Frequency [MHz]") if ii == 0: ax.set_ylabel("LST [hr]") #ax.yaxis_date() #ax.yaxis.set_major_formatter(hfmt) # if not flatten: fig.subplots_adjust(left=0.07) fig.subplots_adjust(right=0.875) cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75]) cbar = fig.colorbar(im, cax=cbar_ax) #plt.subplot(2,3,3) #cbar = plt.colorbar() cbar.set_label("Temperature [K]") cbar.ax.tick_params(axis='y', pad=2) #plt.tight_layout() if save: plt.savefig(os.path.basename(filename)[:-3] + ".png") if not no_show: plt.show() if dump: dump_data["lsts"] = lst_stamps dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps]) dump_data["frequencies"] = f_leda dump_data["options"] = "Flag=" + str(flag) + " Merge=" + str( merge) + " Flatten=" + str(flatten) + " All LSTSs=" + str(all_lsts) hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
                 figsize=(15, 15), savename=savename, show=False)

    # ===========================================================================
    # Save mask
    print("\tSaving mask as hickle binary ...")

    # convert to int32 for memory efficiency
    nuclei = nuclei.astype(np.int32)

    # see: https://github.com/telegraphic/hickle
    savename = mask_save_path + imname.split(".")[0] + ".hkl"
    with open(savename, 'w') as f:
        hkl.dump(nuclei, f)

    # ===========================================================================
    # Divide into (potentially overlapping) FOVs and save

    # Get FOV bounds
    (M, N, Depth) = im.shape
    FOV_bounds = get_fov_bounds(M, N, fov_dims=fov_dims, shift_step=shift_step)
    n_fovs = len(FOV_bounds)

    savename_ims_base = input_for_maskrcnn_path_images + imname.split(".")[0]
    savename_masks_base = input_for_maskrcnn_path_labels + imname.split(".")[0]

    # size threshold for exclusion (edge of tile)
    min_pixels = 150
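# Hedged example (the mask path is hypothetical, mirroring mask_save_path + imname above):
# hickle reads the mask back from the same .hkl file, regardless of whether it was
# dumped through a path or an open file handle.
import numpy as np
import hickle as hkl

mask_path = "/path/to/masks/example_image.hkl"
nuclei = hkl.load(mask_path)
n_nuclei = len(np.unique(nuclei)) - 1  # assumes 0 is the background label
print("loaded mask %s with %d labelled nuclei" % (nuclei.shape, n_nuclei))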
def make_optflow_dataset(dataset="train"): if dataset == "train": ID = TRAIN_PEOPLE_ID elif dataset == "dev": ID = DEV_PEOPLE_ID else: ID = TEST_PEOPLE_ID # Setup parameters for optical flow. farneback_params = dict(winsize=20, iterations=1, flags=cv2.OPTFLOW_FARNEBACK_GAUSSIAN, levels=1, pyr_scale=0.5, poly_n=5, poly_sigma=1.1, flow=None) frames_idx = parse_sequence_file() data = [] for category in CATEGORIES: # Get all files in current category's folder. folder_path = os.path.join(category) filenames = sorted(os.listdir(folder_path)) for filename in filenames: filepath = os.path.join(category, filename) # Get id of person in this video. person_id = int(filename.split("_")[0][6:]) if person_id not in ID: continue vid = imageio.get_reader(filepath, "ffmpeg") flow_x = [] flow_y = [] prev_frame = None # Add each frame to correct list. for i, frame in enumerate(vid): # Boolean flag to check if current frame contains human. ok = False for seg in frames_idx[filename]: if i >= seg[0] and i <= seg[1]: ok = True break if not ok: continue # Convert to grayscale. frame = Image.fromarray(np.array(frame)) frame = frame.convert("L") frame = np.array(frame.getdata(), dtype=np.uint8).reshape( (120, 160)) frame = np.array(Image.fromarray(frame).resize((60, 80))) if prev_frame is not None: # Calculate optical flow. flows = cv2.calcOpticalFlowFarneback( prev_frame, frame, **farneback_params) subsampled_x = np.zeros((30, 40), dtype=np.float32) subsampled_y = np.zeros((30, 40), dtype=np.float32) for r in range(30): for c in range(40): subsampled_x[r, c] = flows[r * 2, c * 2, 0] subsampled_y[r, c] = flows[r * 2, c * 2, 1] flow_x.append(subsampled_x) flow_y.append(subsampled_y) prev_frame = frame data.append({ "filename": filename, "category": category, "flow_x": flow_x, "flow_y": flow_y }) hkl.dump(data, "%s_flow.hkl" % dataset)
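# Hedged usage sketch: load the dumped optical-flow dataset and inspect one record.
# The file name matches the hkl.dump call above; the field names mirror the dicts
# appended to `data` in make_optflow_dataset.
import hickle as hkl

data = hkl.load("train_flow.hkl")
first = data[0]
print("%s (%s): %d flow frames" % (first["filename"], first["category"],
                                   len(first["flow_x"])))  # each entry is a 30x40 field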