Example #1
def do_all(runname='brownseds_highz', outfolder=None, regenerate=False, regenerate_stack=False, **opts):

    if outfolder is None:
        outfolder = os.getenv('APPS') + '/prospector_alpha/plots/'+runname+'/pcomp/'
        if not os.path.isdir(outfolder):
            os.makedirs(outfolder)
            os.makedirs(outfolder+'data/')

    stack_opts = {
              'sigma_sf':0.3,                  # scatter in the star-forming sequence, in dex
              'nbins_horizontal':3,            # number of bins in horizontal stack
              'nbins_vertical':4,              # number of bins in vertical stack
              'horizontal_bin_colors': ['#45ADA8','#FC913A','#FF4E50'],
              'vertical_bin_colors': ['red','#FC913A','#45ADA8','#323299'],
              'low_mass_cutoff':9.5,          # log(M) where we stop stacking and plotting
              'high_mass_cutoff': 11.5,
              'ylim_horizontal_sfr': (-0.8,3),
              'ylim_horizontal_ssfr': (1e-13,1e-9),
              'ylim_vertical_sfr': (-3,3),
              'ylim_vertical_ssfr': (1e-13,1e-9),
              'xlim_t': (1e7,1.4e10),
              'show_disp':[0.16,0.84]         # percentile of population distribution to show on plot
             }

    filename = outfolder+'data/single_sfh_stack.h5'
    if os.path.isfile(filename) and regenerate_stack == False:
        with open(filename, "r") as f:
            stack = hickle.load(f)
    else:
        data = collate_data(runname,filename=outfolder+'data/stacksfh.h5',regenerate=regenerate,**opts)
        stack = stack_sfh(data,regenerate_stack=regenerate_stack, **stack_opts)
        hickle.dump(stack,open(filename, "w"))

    plot_stacked_sfh(stack,outfolder, **stack_opts)
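A minimal sketch of the cache-or-regenerate idiom used above, with hickle.dump/load taking a plain filename and a hypothetical build_fn standing in for collate_data + stack_sfh:

import os
import hickle

def load_or_build(filename, build_fn, regenerate=False):
    """Return cached data from `filename` if present, otherwise build it and cache it."""
    if os.path.isfile(filename) and not regenerate:
        return hickle.load(filename)          # read the cached HDF5 file
    data = build_fn()                         # the expensive step
    hickle.dump(data, filename, mode='w')     # cache for next time
    return data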
Example #2
def process_data():
    splits = {s: [] for s in ['train', 'test', 'val']}
    splits['val'] = val_recordings
    splits['test'] = test_recordings
    not_train = splits['val'] + splits['test']
    for c in categories:  # Randomly assign recordings to training and testing. Cross-validation done across entire recordings.
        c_dir = os.path.join(DATA_DIR, 'raw', c + '/')
        _, folders, _ = os.walk(c_dir).next()
        splits['train'] += [(c, f) for f in folders if (c, f) not in not_train]

    for split in splits:
        im_list = []
        source_list = []  # corresponds to recording that image came from
        for category, folder in splits[split]:
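            # KITTI raw recordings sit under a date directory; the first 10 characters of the folder name (e.g. '2011_09_26') are that date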
            im_dir = os.path.join(DATA_DIR, 'raw/', category, folder, folder[:10], folder, 'image_03/data/')
            _, _, files = os.walk(im_dir).next()
            im_list += [im_dir + f for f in sorted(files)]
            source_list += [category + '-' + folder] * len(files)

        print 'Creating ' + split + ' data: ' + str(len(im_list)) + ' images'
        X = np.zeros((len(im_list),) + desired_im_sz + (3,), np.uint8)
        for i, im_file in enumerate(im_list):
            im = imread(im_file)
            X[i] = process_im(im, desired_im_sz)

        hkl.dump(X, os.path.join(DATA_DIR, 'X_' + split + '.hkl'))
        hkl.dump(source_list, os.path.join(DATA_DIR, 'sources_' + split + '.hkl'))
Example #3
def save_hkl_file(filename, data):
    hkl_filename = filename + '.hkl'
    try:
        hkl.dump(data, hkl_filename, mode="w")
        return True
    except Exception:
        print 'remove %s' % hkl_filename
Example #4
def save_hkl_file(filename, data):
    hkl_filename = filename + '.hkl'
    try:
        hkl.dump(data, hkl_filename, mode="w")
        return True
    except Exception:
        os.remove(hkl_filename)
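A matching loader for the two save_hkl_file variants above, as a sketch only (same hkl alias for hickle assumed):

import os
import hickle as hkl

def load_hkl_file(filename):
    """Return the data written by save_hkl_file, or None if the .hkl file is missing."""
    hkl_filename = filename + '.hkl'
    if os.path.isfile(hkl_filename):
        return hkl.load(hkl_filename)
    return None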
Example #5
def process_video(filename):

    print filename
    video = extract_frame(filename)
    video = vid_batch(video)
    print 'Video Loaded!'
    net, transformer = caffenet()
    feats = np.zeros((4096, 0), dtype=np.float32)

    net.blobs['data'].reshape(15, 3, 227, 227)
    for x in xrange(video.nbatch):
        frames = video.video[..., 15*x: 15*(x+1)]
        #cur_frames = np.zeros((227, 227, 3, 0), dtype=np.uint8)
        for i in xrange(frames.shape[-1]):
            cur_frame = frames[..., i]
            net.blobs['data'].data[i] = transformer.preprocess('data', cur_frame)
        out = net.forward()
        cur_data = net.blobs['fc7'].data.T
        if x == video.nbatch - 1:
            if video.padded != 0:
                cur_data = cur_data[...,:(15 - video.padded)]
        feats = np.concatenate((feats, cur_data), axis=1)
        print feats.shape

    out_file = filename.replace('E001', 'E001_fc7')
    out_file = out_file.replace('mp4', 'hkl')

    hkl.dump({'feats':feats}, out_file)

    return
Example #6
def save(filepath, data, svL=1, fmt='pkl'):
  """
  Save data as a pickle-format file.

  Input
    filepath  -  file name
    data      -  data
    svL       -  save level, 0 | {1} | 2
                   0: do not save (return immediately)
                   1: write to filepath even if it exists
                   2: do not write to filepath if it exists
    fmt       -  format, {'pkl'} | 'hkl' | 'h5'
  """
  if svL == 0 or filepath is None:
    return

  # create fold if not exist
  foldPath = os.path.dirname(filepath)
  mkDir(foldPath)

  if fmt == 'pkl':
    # use pickle
    import cPickle
    with open(filepath, "w") as fo:
      cPickle.dump(data, fo, protocol=cPickle.HIGHEST_PROTOCOL)

  elif fmt == 'hkl':
    # use hickle, which is faster for large-scale data
    # https://github.com/telegraphic/hickle
    import hickle
    with open(filepath, "w") as fo:
      hickle.dump(data, fo)

  else:
    raise Exception('unknown fmt: {}'.format(fmt))
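A short usage sketch for the save() helper above; the paths and data are illustrative:

feats = {'W': [[0.1, 0.2], [0.3, 0.4]], 'names': ['a', 'b']}
save('cache/feats.pkl', feats)               # default: pickle
save('cache/feats.hkl', feats, fmt='hkl')    # hickle/HDF5, handier for large arrays
save('cache/feats.pkl', feats, svL=0)        # svL=0: skip saving entirely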
Example #7
def test_astropy_time_array():
    times = ['1999-01-01T00:00:00.123456789', '2010-01-01T00:00:00']
    t1 = Time(times, format='isot', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    for ii in range(len(t1)):
        assert t1.value[ii] == t2.value[ii]
    assert t1.format == t2.format
    assert t1.scale == t2.scale

    times = [58264, 58265, 58266]
    t1 = Time(times, format='mjd', scale='utc')
    hkl.dump(t1, "test_ap2.h5")
    t2 = hkl.load("test_ap2.h5")

    print(t1)
    print(t2)
    assert t1.value.shape == t2.value.shape
    assert np.allclose(t1.value, t2.value)
    assert t1.format == t2.format
    assert t1.scale == t2.scale
Example #8
def config_loop():
    global pts 
    active,image = cam_feed.read()

    while active:
        cv2.imshow('Perspective',image)
        key = cv2.waitKey(1) & 0xFF
        corres_pts = np.array( [ (0,0), (image.shape[1],0),(image.shape[1],image.shape[0]),(0,image.shape[0]) ] )

        if len(pts) == 4:
            pts = np.array(pts)
            for (x, y) in pts:
                cv2.circle(image, (x, y), 5, (0, 255, 0), -1)

            h, _ = cv2.findHomography(pts.astype('float32'), corres_pts.astype('float32'))
            print 'Transformation Matrix : ', h
            # save to config file
            config = { 'h' : h }
            config['threshold'] = 15
            config['dilate'] = 10
            hkl.dump(config,'.config')
            # warp image
            warped = cv2.warpPerspective(image, h, (image.shape[1],image.shape[0]) )
            cv2.imshow('Perspective',image)
            cv2.imshow('warped',warped)
            # pause
            cv2.waitKey(-1)
            break
Example #9
def test():

	#im_file = caffe_root+'examples/images/cat.jpg'
	im_file = '/home/bill/Dropbox/Cox_Lab/Illusions/images/T_illusion.jpg'

	layer1 = 'pool2'
	layer2 = 'conv2'
	save_file = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_feats2_'+layer1+'_notoversample.hkl'
	save_file2 = '/home/bill/Dropbox/Cox_Lab/Illusions/misc/T_recon_'+layer1+'_0-2tran.jpg'

	if not os.path.isfile(save_file):
		feats = get_features(im_file, layer1)
		hkl.dump(feats, open(save_file, 'w'))
	else:
		feats = hkl.load(open(save_file))
		#feats = feats.reshape((10,256,6,6))

	recon_im = get_recon(feats, layer2)
	#	img = Image.fromarray(recon_im, 'RGB')
	pdb.set_trace()
	#recon_im[recon_im<0] = 0
	#recon_im = recon_im/255
	plt.imshow(recon_im) #, cmap='Greys_r')
	plt.show(block=False)
	plt.savefig(save_file2)
	pdb.set_trace()
Example #10
def pickle_dataset(input_pkl, output_pkl, img_path, id_label, PIXELS):
    data = pd.read_pickle(input_pkl)
    dataset = {}

    iter_images = iter(data[id_label])
    first_image = next(iter_images)
    im = Image.open(img_path + first_image + '.jpg', 'r')
    im = ImageOps.fit(im, (PIXELS, PIXELS), Image.ANTIALIAS)
    im = (np.array(im))
    r = im[:, :, 0].flatten()
    g = im[:, :, 1].flatten()
    b = im[:, :, 2].flatten()

    img_list = np.array(list(r) + list(g) + list(b), dtype='uint8')
    img_list = img_list[np.newaxis, :]

    for img_name in iter_images:
        im = Image.open(img_path + img_name + '.jpg', 'r')
        im = ImageOps.fit(im, (PIXELS, PIXELS), Image.ANTIALIAS)
        im = (np.array(im))
        r = im[:, :, 0].flatten()
        g = im[:, :, 1].flatten()
        b = im[:, :, 2].flatten()

        img = np.array(list(r) + list(g) + list(b), dtype='uint8')
        img_list = np.vstack((img_list, img[np.newaxis, :]))

    hkl.dump(img_list, output_pkl + '_data.hpy', mode='w', compression='gzip')
    hkl.dump(data['label'], output_pkl + '_labels.hpy', mode='w')

    del img_list
    del data
Example #11
def SaveBigDict(filename, root):
    if filename[-4:]!=".hkl":
        filename+=".hkl"
    if "GammaWStatis" in root.keys():
        gammaw=root["GammaWStatis"]
        gammaw["WeightAccu"]=array(gammaw["WeightAccu"], dtype=complex64)
    hkl.dump(root, "_"+filename, mode='w', compression='gzip')
    os.rename("_"+filename, filename)
Example #12
def test_astropy_quantity_array():
    a = Quantity([1,2,3], unit='m')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")

    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
Example #13
def test_astropy_angle_array():
    a = Angle([1,2,3], unit='degree')

    hkl.dump(a, "test_ap.h5")
    b = hkl.load("test_ap.h5")

    assert np.allclose(a.value, b.value)
    assert a.unit == b.unit
Example #14
def safe_store_h(path, o):
    print 'storing hkl:' + path
    directory = path[:path.rfind('/')]
    if not os.path.exists(directory):
        os.makedirs(directory)
    with open(path, "w") as f:
        hkl.dump(o, f)
Example #15
def test_astropy_angle():
    for uu in ['radian', 'degree']:
        a = Angle(1.02, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")
        assert a == b
        assert a.unit == b.unit
Example #16
  def save_weights(self, f_weights):
    ## previously saved as :: ca.W.get_value(borrow=True)
    to_hickle = dict(
      W       = self.W.get_value(borrow=True),
      b       = self.b.get_value(borrow=True),
      b_prime = self.b_prime.get_value(borrow=True),
    )

    hickle.dump(to_hickle, f_weights, mode='w', compression='gzip')
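A matching loader is not part of the snippet; a minimal sketch, assuming the same hickle import and that W, b, b_prime are Theano shared variables as above:

  def load_weights(self, f_weights):
    params = hickle.load(f_weights)
    self.W.set_value(params['W'])
    self.b.set_value(params['b'])
    self.b_prime.set_value(params['b_prime'])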
Example #17
def saveToHickle(array, name):
    """Save a numpy array to a hickle/HDF5 format binary file."""
    try:
        import hickle
    except:
        raise Exception("### The Hickle package is required!")

    output = open(name, 'w')
    hickle.dump(array, output, protocol=2)
    output.close()
Example #18
def collate_data(runname, filename=None, regenerate=False, **opts):
    """ Pull out all of the necessary information from the individual data files.
    This takes a while, so the result is saved to disk.
    """

    # if it's already made, load it and give it back
    # else, start with the making!
    if os.path.isfile(filename) and regenerate == False:
        print 'loading all data'
        with open(filename, "r") as f:
            outdict=hickle.load(f)

        return outdict

    # define output containers
    outvar = ['stellar_mass','sfr_30', 'sfr_100','half_time']
    outdict = {q: {f: [] for f in ['q50','q84','q16']} for q in outvar}
    for f in ['objname','agebins', 'weights', 'z_fraction']: outdict[f] = [] 

    # we want MASS, SFR_100, Z_FRACTION CHAIN, and AGEBINS for each galaxy
    pfile.run_params['zred'] = None # make sure this is reset
    basenames = find_all_prospector_results(runname)
    for i, name in enumerate(basenames):

        # load output from fit
        try:
            res, _, model, prosp = load_prospector_data(name)
        except:
            print name.split('/')[-1]+' failed to load. skipping.'
            continue
        if (res is None) or (prosp is None):
            continue

        outdict['objname'] += [name.split('/')[-1]]
        print 'loaded ' + outdict['objname'][-1]

        # agebins (and generate model)
        pfile.run_params['objname'] = outdict['objname'][-1]
        model = pfile.load_model(**pfile.run_params)
        outdict['agebins'] += [model.params['agebins']]

        # zfraction
        zidx = model.theta_index['z_fraction']
        outdict['z_fraction'] += [res['chain'][prosp['sample_idx'], zidx]]
        outdict['weights'] += [prosp['weights']]

        # extra variables
        for v in outvar:
            for f in ['q50','q84','q16']: outdict[v][f] += [prosp['extras'][v][f]]

    # dump files and return
    hickle.dump(outdict,open(filename, "w"))
    return outdict
Example #19
def create_moving_line(nt, line_len, nx, x0, y0, speed):

    X = np.zeros((nt, nx, nx)).astype(np.float32)

    for i in range(nt):

        xt = x0+i*speed

        X[i,y0:y0+line_len,xt] = 1

    file_name = 'line.hkl'
    hkl.dump(X, open(file_name, 'w'))
    X = hkl.load(open(file_name))
Example #20
def test_embedded_array():
    """ See https://github.com/telegraphic/hickle/issues/24 """
    
    d_orig = [[np.array([10., 20.]), np.array([10, 20, 30])], [np.array([10, 2]), np.array([1.])]]
    hickle.dump(d_orig, 'test.h5')
    d_hkl = hickle.load('test.h5')
    
    for ii, xx in enumerate(d_orig):
        for jj, yy in enumerate(xx):
            assert np.allclose(d_orig[ii][jj], d_hkl[ii][jj])
    
    print d_hkl
    print d_orig
Example #21
def scanImage(img,numStages,Npos):
	return_coordinate_list = []
	integral_image_list, coordinate_list = getIntegralImages(1280, 1600)
	for idx, I in enumerate(integral_image_list):
		is_face = cascade(I, Npos, numStages)
		k,j = coordinate_list[idx]
		if is_face == 1:
			#cv2.rectangle(img, (k,j), (k+64, j+64), (255,0,0), 3)
			return_coordinate_list.append((k,j))
		print idx

	hkl.dump(return_coordinate_list,'return_coordinate_list'+ str(class_numStages) +'.hkl')

	return return_coordinate_list
Example #22
def test_list_order():
    """ https://github.com/telegraphic/hickle/issues/26 """
    d = [np.arange(n + 1) for n in range(20)]
    hickle.dump(d, 'test.h5')
    d_hkl = hickle.load('test.h5')
    
    try:
        for ii, xx in enumerate(d):
            assert d[ii].shape == d_hkl[ii].shape
        for ii, xx in enumerate(d):
            assert np.allclose(d[ii], d_hkl[ii])
    except AssertionError:
        print d[ii], d_hkl[ii]
        raise
Example #23
 def save_to_internal(self, data):
     """save
     """
     if self.filetype == "pickle":
         pickle.dump(data, open(self.location_internal, "wb"))
     elif self.filetype == "hickle":
         import hickle
         hickle.dump(data, open(self.location_internal, "wb"))
     else:
         raise ValueError(
             "Invalid filetype {} (must be {} or {})".format(
                 self.filetype, "pickle", "hickle"
             )
         )
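A complementary loader for save_to_internal, sketched under the same assumptions (the self.filetype and self.location_internal attributes, and pickle already imported):

 def load_from_internal(self):
     """Load back whatever save_to_internal stored (pickle or hickle)."""
     if self.filetype == "pickle":
         with open(self.location_internal, "rb") as f:
             return pickle.load(f)
     elif self.filetype == "hickle":
         import hickle
         return hickle.load(self.location_internal)
     else:
         raise ValueError(
             "Invalid filetype {} (must be {} or {})".format(
                 self.filetype, "pickle", "hickle"
             )
         )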
Example #24
def test_astropy_skycoord():
    ra = Angle(['1d20m', '1d21m'], unit='degree')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)

    ra = Angle(['1d20m', '1d21m'], unit='hourangle')
    dec = Angle(['33d0m0s', '33d01m'], unit='degree')
    radec = SkyCoord(ra, dec)
    hkl.dump(radec, "test_ap.h5")
    radec2 = hkl.load("test_ap.h5")
    assert np.allclose(radec.ra.value, radec2.ra.value)
    assert np.allclose(radec.dec.value, radec2.dec.value)
Example #25
def test_astropy_quantity():

    for uu in ['m^3', 'm^3 / s', 'kg/pc']:
        a = Quantity(7, unit=uu)

        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")

        assert a == b
        assert a.unit == b.unit

        a *= a
        hkl.dump(a, "test_ap.h5")
        b = hkl.load("test_ap.h5")
        assert a == b
        assert a.unit == b.unit
Example #26
def reweigh(Npos):

	total_images = Npos * 2

	features = hkl.load('features'+str(Npos)+'.hkl')

	label = np.zeros((total_images, 1))

	label[:,0] = [1]*Npos + [0]*Npos

	weight = np.ones((total_images,1)) / total_images

	feature_index_list = []
	alpha_list = []
	theta_list = []
	polarity_list = []
	best_result_list = []

	for t in xrange(wcnum):

		currentMin, theta, polarity, featureIdx, bestResult = getWeakClassifier(features,weight,label,Npos)

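		# AdaBoost update: alpha_t = 0.5 * ln((1 - err_t) / err_t) and Z_t = 2 * sqrt(err_t * (1 - err_t)),
		# where err_t = currentMin is the weighted error of the chosen weak classifier; Z_t renormalizes the weights in the loop below.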
		alpha = log((1 - currentMin)/currentMin) / 2.0
		Z = 2.0 * sqrt( currentMin * ( 1.0 - currentMin))


		feature_index_list.append(featureIdx)
		alpha_list.append(alpha)
		theta_list.append(theta)
		polarity_list.append(polarity)
		best_result_list.append(bestResult)
		
		print "---"
		print "t", t
		print "featureIdx", featureIdx


		for i in xrange(total_images):

			weight[i,0] =  (weight[i,0] * exp(-1 *  alpha * label[i] * bestResult[i]))/ Z


	hkl.dump(feature_index_list,'feature_index_list' + str(Npos) + ".hkl")
	hkl.dump(alpha_list,'alpha_list' + str(Npos) + ".hkl")
	hkl.dump(theta_list,'theta_list' + str(Npos) + ".hkl")
	hkl.dump(polarity_list,'polarity_list' + str(Npos) + ".hkl")
	hkl.dump(best_result_list,'best_result_list' + str(Npos) + ".hkl")
Example #27
def test_astropy_table():
    t = Table([[1, 2], [3, 4]], names=('a', 'b'), meta={'name': 'test_thing'})

    hkl.dump({'a': t}, "test_ap.h5")
    t2 = hkl.load("test_ap.h5")['a']

    print(t)
    print(t.meta)
    print(t2)
    print(t2.meta)

    print(t.dtype, t2.dtype)
    assert t.meta == t2.meta
    assert t.dtype == t2.dtype

    assert np.allclose(t['a'].astype('float32'), t2['a'].astype('float32'))
    assert np.allclose(t['b'].astype('float32'), t2['b'].astype('float32'))
Example #28
def dump_names(ent_feats_dir):
    st = ['mean','var','median','max','min','max-min']

    n = []
    n.extend( ['ent_q_diffs_' + str(x) for x in range(21) ])
    n.extend( ['ent_q_diffs_' + x for x in st])

    n.extend( ['ent_q_diff_diffs_' + str(x) for x in range(21) ])
    n.extend( ['ent_q_diff_diffs_' + x for x in st])

    for i in range(4):
        n.extend( ['ent_q_diff_block_' + str(i) + '_' + str(x) for x in range(21) ])
        n.extend( ['ent_q_diff_diffs_'+ str(i) + '_' + x for x in st])

    n.extend( ['ent_p_' + str(x) for x in range(20) ])
    n.extend( ['ent_p_diffs_' + str(x) for x in range(20) ])

    hickle.dump(n,os.path.join(ent_feats_dir,'ent_feats_names'))
Example #29
 def parse_data_to_internal(self, data=None):
     """
     Parse data and save to pickle/hickle
     """
     if data is None:
         data = parse.getdata(open(self.location_dat, "rb"),
                              argnum=self.argnum, close=True)
     if self.filetype == "pickle":
         pickle.dump(data, open(self.location_internal, "wb"))
     elif self.filetype == "hickle":
         import hickle
         hickle.dump(data, open(self.location_internal, "wb"))
     else:
         raise ValueError(
             "Invalid filetype {} (must be {} or {})".format(
                 self.filetype, "pickle", "hickle"
             )
         )
Example #30
 def parse_data_to_internal(self, data=None):
     """Use numpy loadtxt
     """
     if data is None:
         kwargs = self.kwargs
         data = np.loadtxt(
             open(self.location_dat, "rb"), **kwargs
         )
     if self.filetype == "pickle":
         pickle.dump(data, open(self.location_internal, "wb"))
     elif self.filetype == "hickle":
         import hickle
         hickle.dump(data, open(self.location_internal, "wb"))
     else:
         raise ValueError(
             "Invalid filetype {} (must be {} or {})".format(
                 self.filetype, "pickle", "hickle"
             )
         )
Example #31
def main(args):
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    root_dir = os.path.dirname(os.path.dirname(cur_dir))
    data_dir = os.path.join(root_dir, 'data', 'slxrobot')
    anno_path = os.path.join(data_dir, 'Annotations')
    image_path = os.path.join(data_dir, 'Images')
    mask_path = os.path.join(data_dir, 'Masks')
    mask_dest_path = os.path.join(root_dir, 'data', 'cache', 'slxrobot',
                                  'Masks')
    if not os.path.exists(mask_dest_path):
        os.makedirs(mask_dest_path)

    classes = [
        '__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
        'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
        'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse',
        'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 'backpack',
        'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis',
        'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove',
        'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass',
        'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich',
        'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake',
        'chair', 'couch', 'potted plant', 'bed', 'dining table', 'toilet',
        'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book',
        'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush',
        'floor'
    ]
    cls_to_id = dict(zip(classes, range(len(classes))))
    id_to_cls = dict(zip(range(len(classes)), classes))
    image_list = os.listdir(image_path)
    gt_sdsdb = []
    for image in image_list:
        image_idx = image.split('.')[0]
        try:
            image_i = int(image_idx)
        except:
            continue
        anno_file = os.path.join(anno_path, '%s.xml' % image_idx)
        if not os.path.exists(anno_file):
            continue

        tree = ET.ElementTree(file=anno_file)
        bool_masks = []
        boxes = []
        gt_classes = []
        height = int(tree.find('imagesize').find('nrows').text)
        width = int(tree.find('imagesize').find('ncols').text)
        for object in tree.iter('object'):
            seg = object.find('segm')
            deleted = object.find('deleted')
            if int(deleted.text) == 1 or seg is None:
                continue
            cls = object.find('name').text
            if cls in cls_to_id:
                gt_classes.append(cls_to_id[cls])
            else:
                print(
                    'Class [%s] is not in the training classes, ignoring it ...'
                    % cls)
                continue
            box = seg.find('box')
            x1 = int(box.find('xmin').text)
            y1 = int(box.find('ymin').text)
            x2 = int(box.find('xmax').text)
            y2 = int(box.find('ymax').text)
            x1 = np.max((0, x1))
            y1 = np.max((0, y1))
            x2 = np.min((width - 1, x2))
            y2 = np.min((height - 1, y2))
            mask = seg.find('mask')
            mask_text = mask.text
            mask_file = os.path.join(mask_path, mask_text)
            mask_im = cv2.imread(mask_file)
            gray_mask = cv2.cvtColor(mask_im, cv2.COLOR_BGR2GRAY)
            bool_mask = np.full(gray_mask.shape, False, dtype=bool)
            mask_row, mask_col = np.where(gray_mask > 1)
            bool_mask[mask_row, mask_col] = True
            bool_masks.append(bool_mask)
            boxes.append([x1, y1, x2, y2])
        gt_classes = np.asarray(gt_classes)
        bool_masks = np.asarray(bool_masks)
        boxes = np.asarray(boxes)
        gt_overlaps = np.zeros((gt_classes.size, len(classes)),
                               dtype=np.float32)
        gt_overlaps[np.arange(gt_overlaps.shape[0]), gt_classes] = 1
        max_overlaps = gt_overlaps.max(axis=1)

        gt_mask_file = os.path.join(mask_dest_path, '%s.hkl' % image_idx)
        gt_mask_flip_file = os.path.join(mask_dest_path,
                                         '%s_flip.hkl' % image_idx)
        if not os.path.exists(gt_mask_file):
            print('Saving %s' % gt_mask_file)
            hkl.dump(bool_masks.astype('bool'),
                     gt_mask_file,
                     mode='w',
                     compression='gzip')
        if not os.path.exists(gt_mask_flip_file):
            print('Saving %s' % gt_mask_flip_file)
            hkl.dump(bool_masks[:, :, ::-1].astype('bool'),
                     gt_mask_flip_file,
                     mode='w',
                     compression='gzip')

        sdb = {
            'boxes':
            boxes,
            'cache_seg_inst':
            '%s/%s.hkl' %
            (os.path.relpath(mask_dest_path, cur_dir), image_idx),
            'flipped':
            False,
            'gt_classes':
            gt_classes,
            'gt_overlaps':
            gt_overlaps,
            'height':
            height,
            'width':
            width,
            'image':
            '%s/%s' % (os.path.relpath(image_path, cur_dir), image),
            'max_classes':
            gt_overlaps.argmax(axis=1),
            'max_overlaps':
            max_overlaps,
        }
        gt_sdsdb.append(sdb)
        if args.flip:
            sdb = {
                'boxes':
                boxes,
                'cache_seg_inst':
                '%s/%s.hkl' %
                (os.path.relpath(mask_dest_path, cur_dir), image_idx),
                'flipped':
                True,
                'gt_classes':
                gt_classes,
                'gt_overlaps':
                gt_overlaps,
                'height':
                height,
                'width':
                width,
                'image':
                '%s/%s' % (os.path.relpath(image_path, cur_dir), image),
                'max_classes':
                gt_overlaps.argmax(axis=1),
                'max_overlaps':
                max_overlaps,
            }
            gt_sdsdb.append(sdb)
    gt_sdsdb_file = os.path.join(mask_dest_path, 'gt_sdsdb.pkl')
    with open(gt_sdsdb_file, 'wb') as f:
        print('Length of gt_sdsdb:', len(gt_sdsdb))
        pkl.dump(gt_sdsdb, f, protocol=pkl.HIGHEST_PROTOCOL)
Example #32
def cache(n_apparent, n_true, inl_stats, R_errs, t_errs):
    hkl.dump([n_apparent, n_true, inl_stats, R_errs, t_errs],
             open(path(), 'w'))
Example #33
def main():

    for fn in sorted(os.listdir(DATA_DIR)):
        print fn

        if (fn[-3:] == 'hkl'):

            OUTPUT_DIR_IMAGES = OUTPUT_DIR + fn[0:-4] + '/'
            if not os.path.exists(OUTPUT_DIR_IMAGES):
                os.makedirs(OUTPUT_DIR_IMAGES)

            shape = (128, 128)
            [
                grids, gridglobal_x, gridglobal_y, transforms, vel_east,
                vel_north, acc_x, acc_y, adjust_indices
            ] = hkl.load(DATA_DIR + fn)
            grids = np.array(grids)
            grids = crop_center(grids, shape[0])

            do_plot = True  # Toggle me for DOGMA plots!

            # PARAMETERS
            p_B = 0.02  # birth probability
            Vb = 2 * 10**4  # number of new born particles
            V = 2 * 10**5  # number of consistent particles
            state_size = 4  # number of states: p,v: 4
            alpha = 0.9  # information ageing (discount factor)

            p_A = 1.0  # association probability: only relevant for Doppler measurements
            T = 0.1  # measurement frequency (10 Hz)
            p_S = 0.99  # particle persistence probability

            # velocity, acceleration variance initialization
            scale_vel = 12.
            scale_acc = 2.

            # position, velocity, acceleration process noise
            process_pos = 0.06
            process_vel = 2.4
            process_acc = 0.2

            # print debug values
            verbose = False

            # for plotting thresholds
            mS = 3.  # static threshold
            epsilon = 10.  # vel mag threshold
            epsilon_occ = 0.75  # occ mag threshold

            # initialize a grid
            start = time.time()
            grid_cell_array = GridCellArray(shape, p_A)
            end = time.time()
            print "grid_cell_array initialization took", end - start

            # initialize a particle array
            start = time.time()
            particle_array = ParticleArray(V, grid_cell_array.get_shape(),
                                           state_size, T, p_S, scale_vel,
                                           scale_acc, process_pos, process_vel,
                                           process_acc)
            end = time.time()
            print "particle_array initialization took", end - start

            # data: [N x 2 x W x D]
            # second dimension is masses {0: m_free, 1: m_occ}
            # in original grid: 0: unknown, 1: occupied, 2: free (raw data)
            data = create_DST_grids(grids)

            # number of measurements in the run
            N = data.shape[0]

            # list of 4x256x256 grids with position, velocity information
            DOGMA = []
            var_x_vel = []
            var_y_vel = []
            covar_xy_vel = []
            var_x_acc = []
            var_y_acc = []
            covar_xy_acc = []

            # run particle filter iterations
            for i in range(N):

                start = time.time()

                # initializes a measurement cell array
                meas_free = data[i, 0, :, :].flatten()
                meas_occ = data[i, 1, :, :].flatten()

                meas_cell_array = MeasCellArray(meas_free,
                                                meas_occ,
                                                grid_cell_array.get_shape(),
                                                pseudoG=1.)

                # algorithm 1: ParticlePrediction (stored in particle_array)
                ParticlePrediction(particle_array, grid_cell_array, res=1.0)

                # algorithm 2: ParticleAssignment (stored in particle_array)
                ParticleAssignment(particle_array, grid_cell_array)

                # algorithm 3: OccupancyPredictionUpdate (stored in grid_cell_array)
                OccupancyPredictionUpdate(meas_cell_array,
                                          grid_cell_array,
                                          particle_array,
                                          p_B,
                                          alpha,
                                          check_values=verbose)

                # algorithm 4: PersistentParticleUpdate (stored in particle_array)
                PersistentParticleUpdate(particle_array,
                                         grid_cell_array,
                                         meas_cell_array,
                                         check_values=verbose)

                # algorithm 5: NewParticleInitialization
                if p_B == 0:
                    empty_array = True
                else:
                    empty_array = False
                birth_particle_array = ParticleArray(
                    Vb,
                    grid_cell_array.get_shape(),
                    state_size,
                    T,
                    p_S,
                    scale_vel,
                    scale_acc,
                    process_pos,
                    process_vel,
                    process_acc,
                    birth=True,
                    empty_array=empty_array)
                NewParticleInitialization(Vb,
                                          grid_cell_array,
                                          meas_cell_array,
                                          birth_particle_array,
                                          check_values=verbose)

                # algorithm 6: StatisticMoments (stored in grid_cell_array)
                StatisticMoments(particle_array, grid_cell_array)

                if state_size == 4:

                    newDOGMA, new_var_x_vel, new_var_y_vel, new_covar_xy_vel = get_dogma(
                        grid_cell_array, grids, state_size, grids[i, :, :],
                        shape)

                    var_x_vel.append(new_var_x_vel)
                    var_y_vel.append(new_var_y_vel)
                    covar_xy_vel.append(new_covar_xy_vel)

                # save the velocities at this timestep: no real occupancy grid computed here; we will just use the measurement grid for now
                DOGMA.append(newDOGMA)

                # algorithm 7: Resample
                # skips particle initialization for particle_array_next because all particles will be copied in
                particle_array_next = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, \
                                          scale_vel, scale_acc, process_pos, process_vel, process_acc, empty_array = True)
                Resample(particle_array,
                         birth_particle_array,
                         particle_array_next,
                         check_values=verbose)
                # switch to new particle array
                particle_array = particle_array_next
                particle_array_next = None

                end = time.time()
                print "Time per iteration: ", end - start

                # Plotting: The environment is stored in grids[i] (matrix of  values (0,1,2))
                #           The DOGMA is stored in DOGMA[i]
                if (do_plot):
                    head_grid = dogma2head_grid(DOGMA[i], var_x_vel[i],
                                                var_y_vel[i], covar_xy_vel[i],
                                                mS, epsilon, epsilon_occ)
                    occ_grid = grids[i, :, :]
                    title = "DOGMa Iteration %d" % i
                    colorwheel_plot(head_grid,
                                    occ_grid=occ_grid,
                                    m_occ_grid=DOGMA[i][0, :, :],
                                    title=os.path.join(OUTPUT_DIR_IMAGES,
                                                       title),
                                    show=True,
                                    save=True)

                if (((i + 1) % 50 == 0) or (i == N - 1)):

                    hkl.dump([DOGMA, var_x_vel, var_y_vel, covar_xy_vel],
                             os.path.join(OUTPUT_DIR, fn),
                             mode='w')

                    print "DOGMA written to hickle file."

                print "Iteration ", i, " complete"

    return
Example #34
 def save_model_params(self, filename):
     """Pickels the parameters within a Lasagne model."""
     data = lasagne.layers.get_all_param_values(self._network)
     filename = os.path.join('./', filename)
     with open(filename, 'w') as f:
         hickle.dump(data, f)
Example #35
max_lr_contact = max([nb_lr_contacts[item] for item in nb_lr_contacts.keys()])

#normalization

print("> extract normalized Hi-C data... ")

hr_contacts_norm_dict = {item:np.log2(hr_contacts_dict[item]*max_hr_contact/sum(sum(hr_contacts_dict[item]))+1) for item in hr_contacts_dict.keys()}
lr_contacts_norm_dict = {item:np.log2(lr_contacts_dict[item]*max_lr_contact/sum(sum(lr_contacts_dict[item]))+1) for item in lr_contacts_dict.keys()}

max_hr_contact_norm={item:hr_contacts_norm_dict[item].max() for item in hr_contacts_dict.keys()}
max_lr_contact_norm={item:lr_contacts_norm_dict[item].max() for item in lr_contacts_dict.keys()}


# WRITE NB CONTACT FILES
nb_hr_contactsFile = os.path.join(out_dir, out_dir + "_nb_hr_contacts.hkl")
hkl.dump(nb_hr_contacts, nb_hr_contactsFile)
print("... written: " + nb_hr_contactsFile)

nb_lr_contactsFile = os.path.join(out_dir, out_dir + "_nb_lr_contacts.hkl")
hkl.dump(nb_lr_contacts,nb_lr_contactsFile)
print("... written: " + nb_lr_contactsFile)


# WRITE MAX CONTACT FILES
max_hr_contact_normFile = os.path.join(out_dir, out_dir + "_max_hr_contact_norm.hkl")
hkl.dump(max_hr_contact_norm,max_hr_contact_normFile)
print("... written: " + max_hr_contact_normFile)

max_lr_contact_normFile = os.path.join(out_dir, out_dir + "_max_lr_contact_norm.hkl")
hkl.dump(max_lr_contact_norm,max_lr_contact_normFile)
print("... written: " + max_lr_contact_normFile)
Example #36
# NORMALIZATION NOT DONE HERE
print("> extract normalized Hi-C data... ")
#coexpr_contacts_norm_dict = {item:np.log2(coexpr_contacts_dict[item]*max_coexpr_contact/sum(sum(coexpr_contacts_dict[item]))+1) for item in coexpr_contacts_dict.keys()}
#hic_contacts_norm_dict = {item:np.log2(hic_contacts_dict[item]*max_hic_contact/sum(sum(hic_contacts_dict[item]))+1) for item in hic_contacts_dict.keys()}
#max_coexpr_contact_norm={item:coexpr_contacts_norm_dict[item].max() for item in coexpr_contacts_dict.keys()}
#max_hic_contact_norm={item:hic_contacts_norm_dict[item].max() for item in hic_contacts_dict.keys()}
# STILL SET THE VARIABLES BECAUSE USED IN THE FUNCTIONS
coexpr_contacts_norm_dict = coexpr_contacts_dict
hic_contacts_norm_dict = hic_contacts_dict
max_coexpr_contact={item:coexpr_contacts_dict[item].max() for item in coexpr_contacts_dict.keys()}
max_hic_contact={item:hic_contacts_dict[item].max() for item in hic_contacts_dict.keys()}


# WRITE NB CONTACT FILES
nb_coexpr_contactsFile = os.path.join(out_dir, out_dir + "_nb_coexpr_contacts.hkl")
hkl.dump(nb_coexpr_contacts, nb_coexpr_contactsFile)
print("... written: " + nb_coexpr_contactsFile)

nb_hic_contactsFile = os.path.join(out_dir, out_dir + "_nb_hic_contacts.hkl")
hkl.dump(nb_hic_contacts,nb_hic_contactsFile)
print("... written: " + nb_hic_contactsFile)


# WRITE MAX CONTACT FILES
#max_coexpr_contact_normFile = os.path.join(out_dir, out_dir + "_max_coexpr_contact_norm.hkl")
#hkl.dump(max_coexpr_contact_norm,max_coexpr_contact_normFile)
#print("... written: " + max_coexpr_contact_normFile)
#max_hic_contact_normFile = os.path.join(out_dir, out_dir + "_max_hic_contact_norm.hkl")
#hkl.dump(max_hic_contact_norm,max_hic_contact_normFile)
#print("... written: " + max_hic_contact_normFile)
Example #37
def save_hickle_file(filename, data):
    check_cache()
    filename = filename + '.hickle'

    with open(filename, 'w') as f:
        hickle.dump(data, f, mode='w')
Example #38
for part in range(part_num):
    print "part", part, "of %s features" % split
    anno_path = '/home/jason6582/sfyc/attention-tensorflow/mscoco/cocodata/%s/%s.annotations_%s.pkl'\
                % (split, split, str(part))
    save_path = '/home/jason6582/sfyc/attention-tensorflow/mscoco/feature_2048/%s/%s.features_%s.hkl'\
                % (split, split, str(part))
    with open(anno_path, 'rb') as f:
        annotations = pickle.load(f)
    image_path = list(annotations['file_name'].unique())
    n_examples = len(image_path)

    all_feats = np.ndarray([n_examples, 2048], dtype=np.float32)
    for start, end in zip(
            range(0, n_examples, batch_size),
            range(batch_size, n_examples + batch_size, batch_size)):
        image_batch_file = image_path[start:end]
        image_batch = np.array(map(lambda x: ndimage.imread(x, mode='RGB'),\
                image_batch_file))
        image_batch = image_batch.astype(np.float32)
        image_batch = np.transpose(image_batch, (0, 3, 1, 2))
        image_batch = torch.Tensor(image_batch).cuda()
        image_var = Variable(image_batch, volatile=True).cuda()
        feats = resnet152(image_var)
        feats = np.reshape(feats.data.cpu().numpy(), [-1, 2048])
        # feats = np.transpose(feats, (0, 2, 1))
        all_feats[start:end, :] = feats
        print("Processed %d %s features.." % (end, split))
    # use hickle to save huge feature vectors
    hickle.dump(all_feats, save_path)
    print("Saved %s.." % (save_path))
Example #39
def save_results(results_list: list, prefix: str):
    """Saves the results of a simulation run to disk. results_list is a list of tuples, where each tuple consists of
     a compressed, pickled dict and a dict of the shapes of the data in the compressed dict (metadata)"""
    # We use the first metadata to infer basic shape information
    current_shapes = results_list[0][1]
    replications = len(results_list)

    types = {
        "total_cash": np.float_,
        "total_excess_capital": np.float_,
        "total_profitslosses": np.float_,
        "total_contracts": np.int_,
        "total_operational": np.int_,
        "total_reincash": np.float_,
        "total_reinexcess_capital": np.float_,
        "total_reinprofitslosses": np.float_,
        "total_reincontracts": np.int_,
        "total_reinoperational": np.int_,
        "total_catbondsoperational": np.int_,
        "market_premium": np.float_,
        "market_reinpremium": np.float_,
        "cumulative_bankruptcies": np.int_,
        "cumulative_market_exits": np.int_,
        "cumulative_unrecovered_claims": np.float_,
        "cumulative_claims": np.float_,
        "cumulative_bought_firms": np.int_,
        "cumulative_nonregulation_firms": np.int_,
        "market_diffvar": np.float_,
        # Would store these two as an array of lists, but hdf5 can't do that
        "rc_event_schedule_initial": np.object,
        "rc_event_damage_initial": np.object,
        "number_riskmodels": np.int_,
        "unweighted_network_data": np.float_,
        "network_node_labels": np.float_,
        "network_edge_labels": np.float_,
        "number_of_agents": np.int_,
        "insurance_cumulative_dividends": np.float_,
        "reinsurance_cumulative_dividends": np.float_,
        # These are the big ones, so we need to pay attention to data types
        "insurance_firms_cash": np.float32,
        "reinsurance_firms_cash": np.float32,
        "insurance_contracts": np.uint16,
        "reinsurance_contracts": np.uint16,
    }
    # bad_logs are the logs that don't have a consistent size between replications
    bad_logs = [
        "rc_event_schedule_initial",
        "rc_event_damage_initial",
        "insurance_contracts",
        "insurance_firms_cash",
        "reinsurance_contracts",
        "reinsurance_firms_cash",
    ]
    event_info_names = ["rc_event_schedule_initial", "rc_event_damage_initial"]

    logs_found = current_shapes.keys()
    for name in logs_found:
        if name not in types:
            print(f"Warning: type of log {name} not known, assuming float")
            types[name] = np.float_
    shapes = {}
    for name in logs_found:
        if name not in bad_logs:
            # These are mostly standard 1-d timeseries, but may also include stuff like no_riskmodels
            shapes[name] = (replications, ) + current_shapes[name]
        else:
            # We could probably do this for all of the data, but this is fine for now.
            # These are sets of timeseries: the sets have variable size (also the event schedules)
            # We use the uncompressed metadata
            found_shapes = [result[1][name] for result in results_list]
            # This only works because the shapes only vary in one dimension (tuple comparison is lexicographic)
            shapes[name] = (replications, ) + max(found_shapes)

    # Make a skeleton data structure so we only need to have one uncompressed log in memory at a time
    results_dict = {
        name: np.zeros(shape=shapes[name], dtype=types[name])
        for name in current_shapes.keys()
    }
    # results_dict is a dictionary of numpy arrays, should be efficient to store.
    # The event schedules/damages are of differing lengths. Could pad them with NaNs, but probably
    # would be more trouble than it's worth

    for i, result_tuple in enumerate(results_list):
        result = pickle.loads(zlib.decompress(result_tuple[0]))
        for name in results_dict:
            if (name not in event_info_names) and hasattr(
                    result[name], "__len__"):
                arr = np.asarray(result[name])
                shape_slice = tuple([slice(i) for i in arr.shape])
                results_dict[name][i][shape_slice] = result[name]
            else:
                results_dict[name][i] = result[name]

    # Need to do a little pre-processing
    for key in list(results_dict.keys()):
        if not isinstance(results_dict[key], np.ndarray):
            raise ValueError(f"Results_dict[{key}] is not an array")
        if results_dict[key].size == 0:
            del results_dict[key]
            continue
        if results_dict[key].dtype == np.object:
            results_dict[key] = results_dict[key].tolist()
    data = results_dict
    # data = (True, (results_dict, event_info))
    # We store everything in one file(!)

    filename = "data/" + prefix + "_full_logs.hdf"

    if os.path.exists(filename):
        # Don't want to blindly overwrite, so make backups
        import time

        backupfilename = filename + "." + time.strftime("%Y-%m-%dT%H%M%S")
        os.rename(filename, backupfilename)
    # data is a tuple, first element indicating whether the logs are slim, second element being the data
    # TODO: Make everything else work with this new format
    # Import here so sandman never tries to import
    import hickle

    hickle.dump(data, filename, compression="gzip")
Example #40
def savefile(history, path):
    #    if not os.path.exists(path):
    #        os.makedirs(path)
    hkl.dump(history, path)
Example #41
def main():

    for fn in sorted(os.listdir(DATA_DIR)):

        if (fn[-3:] == 'hkl'):

            OUTPUT_DIR_IMAGES = OUTPUT_DIR + fn[0:-4] + '/'
            if not os.path.exists(OUTPUT_DIR_IMAGES):
                os.makedirs(OUTPUT_DIR_IMAGES)

            print fn

            [DOGMA, var_x_vel, var_y_vel,
             covar_xy_vel] = hkl.load(os.path.join(DATA_DIR, fn))

            # posO,posF,velX,velY,meas_grid
            DOGMA = np.array(DOGMA)
            var_x_vel = np.array(var_x_vel)
            var_y_vel = np.array(var_y_vel)
            covar_xy_vel = np.array(covar_xy_vel)

            do_plot = True  # Toggle me for DOGMA plots!

            # velocity, acceleration variance initialization
            scale_vel = 12.
            scale_acc = 2.

            # position, velocity, acceleration process noise
            process_pos = 0.06
            process_vel = 2.4
            process_acc = 0.2

            # for plotting thresholds
            mS = 4.  # 3.         # 4. static threshold
            epsilon = 10.  # vel mag threshold
            epsilon_occ = 0.95  # 0.75 # occ mag threshold

            # number of measurements in the run
            N = DOGMA.shape[0]

            newDOGMA = mahalanobis_filter(DOGMA, var_x_vel, var_y_vel,
                                          covar_xy_vel, mS, epsilon,
                                          epsilon_occ)

            print newDOGMA.shape

            if not os.path.exists(OUTPUT_DIR):
                os.makedirs(OUTPUT_DIR)

            hkl.dump(newDOGMA, os.path.join(OUTPUT_DIR + fn), mode="w")

            if do_plot:
                for i in range(N):

                    # Plotting: The environment is stored in grids[i] (matrix of  values (0,1,2))
                    #           The DOGMA is stored in DOGMA[i]
                    head_grid = dogma2head_grid(newDOGMA[i, :, :, :],
                                                DOGMA[i,
                                                      0, :, :], var_x_vel[i],
                                                var_y_vel[i], covar_xy_vel[i],
                                                mS, epsilon, epsilon_occ)
                    occ_grid = DOGMA[i, 4, :, :]
                    title = str(
                        i)  #"DOGMa Sequence %s Iteration %d" % (fn[0:5], i)
                    colorwheel_plot(head_grid, occ_grid=occ_grid, m_occ_grid = DOGMA[i,0,:,:], title=os.path.join(OUTPUT_DIR_IMAGES, title), \
                       show=True, save=True)

                    print "Iteration ", i, " complete"

    return
Example #42
def process_data():
    splits = {s: [] for s in ['val']}  # 'train', 'test',
    splits['val'] = val_recordings
    splits['test'] = test_recordings
    not_train = splits['val'] + splits['test']
    # for c in categories:  # Randomly assign recordings to training and testing. Cross-validation done across entire recordings.
    c_dir = os.path.join(DATA_DIR, 'RAW')  # no \
    seq_clip_list = {}
    folders = os.listdir(c_dir)  # list(os.walk(c_dir, topdown=False))[-1][-2]
    for folder in folders:
        if folder in excluded_list:
            continue
        filenames = sorted(glob.glob1(os.path.join(c_dir, folder), '*.jpg'))
        num_pat = re.compile("([0-9]+)\.")  # extract the numbering of frame
        img_ids = [
            int(num_pat.search(filename).group(1)) for filename in filenames
        ]
        start_id = min(img_ids)
        cur_id = start_id
        start_i = 0
        fn_groups = []
        groups = []
        for i, img_id in enumerate(img_ids):
            if img_id == cur_id:
                cur_id += 1
                if img_id == img_ids[-1]:
                    fn_groups.append((start_id, cur_id - 1))
                    groups.append((start_i, i + 1))
            else:  # if there is discontinuity in frame number, start a new group
                fn_groups.append((start_id, cur_id - 1))  # frame number
                groups.append((start_i, i + 1))  # list number
                # (start_i, end_i + 1), (start_id, end_id)
                # filename[start_i:i+1] = ['start_id', ... 'end_id']
                start_id = img_id
                start_i = i + 1
                cur_id = img_id + 1  # predictive coding!
        seq_clip_list[folder] = (fn_groups, groups)
        if 'train' in splits:
            splits['train'] += [(folder, clip) for clip in fn_groups
                                if (folder, clip) not in not_train]
    # TODO!
    for split in splits:
        t0 = time()
        im_list = []
        source_list = []  # corresponds to recording that image came from
        for folder, clip in splits[split]:
            im_dir = os.path.join(DATA_DIR, 'RAW', folder)
            filenames = sorted(glob.glob1(os.path.join(c_dir, folder),
                                          '*.jpg'))
            fn_groups, groups = seq_clip_list[folder]
            id_clip = groups[fn_groups.index(clip)]
            for res in range(downsample_rate):
                index_rng = range(id_clip[0] + res, id_clip[1],
                                  downsample_rate)
                im_list += [
                    im_dir + '\\' + f
                    for f in filenames[id_clip[0] +
                                       res:id_clip[1]:downsample_rate]
                ]
                source_list += [
                    folder + '-%d_%d-%d' % (clip[0], clip[1], res)
                ] * len(index_rng)

        print('Creating ' + split + ' data: ' + str(len(im_list)) + ' images')
        X = np.zeros((len(im_list), ) + desired_im_sz + (3, ), np.uint8)
        for i, im_file in enumerate(im_list):
            im = imread(im_file)
            X[i] = process_im(im, desired_im_sz)

        hkl.dump(X, os.path.join(DATA_DIR, 'X_' + split + '.hkl'))
        hkl.dump(source_list,
                 os.path.join(DATA_DIR, 'sources_' + split + '.hkl'))
        print('Spent %.1f s.' % (time() - t0))
Example #43
# Working with files
#MATLAB
# save myfile
# save myfile a b
# clear a b
# clear
# load myfile
#-----------------------------------------------------------
#PYTHON
import sys
import dill
import hickle

dill.dump_session('myfile1.pkl')  #saves all variables to the file myfile1

hickle.dump([A, B], 'myfile.pkl')  #saves A, B to the file myfile

del A, B  #deletes A, B
dill.load_session('myfile1.pkl')  #loads the variables back from the file myfile1
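A, B = hickle.load('myfile.pkl')  #illustrative sketch: the hickle counterpart of MATLAB load, returning the [A, B] list dumped above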

#for name in dir():
#print(name)
#if not name.startswith('_'): #deleting variables
#del globals()[name]

#Functions
#MATLAB
#function y = myfunction(x)
#a = [-2 -1 0 1];
#y = a + x;
Example #44
                            axis=-1)),
            axis=-1)

        if not found:
            evidential_all = evidential_all_current
            found = True

        else:
            evidential_all = np.concatenate(
                (evidential_all, evidential_all_current), axis=0)

        source_list += [name] * evidential_all_current.shape[0]

    if split == 'train':
        hkl.dump(
            evidential_all,
            os.path.join(master_save_folder_double,
                         'X_' + split + '_prefiltered' + '.hkl'))
        hkl.dump(
            source_list,
            os.path.join(master_save_folder_double,
                         'sources_' + split + '_prefiltered' + '.hkl'))

    else:
        hkl.dump(
            evidential_all,
            os.path.join(master_save_folder_double, 'X_' + split + '.hkl'))
        hkl.dump(
            source_list,
            os.path.join(master_save_folder_double,
                         'sources_' + split + '.hkl'))
        hkl.dump(
Example #45
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum length of caption(number of word). if caption is longer than max_length, deleted.
    max_length = 15  #15
    # if word occurs less than word_count_threshold in training dataset, the word index is special unknown token.
    word_count_threshold = 1
    # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'

    #path to resized images
    i_fp = './image/2014_resized/'
    #n_images = 67691
    #building dataset
    print 'Start processing caption data'
    train_dataset = get_caption_data(i_fp, max_length)
    print 'Finished processing caption data'

    #train, val, and test --> 70, 15, and 15
    train_cutoff = int(0.70 * len(train_dataset))
    val_cutoff = int(0.85 * len(train_dataset))

    #path to data directory
    d_fp = './data'
    if not os.path.exists(d_fp + '/train'):
        os.makedirs(d_fp + '/train')
    if not os.path.exists(d_fp + '/val'):
        os.makedirs(d_fp + '/val')
    if not os.path.exists(d_fp + '/test'):
        os.makedirs(d_fp + '/test')

    save_pickle(train_dataset[:train_cutoff],
                d_fp + '/train/train.annotations.pkl')
    save_pickle(train_dataset[train_cutoff:val_cutoff].reset_index(drop=True),
                d_fp + '/val/val.annotations.pkl')
    save_pickle(train_dataset[val_cutoff + 1:].reset_index(drop=True),
                d_fp + '/test/test.annotations.pkl')

    ################# train, val, and test data saved #####################

    for split in ['train', 'val', 'test']:
        annotations = load_pickle(d_fp + '/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = _build_vocab(annotations=annotations,
                                       threshold=word_count_threshold)
            save_pickle(word_to_idx, d_fp + '/%s/word_to_idx.pkl' % split)

        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_length)
        save_pickle(captions, d_fp + '/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names,
                    d_fp + '/%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs,
                    d_fp + '/%s/%s.image.idxs.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if not image_id in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    d_fp + '/%s/%s.references.pkl' % (split, split))
        print "Finished building %s caption dataset" % split

    #extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in ['train', 'val', 'test']:
            anno_path = d_fp + '/%s/%s.annotations.pkl' % (split, split)
            save_path = d_fp + '/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                print start, '-', end
                image_batch_file = image_path[start:end]
                image_batch = np.array(
                    map(lambda x: ndimage.imread(x, mode='RGB'),
                        image_batch_file)).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
def main():

	with open(os.path.join(DATA_DIR, 'simulation.pickle'), 'rb') as f:
		start = time.time()
		
		# load sensor grid data (list of arrays)
		[grids, global_x_grid, global_y_grid] = pickle.load(f)

		# convert to numpy array
		grids = np.array(grids)

		end = time.time()
		print "Loading simulation datatook", end - start, len(grids), grids[0].shape

	# crop grids to the desired shape
	shape = (128,128)
	grids = np.array(grids)
	grids = crop_center(grids, shape[0])
	print grids.shape

	do_plot = True # Toggle me for DOGMA plots!

	# PARAMETERS
	p_B = 0.02                                            # birth probability
	Vb = 2*10**4                                          # number of new born particles
	V = 2*10**5                                           # number of consistent particles
	state_size = 4                                        # number of states: p,v: 4
	alpha = 0.9                                           # information ageing (discount factor)

	p_A = 1.0                                             # association probability: only relevant for Doppler measurements
	T = 0.1                                               # measurement frequency (10 Hz)
	p_S = 0.99                                            # particle persistence probability
	res = 1.                                              # resolution of the grid cells

	# velocity, acceleration variance initialization
	scale_vel = 12.
	scale_acc = 2.

	# position, velocity, acceleration process noise
	process_pos = 0.06
	process_vel = 2.4
	process_acc = 0.2

	# print debug values
	verbose = False

	# for plotting thresholds
	mS = 3.
	epsilon = 10.
	epsilon_occ = 0.75

	# index where PF was interrupted
	index_stopped = 0

	# initialize a grid
	start = time.time()
	grid_cell_array = GridCellArray(shape, p_A)
	end =  time.time()
	print "grid_cell_array initialization took", end - start

	# initialize a particle array
	start = time.time()
	particle_array = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc)
	end =  time.time()
	print "particle_array initialization took", end - start

	# data: [N x 2 x W x D]
	# second dimension is masses {0: m_free, 1: m_occ}
	# in original grid: 0: unknown, 1: occupied, 2: free (raw data)
	data = create_DST_grids(grids)

	# number of measurements in the run
	N = data.shape[0]

	# list of 4x128x128 grids with position, velocity information 
	DOGMA = []
	var_x_vel = []
	var_y_vel = []
	covar_xy_vel = []
	var_x_acc = []
	var_y_acc = []
	covar_xy_acc = []

	# run particle filter iterations
	for i in range(N):

		start = time.time()

		# initializes a measurement cell array
		meas_free = data[i,0,:,:].flatten()
		meas_occ = data[i,1,:,:].flatten()

		meas_cell_array = MeasCellArray(meas_free, meas_occ, grid_cell_array.get_shape(), pseudoG = 1.)

		# algorithm 1: ParticlePrediction (stored in particle_array)
		ParticlePrediction(particle_array, grid_cell_array, res=res)

		# algorithm 2: ParticleAssignment (stored in particle_array)
		ParticleAssignment(particle_array, grid_cell_array)

		# algorithm 3: OccupancyPredictionUpdate (stored in grid_cell_array)
		OccupancyPredictionUpdate(meas_cell_array, grid_cell_array, particle_array, p_B, alpha, check_values = verbose)

		# algorithm 4: PersistentParticleUpdate (stored in particle_array)
		PersistentParticleUpdate(particle_array, grid_cell_array, meas_cell_array, check_values = verbose)

		# algorithm 5: NewParticleInitialization
		if p_B == 0:
			empty_array = True
		else:
			empty_array = False
		birth_particle_array = ParticleArray(Vb, grid_cell_array.get_shape(), state_size, T, p_S, scale_vel, scale_acc, process_pos, process_vel, process_acc, birth = True, empty_array = empty_array)
		NewParticleInitialization(Vb, grid_cell_array, meas_cell_array, birth_particle_array, check_values = verbose)

		# algorithm 6: StatisticMoments (stored in grid_cell_array)
		StatisticMoments(particle_array, grid_cell_array)

		if (i + 1) > index_stopped:

			newDOGMA, new_var_x_vel, new_var_y_vel, new_covar_xy_vel = get_dogma(grid_cell_array, grids, state_size, grids[i,:,:], shape)

			var_x_vel.append(new_var_x_vel)
			var_y_vel.append(new_var_y_vel)
			covar_xy_vel.append(new_covar_xy_vel)

			# save the DOGMA at this timestep: the occupancy/free masses are not the real occupancy plot,
			# so we just use the measurement grid for plotting instead
			DOGMA.append(newDOGMA)

		# algorithm 7: Resample
		# skips particle initialization for particle_array_next because all particles will be copied in
		particle_array_next = ParticleArray(V, grid_cell_array.get_shape(), state_size, T, p_S, \
				                        scale_vel, scale_acc, process_pos, process_vel, process_acc, empty_array = True)
		Resample(particle_array, birth_particle_array, particle_array_next, check_values = verbose)

		# switch to new particle array
		particle_array = particle_array_next
		particle_array_next = None

		end = time.time()
		print "Time per iteration: ", end - start

		# Plotting: The environment is stored in grids[i] (matrix of  values (0,1,2))
		#           The DOGMA is stored in DOGMA[i]
		if (do_plot):
			head_grid = dogma2head_grid(DOGMA[i], var_x_vel[i], var_y_vel[i], covar_xy_vel[i], mS, epsilon, epsilon_occ)
			occ_grid = grids[i,:,:]
			title = "DOGMa Iteration %d" % i
			colorwheel_plot(head_grid, occ_grid=occ_grid, m_occ_grid = DOGMA[i][0,:,:], title=os.path.join(OUTPUT_DIR, title), show=True, save=True)

		print "Iteration ", i, " complete"

		hkl.dump([DOGMA, var_x_vel, var_y_vel, covar_xy_vel], os.path.join(OUTPUT_DIR, 'DOGMA.hkl'), mode='w')
		print "DOGMA written to hickle file."
		
	return
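
A minimal, hedged sketch of reading the checkpoint written above back in (OUTPUT_DIR and the file name are taken from this script):

# Hedged sketch: reload the per-iteration DOGMA checkpoint dumped in the loop above.
import os
import hickle as hkl

DOGMA, var_x_vel, var_y_vel, covar_xy_vel = hkl.load(os.path.join(OUTPUT_DIR, 'DOGMA.hkl'))
print("loaded %d DOGMA grids, first has shape %s" % (len(DOGMA), str(DOGMA[0].shape)))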
Example #47
0
def preprocess(p):
    p.load()
    #p = EEG.EEG('Patient_2', 'interictal', 17)
    print p
    #p.normalize_channels()
    #p.normalize_overall()

    print np.shape(p.data), p.data.nbytes  # == (16, ~240k)

    data = p.data
    #eeg = np.rollaxis(data, 1)
    #print p.data[0:1, 0:20]

    global bin_fft, signal_duration, sample_length
    if bin_fft is None:
        pow2 = np.log2(p.sample_rate_in_hz * signal_duration_min)

        sample_length = int(2.0**(int(pow2) + 1))  # in samples, rounds up
        signal_duration = sample_length / p.sample_rate_in_hz
        print "Pow2: ", pow2
        print "Signal duration : %6.2fsec = %d samples " % (
            signal_duration,
            sample_length,
        )

        ## Matrix that gathers FFT entries into buckets
        ## Want buckets to be [0 - 0.5 - 1.5 - 2.5 - 3.5 - ... - 49.5] Hz
        bin_array = np.linspace(0., 49., num=50)

        ## http://docs.scipy.org/doc/numpy/reference/routines.fft.html#module-numpy.fft

        #freq = fftpack.rfftfreq(n=sample_length, d=1./p.sample_rate_in_hz)
        freq = np.fft.rfftfreq(n=sample_length, d=1. / p.sample_rate_in_hz)
        #print freq[0:100]

        bin_fft = np.zeros((len(freq), len(bin_array)))
        for i, bn in enumerate(bin_array):
            bn_lower = (bin_array[i - 1] +
                        bin_array[i + 0]) / 2. if i > 0 else bn - 0.5
            bn_upper = (bin_array[i + 0] + bin_array[i + 1]
                        ) / 2. if i < len(bin_array) - 1 else bn + 0.5
            a = np.where((freq > bn_lower) & (freq <= bn_upper), 1, 0)
            bin_fft[:, i] = a
            #print bn_lower, bn, bn_upper

        #print bin_fft[0:20, 0:5]

    ## Now, take whole period, and find the start times in seconds
    signal_period_starts = np.arange(start=0,
                                     stop=p.length_in_sec - signal_duration,
                                     step=signal_period_step)
    #print signal_period_starts

    param_length = p.n_channels * np.shape(bin_fft)[1]  # len(bin_array)
    all_params = np.zeros((len(signal_period_starts), param_length),
                          dtype=np.complex64)

    for i, start_period in enumerate(signal_period_starts):
        sample_start = int(p.sample_rate_in_hz *
                           start_period)  # start time in seconds

        #z = fftpack.rfft(p.data[:, sample_start:], n=sample_length, axis=1)
        fft_raw = np.fft.rfft(p.data[:, sample_start:],
                              n=sample_length,
                              axis=1)
        #print np.shape(fft_raw)
        #print fft_raw[0:1, 0:20]

        binned = np.dot(fft_raw, bin_fft)
        #print np.shape(binned)
        #print binned[0:1, :]
        #print binned[0, 0]           # Check that first bin is equal to first sums...
        #print np.sum(fft_raw[0,0:6]) # Works!

        params = np.log(binned.ravel())

        all_params[i, :] = params

    print np.shape(all_params), all_params.nbytes

    to_hickle = dict(
        features=all_params,
        signal_period_starts=signal_period_starts,
    )

    # Dump data, with compression
    f = "data/feat/%s/%s_%s_segment_%04d.hickle" % (p.subject, p.subject,
                                                    p.desc, p.num)
    hickle.dump(to_hickle, f, mode='w', compression='gzip')
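
The bucket-gathering matrix built above can be hard to picture; the following is a self-contained, hedged toy check of the same idea, using made-up sample-rate and length values rather than anything from this script.

# Hedged toy check: dotting an rfft spectrum with a 0/1 bin-membership matrix
# sums the FFT entries into ~1 Hz buckets, as described in the comments above.
import numpy as np

sample_rate = 400.0                      # toy value, not the real EEG rate
n = 1024                                 # toy sample length
freq = np.fft.rfftfreq(n, d=1.0 / sample_rate)
bin_array = np.linspace(0., 49., num=50)

bin_fft = np.zeros((len(freq), len(bin_array)))
for i, bn in enumerate(bin_array):
    lo = bn - 0.5 if i == 0 else (bin_array[i - 1] + bn) / 2.
    hi = bn + 0.5 if i == len(bin_array) - 1 else (bn + bin_array[i + 1]) / 2.
    bin_fft[:, i] = np.where((freq > lo) & (freq <= hi), 1, 0)

spectrum = np.fft.rfft(np.random.randn(n))
binned = np.dot(spectrum, bin_fft)       # 50 complex bucket sums
mask0 = bin_fft[:, 0].astype(bool)
assert np.allclose(binned[0], spectrum[mask0].sum())   # first bucket == sum of its raw entries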
Example #48
0
X_test[0,:,:,:,:] = loadmat("../vim2/preprocessed/test.mat")['d'].transpose((0,3,1,2))

print X_test.shape

frame = 10
for i in range(nbat):
    if time.time() - starttime > 72000:
        break
    if frame + fperbat >= X_test.shape[1]:
        frame = X_test.shape[1] - fperbat
    test_errors = test_model.predict(X_test[:,frame-file_overlap:frame+fperbat,:,:,:], 1)
    outfile = RESULTS_SAVE_DIR + "/testerr"+str(i)+ ".hkl"
    print outfile
    print frame
    frame += fperbat
    hkl.dump(test_errors[0,file_overlap:], outfile)

#    hkl.dump(errs1[:,9,:], RESULTS_SAVE_DIR
#+ "errors_frame"+ str(b*batch_size+9+6) + "_" + str((b+1)*batch_size+9+6)
#+".hkl")

#X_hat = test_model.predict(X_test[1], batch_size)
#test_model._make_predict_function()
#f = test_model.predict_function
#errs1 = f(X_test[0])

# 
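
A hedged sketch of stitching the per-batch error files written above back into one array; RESULTS_SAVE_DIR and nbat come from this script, and it assumes every file was actually written (the loop can stop early on the time limit).

# Hedged sketch: gather the testerr<i>.hkl chunks back into a single array.
import numpy as np
import hickle as hkl

chunks = [hkl.load(RESULTS_SAVE_DIR + "/testerr" + str(i) + ".hkl") for i in range(nbat)]
test_errors_all = np.concatenate(chunks, axis=0)
print(test_errors_all.shape)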



Example #49
0
    f_meta = "data/feat/%s/%s_meta_input.hickle" % (
        _subject,
        _subject,
    )

    if True or train_data:  # PREVIOUSLY :: produce meta-data only from training set
        per_feature_min = np.min(all_features, axis=0)
        per_feature_max = np.max(all_features, axis=0)

        to_hickle = dict(
            signal_period_starts=signal_period_starts,
            per_feature_min=per_feature_min,
            per_feature_max=per_feature_max,
        )

        hickle.dump(to_hickle, f_meta, mode='w', compression='gzip')

    #else:
    #  from_hickle_meta = hickle.load(f_meta)
    #  per_feature_min = from_hickle_meta['per_feature_min']
    #  per_feature_max = from_hickle_meta['per_feature_max']

    norm_features = (all_features - per_feature_min) / (per_feature_max -
                                                        per_feature_min)

    to_hickle = dict(features=norm_features, )

    #f_out  = "data/feat/%s/%s_%s_input.hickle" % (_subject, _subject, ("train" if train_data else "test"), )
    f_out = "data/feat/%s/%s_input.hickle" % (
        _subject,
        _subject,
Example #50
0
# tensorboard
tensorboard = TensorBoard(log_dir="logs_dogma/{}".format(time()),
                          histogram_freq=1,
                          write_graph=False,
                          write_grads=False,
                          write_images=True)
callbacks.append(tensorboard)

history = model.fit_generator(train_generator, samples_per_epoch / batch_size, nb_epoch, callbacks=callbacks, \
                validation_data=val_generator, validation_steps=N_seq_val / batch_size)

# summarize history for loss
print(history.history.keys())

plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'val'], loc='upper left')
plt.savefig('loss_full_kitti_dogma_t_1.png')
plt.show()

# save history in a hickle file
hkl.dump(history.history, 'history_full_kitti_dogma_t_1.hkl', mode='w')

if save_model:
    json_string = model.to_json()
    with open(json_file, "w") as f:
        f.write(json_string)
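
A hedged sketch of picking the results back up later: reload the history hickle for analysis and rebuild the architecture from the saved JSON (json_file is the path used above; models with custom layers would also need custom_objects).

# Hedged sketch: reload training history and model architecture saved above.
import hickle as hkl
from keras.models import model_from_json

history = hkl.load('history_full_kitti_dogma_t_1.hkl')
print("best val_loss: %.4f" % min(history['val_loss']))

with open(json_file, 'r') as f:
    model = model_from_json(f.read())   # pass custom_objects here if custom layers are used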
Example #51
0
                        type=bool,
                        default=True,
                        help='Save test labels and predicted labels')

    args = parser.parse_args()

    seed = args.seed
    outdir = args.outdir
    # Load data and create label
    train_mat = hkl.load(args.train_data).astype(int)
    test_mat = hkl.load(args.test_data).astype(int)
    train_label = [1] * 1600 + [0] * 1600
    test_label = [1] * 400 + [0] * 400
    train_data, train_label = shuffle(train_mat, train_label, random_state=0)
    test_data, test_label = shuffle(test_mat, test_label, random_state=0)
    # Model
    seed = args.seed
    model = SVC(probability=True)
    # Train and predict
    model.fit(train_data, train_label)
    predict_label = model.predict(test_data)
    test_posterior_probability = model.predict_proba(test_data)
    test_label = np.array(test_label)
    # Save the result
    if args.save_result:
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        hkl.dump(predict_label, outdir + 'predict_label.hkl')
        hkl.dump(test_posterior_probability,
                 outdir + 'test_posterior_probability.hkl')
        hkl.dump(test_label, outdir + 'test_label.hkl')
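
A hedged sketch of scoring the dumped predictions afterwards; the paths mirror the outdir layout above, and the positive-class column of predict_proba is assumed to be index 1.

# Hedged sketch: evaluate the saved predictions with scikit-learn.
import hickle as hkl
from sklearn.metrics import accuracy_score, roc_auc_score

y_true = hkl.load(outdir + 'test_label.hkl')
y_pred = hkl.load(outdir + 'predict_label.hkl')
y_prob = hkl.load(outdir + 'test_posterior_probability.hkl')

print("accuracy: %.3f" % accuracy_score(y_true, y_pred))
print("ROC AUC : %.3f" % roc_auc_score(y_true, y_prob[:, 1]))   # column 1 assumed to be the positive class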
Example #52
0
def main():
    # batch size for extracting feature vectors from vggnet.
    batch_size = 100
    # maximum length of caption(number of word). if caption is longer than max_length, deleted.
    max_length = 15
    # if word occurs less than word_count_threshold in training dataset, the word index is special unknown token.
    word_count_threshold = 1
    # vgg model path
    vgg_model_path = './data/imagenet-vgg-verydeep-19.mat'
    ##### vgg model-> wget http://www.vlfeat.org/matconvnet/models/imagenet-vgg-verydeep-19.mat -P data/

    caption_file = '/home/most12lee/downloads/data/token_3000imgs.json'
    image_dir = '/home/most12lee/downloads/data/%s_resized/'

    # about 80000 images and 400000 captions for train dataset
    # -> ME: about 2100 images and 10500 captions for train datasets
    train_dataset = _process_caption_data(
        caption_file=
        '/home/most12lee/downloads/data/token_3000imgs_train.json',  #### DONT FORGET TO CHANGE CSV INTO JSON FILE!!!!!!
        image_dir='/home/most12lee/downloads/data/train_resized/',
        max_length=max_length)

    # about 40000 images and 200000 captions
    # -> ME: about 900 images and 4500 captions for val datasets
    val_dataset = _process_caption_data(
        caption_file=
        '/home/most12lee/downloads/data/token_3000imgs_val.json',  #### DONT FORGET TO CHANGE CSV INTO JSON FILE!!!!!!
        image_dir='/home/most12lee/downloads/data/val_resized/',
        max_length=max_length)

    # about 4000 images and 20000 captions for val / test dataset
    # -> ME: about 90 images and 450 captions for val / test datasets
    val_cutoff = int(0.1 * len(val_dataset))
    test_cutoff = int(0.2 * len(val_dataset))
    print 'Finished processing caption data'

    save_pickle(train_dataset, 'data/train/train.annotations.pkl')
    save_pickle(val_dataset[:val_cutoff], 'data/val/val.annotations.pkl')
    save_pickle(val_dataset[val_cutoff:test_cutoff].reset_index(drop=True),
                'data/test/test.annotations.pkl')

    for split in ['train', 'val', 'test']:
        annotations = load_pickle('./data/%s/%s.annotations.pkl' %
                                  (split, split))

        if split == 'train':
            word_to_idx = _build_vocab(annotations=annotations,
                                       threshold=word_count_threshold)
            save_pickle(word_to_idx, './data/%s/word_to_idx.pkl' % split)

        captions = _build_caption_vector(annotations=annotations,
                                         word_to_idx=word_to_idx,
                                         max_length=max_length)
        save_pickle(captions, './data/%s/%s.captions.pkl' % (split, split))

        file_names, id_to_idx = _build_file_names(annotations)
        save_pickle(file_names, './data/%s/%s.file.names.pkl' % (split, split))

        image_idxs = _build_image_idxs(annotations, id_to_idx)
        save_pickle(image_idxs, './data/%s/%s.image.idxs.pkl' % (split, split))

        # prepare reference captions to compute bleu scores later
        image_ids = {}
        feature_to_captions = {}
        i = -1
        for caption, image_id in zip(annotations['caption'],
                                     annotations['image_id']):
            if image_id not in image_ids:
                image_ids[image_id] = 0
                i += 1
                feature_to_captions[i] = []
            feature_to_captions[i].append(caption.lower() + ' .')
        save_pickle(feature_to_captions,
                    './data/%s/%s.references.pkl' % (split, split))
        print "Finished building %s caption dataset" % split

    # extract conv5_3 feature vectors
    vggnet = Vgg19(vgg_model_path)
    vggnet.build()
    with tf.Session() as sess:
        tf.initialize_all_variables().run()
        for split in ['train', 'val', 'test']:
            anno_path = './data/%s/%s.annotations.pkl' % (split, split)
            save_path = './data/%s/%s.features.hkl' % (split, split)
            annotations = load_pickle(anno_path)
            image_path = list(annotations['file_name'].unique())
            n_examples = len(image_path)

            all_feats = np.ndarray([n_examples, 196, 512], dtype=np.float32)

            for start, end in zip(
                    range(0, n_examples, batch_size),
                    range(batch_size, n_examples + batch_size, batch_size)):
                image_batch_file = image_path[start:end]
                image_batch = np.array(
                    map(lambda x: ndimage.imread(x, mode='RGB'),
                        image_batch_file)).astype(np.float32)
                feats = sess.run(vggnet.features,
                                 feed_dict={vggnet.images: image_batch})
                all_feats[start:end, :] = feats
                print("Processed %d %s features.." % (end, split))

            # use hickle to save huge feature vectors
            hickle.dump(all_feats, save_path)
            print("Saved %s.." % (save_path))
Example #53
0
if not os.path.exists('gedm.hkl'):
    N = 256
    d_2001 = np.zeros([N, N])
    d_2016 = np.zeros([N, N])

    for ii in range(N):
        print("%i / %i" % (ii + 1, N))
        for jj in range(N):
            l = float(ii) / N * 360 - 180
            b = float(jj) / N * 90 - 45
            d_2001[ii, jj] = pyne2001.get_galactic_dm(l, b)
            dm, tau = pyymw16.dist_to_dm(l, b, 30000)
            d_2016[ii, jj] = dm.value

    hkl.dump({'NE2001': d_2001, 'YMW16': d_2016}, 'gedm.hkl')
else:
    d = hkl.load('gedm.hkl')
    plt.figure(figsize=(9, 9))
    plt.subplot(3, 1, 1)
    plot_gplane(d['NE2001'], 'NE2001')

    plt.subplot(3, 1, 2)
    plot_gplane(d['YMW16'], 'YMW16')

    plt.subplot(3, 1, 3)
    d_delta = (d['YMW16'] - d['NE2001'])
    plot_gplane(d_delta, 'Difference')
    plt.xlabel("gl [deg]")
    plt.tight_layout()
    plt.savefig('compare_to_ne2001.png')
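
The compute-if-missing / load-otherwise pattern above generalises nicely; here is a small hedged helper sketch (compute_fn and the cache path are placeholders, not part of this script).

# Hedged sketch: hkl-backed cache for an expensive computation.
import os
import hickle as hkl

def cached(path, compute_fn):
    """Return the hickle-cached result, computing and dumping it on first use."""
    if os.path.exists(path):
        return hkl.load(path)
    result = compute_fn()
    hkl.dump(result, path)
    return result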
def quicklook(filename,
              save,
              dump,
              flag,
              merge,
              flatten,
              no_show,
              all_lsts,
              new_cal,
              sky=False,
              lfsm=False,
              emp=False):
    h5 = tb.open_file(filename)

    if new_cal: T_ant = apply_new_calibration(h5)
    else: T_ant = apply_calibration(h5)
    f_leda = T_ant['f']

    ant_ids = ['252', '254', '255']

    print("Plotting...")
    fig = plt.figure(figsize=(20, 20))
    #plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    indexes = np.arange(len(lst_stamps), dtype=np.int)

    if len(lst_stamps) == 0:
        raise RuntimeError("No LSTs in file")

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], (
                lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    #hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    pad_length = 70
    padding = np.full((len(lst_stamps), pad_length), 10000)
    timing = lst_timing.LST_Timing(lst_stamps, utc_stamps)
    border_bottom, night_bottom, night_top, border_top = timing.calc_night()
    padding[night_bottom:night_top, :] = 1000

    #for ant in ant_ids:
    #  lst_stamps, T_ant[ant+"A"] = timing.align(T_ant[ant+"A"])
    #  lst_stamps, T_ant[ant+"B"] = timing.align(T_ant[ant+"B"])

    if night_bottom:
        print "Night", lst_stamps[night_bottom], "-", lst_stamps[night_top - 1]
    else:
        print "Night 0 - 0"

    # Use night only
    if not all_lsts:
        if not border_top:
            raise RuntimeError(
                "No LSTs available at night time (use --all_lsts to see all)")
        lst_stamps = lst_stamps[night_bottom:night_top]
        utc_stamps = utc_stamps[night_bottom:night_top]
        indexes = indexes[night_bottom:night_top]
        padding = padding[night_bottom:night_top]
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"

    if len(lst_stamps) == 0:
        raise RuntimeError(
            "There are no data to display (number of LSTs is 0)")

    yloc = []
    ylabel = []
    try:
        for i in range(0, len(lst_stamps), len(lst_stamps) / 7):
            yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i]))
    except:
        yloc.append(lst_stamps[0]), ylabel.append(("%.1f" % lst_stamps[0]))
        yloc.append(lst_stamps[-1]), ylabel.append(("%.1f" % lst_stamps[-1]))
    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] -
                                              xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if sky:
        if lfsm and emp:
            smdl = SkyModelLFSMEmp
            smlbl = 'LFSM+Emp'
        elif lfsm and not emp:
            smdl = SkyModelLFSM
            smlbl = 'LFSM'
        elif not lfsm and emp:
            smdl = SkyModelGSMEmp
            smlbl = 'GSM+Emp'
        else:
            smdl = SkyModelGSM
            smlbl = 'GSM'
        sy = smdl(pol='y')
        sx = smdl(pol='x')
        T_y_asm = sy.generate_tsky(lst_stamps, f_leda * 1e6)
        T_x_asm = sx.generate_tsky(lst_stamps, f_leda * 1e6)

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    # Do flagging with a border around the data in time
                    masks = rfi_flag(T_flagged[border_bottom:border_top],
                                     freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)

                    new_mask = new_mask[night_bottom -
                                        border_bottom:night_top -
                                        border_bottom]  # remove border
                else:
                    masks = rfi_flag(T_flagged, freqs=f_leda)
                    new_mask = masks.combine(do_not_excise_dtv=True)

                    print ant, "Biggest DTV gap", lst_stamps[biggest_gap(
                        masks.dtv_tms)[1]], "-", lst_stamps[biggest_gap(
                            masks.dtv_tms)[0]], "waterfall"
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:
        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2,
                                     3,
                                     3 * p + ii + 1,
                                     sharex=origAX,
                                     sharey=origAX)

            if p == 0:
                ant = key + "A"
            else:
                ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = T_flagged[night_bottom:night_top]

            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            masks = {}
            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    if not all_lsts:
                        masks = rfi_flag(T_ant[ant][border_bottom:border_top],
                                         freqs=f_leda)
                        T_flagged = masks.apply_as_mask(
                            T_ant[ant][border_bottom:border_top],
                            do_not_excise_dtv=True)
                        T_flagged = T_flagged[night_bottom -
                                              border_bottom:night_top -
                                              border_bottom]  # Remove border

                        masks.chop(night_bottom - border_bottom,
                                   night_top - border_bottom)
                    else:
                        masks = rfi_flag(T_flagged, freqs=f_leda)
                        T_flagged = masks.apply_as_mask(T_flagged,
                                                        do_not_excise_dtv=True)

                        print ant, "Biggest DTV gap", lst_stamps[biggest_gap(
                            masks.dtv_tms)[1]], "-", lst_stamps[biggest_gap(
                                masks.dtv_tms)[0]], "waterfall"
                print "After flagging", "Max", np.ma.max(
                    T_flagged), "Min", np.ma.min(T_flagged)

            try:
                T_asm = T_y_asm if p == 0 else T_x_asm
                scale_offset_asm = robust.mean(T_asm / T_flagged)
                T_flagged = T_flagged - T_asm / scale_offset_asm
            except NameError:
                pass

            T_flagged = pad_data(T_flagged)  # Up to 2400 channels

            if dump:
                if not all_lsts:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(
                            T_ant[ant][night_bottom:night_top])

                    dump_data[ant] = T_ant[ant][night_bottom:night_top]
                else:
                    if masks:
                        dump_data[ant + "_flagged"] = masks.apply_as_nan(
                            T_ant[ant])
                    dump_data[ant] = T_ant[ant]
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted
                if masks:
                    dump_data[ant + "_dtv_times"] = np.array(masks.dtv_tms)
                    dump_data[ant + "_masks"] = masks.masks

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                print ant, ("%.1f%%" % (100 * float(total - num_in) / total)
                            ), "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding),
                                                   axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot,
                                       5), percentile(T_flagged_plot, 95))

            elif sky:
                clim = (-250, 500)
            else:
                clim = (1000, 10000)
            if ant != "252B":
                im = ax.imshow(
                    T_flagged_plot,  # / np.median(xx, axis=0), 
                    cmap="viridis",
                    aspect='auto',
                    interpolation='nearest',
                    clim=clim,
                    extent=(xlims[0], new_x_high, ylims[1], ylims[0]))

            ax.set_title(ant)
            if p == 1:
                ax.set_xlabel("Frequency [MHz]")
            if ii == 0:
                ax.set_ylabel("LST [hr]")
            #ax.yaxis_date()
            #ax.yaxis.set_major_formatter(hfmt)
            #

    if not flatten:
        fig.subplots_adjust(left=0.07)
        fig.subplots_adjust(right=0.875)
        cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
        cbar = fig.colorbar(im, cax=cbar_ax)

        #plt.subplot(2,3,3)
        #cbar = plt.colorbar()
        if sky:
            cbar.set_label("Temperature - %s [K]" % smlbl)
        else:
            cbar.set_label("Temperature [K]")
        cbar.ax.tick_params(axis='y', pad=2)
        #plt.tight_layout()

    plt.text(0.005,
             0.005,
             get_repo_fingerprint(),
             transform=fig.transFigure,
             size=8)

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["indexes"] = indexes
        dump_data["frequencies"] = pad_frequencies(f_leda)
        dump_data["options"] = "Flag="+str(flag) \
          + " Filename="+filename \
          + " New cal="+str(new_cal) \
                               + " Merge="+str(merge) \
                               + " Flatten="+str(flatten) \
                               + " All LSTs="+str(all_lsts) \
                               + " Sky Model Substract="+str(sky) \
                               + " Use LFSM="+str(lfsm) \
                               + " Apply empirical gain correction="+str(emp)
        dump_data["fingerprint"] = get_repo_fingerprint()
        import json

        def jdefault(o):
            return o.__dict__

        dump_data["params"] = json.dumps(params, default=jdefault)

        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
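
A hedged sketch of inspecting a quicklook dump afterwards; the file name here is hypothetical (the real one is derived from the input filename above).

# Hedged sketch: list keys and shapes stored in a quicklook .hkl dump.
import hickle

dump_data = hickle.load("outriggers_quicklook.hkl")   # hypothetical name
for key in sorted(dump_data.keys()):
    val = dump_data[key]
    shape = getattr(val, "shape", None)
    print("%s: %s" % (key, shape if shape is not None else type(val)))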
Example #55
0
            if not np.isnan(snp):  # zero by default; insert the genotype value where present
                assert -1 < snp < 3  # only 0, 1, or 2: minor homozygous, heterozygous, and major homozygous
                dataset[row][offset] = snp
            else:
                dataset[row][offset] = 5  # substitute nan's by 5
            offset += 1


if __name__ == '__main__':
    args = parse_args()
    print('Called with args:')
    print(args)

    snps = Bed(args.snps, count_A1=False)  # count_A1 counts the allele numbers
    phenos = pd.read_csv(
        '/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/cleaned.csv',
        sep=',')[-25:]
    phenos = phenos.reset_index()

    iid_patients = phenos.loc[:, 'f.eid']

    data_on(ondisk=snps, patients_=iid_patients)

    print("making the geno file...")
    # pd.DataFrame(dataset).to_csv("/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/genos.csv", sep=' ', header=None, index=False)
    hkl.dump(
        dataset,
        "/Users/ioneliabuzatu/PycharmProjects/biobank/obesity/data/bmi_val_25.hkl",
        mode='w')
Example #56
0
lr_contacts_norm_dict = {
    item: np.log2(lr_contacts_dict[item] * max_lr_contact /
                  sum(sum(lr_contacts_dict[item])) + 1)
    for item in lr_contacts_dict.keys()
}

max_hr_contact_norm = {
    item: hr_contacts_norm_dict[item].max()
    for item in hr_contacts_dict.keys()
}
max_lr_contact_norm = {
    item: lr_contacts_norm_dict[item].max()
    for item in lr_contacts_dict.keys()
}

hkl.dump(nb_hr_contacts, 'data/%s/nb_hr_contacts.hkl' % cell)
hkl.dump(nb_lr_contacts, 'data/%s/nb_lr_contacts.hkl' % cell)

hkl.dump(max_hr_contact_norm, 'data/%s/max_hr_contact_norm.hkl' % cell)
hkl.dump(max_lr_contact_norm, 'data/%s/max_lr_contact_norm.hkl' % cell)


def crop_hic_matrix_by_chrom(chrom, norm_type=0, size=40, thred=200):
    #thred = 2M / resolution
    #norm_type=0 --> raw count
    #norm_type=1 --> log transformation
    #norm_type=2 --> scaled to [-1,1] after log transformation (default)
    #norm_type=3 --> scaled to [0,1] after log transformation
    distance = []
    crop_mats_hr = []
    crop_mats_lr = []
Example #57
0
def save_batches(file_list,
                 tar_dir,
                 img_size=48,
                 batch_size=256,
                 flag_avg=False,
                 num_sub_batch=1):
    '''
    num_sub_batch is for parallelising across multiple GPUs; it should be
    1, 2, or 4, where the sub-batch suffix is the reversed binary index:
    when 2, the files end with _0.hkl and _1.hkl
    when 4, with _00.hkl, _10.hkl, _01.hkl and _11.hkl
    '''

    if not os.path.exists(tar_dir):
        os.makedirs(tar_dir)

    img_batch = np.zeros((3, img_size, img_size, batch_size), np.uint8)

    if flag_avg:
        img_sum = np.zeros((3, img_size, img_size))

    batch_count = 0
    count = 0
    for file_name in file_list:
        img_batch[:, :, :, count % batch_size] = \
            get_img(file_name, img_size=img_size, batch_size=batch_size)

        count += 1
        if count % batch_size == 0:
            batch_count += 1

            if flag_avg:
                img_sum += img_batch.mean(axis=3)

            if num_sub_batch == 1:
                save_name = '%04d' % (batch_count - 1) + '.hkl'
                hkl.dump(img_batch, os.path.join(tar_dir, save_name), mode='w')

            elif num_sub_batch == 2:
                half_size = batch_size / 2
                save_name = '%04d' % (batch_count - 1) + '_0.hkl'
                hkl.dump(img_batch[:, :, :, :half_size],
                         os.path.join(tar_dir, save_name),
                         mode='w')

                save_name = '%04d' % (batch_count - 1) + '_1.hkl'
                hkl.dump(img_batch[:, :, :, half_size:],
                         os.path.join(tar_dir, save_name),
                         mode='w')

            elif num_sub_batch == 4:
                q1 = batch_size / 4
                q2 = batch_size / 2
                q3 = batch_size / 4 * 3

                save_name = '%04d' % (batch_count - 1) + '_00.hkl'
                hkl.dump(img_batch[:, :, :, :q1],
                         os.path.join(tar_dir, save_name),
                         mode='w')
                save_name = '%04d' % (batch_count - 1) + '_10.hkl'
                hkl.dump(img_batch[:, :, :, q1:q2],
                         os.path.join(tar_dir, save_name),
                         mode='w')
                save_name = '%04d' % (batch_count - 1) + '_01.hkl'
                hkl.dump(img_batch[:, :, :, q2:q3],
                         os.path.join(tar_dir, save_name),
                         mode='w')
                save_name = '%04d' % (batch_count - 1) + '_11.hkl'
                hkl.dump(img_batch[:, :, :, q3:],
                         os.path.join(tar_dir, save_name),
                         mode='w')
            else:
                NotImplementedError("num_sub_batch has to be 1, 2, or 4")

    return img_sum / batch_count if flag_avg else None
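
A hedged sketch of how a consumer of these batches might stitch the sub-batch files back together, following the suffix convention in the docstring (load_full_batch is an illustrative helper, not part of this code).

# Hedged sketch: reassemble one batch from its sub-batch files.
import os
import numpy as np
import hickle as hkl

def load_full_batch(tar_dir, batch_idx, num_sub_batch=1):
    if num_sub_batch == 1:
        suffixes = ['']
    elif num_sub_batch == 2:
        suffixes = ['_0', '_1']
    elif num_sub_batch == 4:
        suffixes = ['_00', '_10', '_01', '_11']
    else:
        raise NotImplementedError("num_sub_batch has to be 1, 2, or 4")
    parts = [hkl.load(os.path.join(tar_dir, '%04d%s.hkl' % (batch_idx, sfx)))
             for sfx in suffixes]
    return np.concatenate(parts, axis=3)   # images were split along the last (batch) axis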
def quicklook(filename, save, dump, flag, merge, flatten, no_show, all_lsts):
    h5 = tb.open_file(filename)

    T_ant = apply_calibration(h5)
    f_leda = T_ant['f']

    ant_ids = ['252', '254', '255']

    print("Plotting...")
    fig = plt.figure(figsize=(12, 12))
    #plt.suptitle(h5.filename)

    lst_stamps = T_ant['lst']
    if len(lst_stamps) == 0:
        print "No LSTS in file"
        exit(1)

    # Report discontinuities in time
    for i in range(1, len(lst_stamps)):
        if lst_stamps[i] - lst_stamps[i - 1] > 1 / 60.0:  # 1 minute
            print "Discontinuity at LST", lst_stamps[i], (
                lst_stamps[i] - lst_stamps[i - 1]) * 60 * 60, "seconds"

    utc_stamps = T_ant['utc']
    xlims = (f_leda[0], f_leda[-1])
    #ylims = mdates.date2num((T_ant['utc'][0], T_ant['utc'][-1]))
    #hfmt = mdates.DateFormatter('%m/%d %H:%M')
    ylims = (T_ant['lst'][0], T_ant['lst'][-1])

    # Work out altitude of Gal center and Sun. Use whichever is highest
    # and put that in the padding, which is the stripe.
    unusable_lsts = []
    pad_length = 70
    padding = np.zeros((len(lst_stamps), pad_length))
    for i, d in enumerate(utc_stamps):
        ovro.date = d
        sun.compute(ovro)
        gal_center.compute(ovro)
        if sun.alt > -15 * np.pi / 180 or gal_center.alt > -15 * np.pi / 180:
            padding[i, :] = 10000
            unusable_lsts.append(i)
        else:
            padding[i, :] = 1000

    # Delete sun up LSTS
    if not all_lsts:
        print "Cutting out times when sun/galaxy up"
        padding = np.delete(padding, unusable_lsts, axis=0)
        lst_stamps = np.delete(lst_stamps, unusable_lsts, axis=0)
        utc_stamps = np.delete(utc_stamps, unusable_lsts, axis=0)
        if len(lst_stamps) == 0:
            print "No LSTs available at night time (use --all_lsts to see all)"
            exit(1)
        ylims = (lst_stamps[0], lst_stamps[-1])
        print len(lst_stamps), "usable LSTs"
    else:
        print "Using all LSTs"
    if len(lst_stamps) == 0:
        print "There is no data to display (number of LSTs is 0)"
        exit(1)

    yloc = []
    ylabel = []
    for i in range(0, len(lst_stamps), len(lst_stamps) / 7):
        yloc.append(lst_stamps[i]), ylabel.append(("%.1f" % lst_stamps[i]))
    if all_lsts:
        new_x_high = xlims[1] + pad_length * (xlims[1] -
                                              xlims[0]) / len(f_leda)
    else:
        new_x_high = xlims[1]

    dump_data = {}

    if flag and merge:
        # If we are going to merge the flags across antennas, we need to flag them all now
        for p in (0, 1):
            for ii, key in enumerate(ant_ids):
                ant = key + ("B" if p else "A")
                T_flagged = T_ant[ant]
                if not all_lsts:
                    T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)
                new_mask = rfi_flag(T_flagged, freqs=f_leda).mask
                try:
                    merged_mask |= new_mask
                except NameError:
                    merged_mask = new_mask

    for p in [0, 1]:

        for ii, key in enumerate(ant_ids):
            if p == 0 and ii == 0:
                ax = fig.add_subplot(2, 3, 3 * p + ii + 1)
                origAX = ax
            else:
                ax = fig.add_subplot(2,
                                     3,
                                     3 * p + ii + 1,
                                     sharex=origAX,
                                     sharey=origAX)

            if p == 0: ant = key + "A"
            else: ant = key + "B"

            T_flagged = T_ant[ant]
            if not all_lsts:
                T_flagged = np.delete(T_flagged, unusable_lsts, axis=0)

            print "Max", np.max(T_flagged), "Min", np.min(T_flagged)

            if flag:
                if merge:
                    ## Already done
                    T_flagged = np.ma.array(T_flagged, mask=merged_mask)
                else:
                    ## Need to do it now - there's probably a way to deal with
                    ## this all in one pass
                    T_flagged = rfi_flag(T_flagged, freqs=f_leda)
                print "After flagging", "Max", np.ma.max(
                    T_flagged), "Min", np.ma.min(T_flagged)

            if dump:
                dump_data[ant] = T_flagged
                dump_data[ant + "_rms"] = add_uncertainties(T_flagged)
                av = np.ma.average(T_flagged, axis=0)
                weighted = av / dump_data[ant + "_rms"]**2
                dump_data[ant + "_weighted"] = weighted

            if flag:
                total = T_flagged.shape[0] * T_flagged.shape[1]
                num_in = np.ma.MaskedArray.count(T_flagged)
                print ant, ("%.1f%%" % (100 * (total - num_in) / total)
                            ), "flagged.", "Count:", total - num_in

            # Add the stripe onto the right edge of the data and adjust the extent of the x-axis (frequency) to cover the stripe.
            if all_lsts:
                T_flagged_plot = np.ma.concatenate((T_flagged, padding),
                                                   axis=1)
            else:
                T_flagged_plot = T_flagged

            ax.set_yticks(yloc)
            ax.set_yticklabels(ylabel)
            ax.tick_params(axis='y', pad=2)

            if flatten:
                if type(T_flagged_plot) is np.ma.core.MaskedArray:
                    abp = np.ma.median(T_flagged_plot.data, axis=0)
                else:
                    abp = np.ma.median(T_flagged_plot, axis=0)
                abp /= np.ma.median(abp)
                T_flagged_plot /= abp
                try:
                    clim = (percentile(T_flagged_plot.compressed(), 5),
                            percentile(T_flagged_plot.compressed(), 95))
                except AttributeError:
                    clim = (percentile(T_flagged_plot,
                                       5), percentile(T_flagged_plot, 95))

            else:
                clim = (1000, 10000)

            im = ax.imshow(
                T_flagged_plot,  # / np.median(xx, axis=0), 
                cmap='jet',
                aspect='auto',
                interpolation='nearest',
                clim=clim,
                extent=(xlims[0], new_x_high, ylims[1], ylims[0]))

            ax.set_title(ant)
            if p == 1: ax.set_xlabel("Frequency [MHz]")
            if ii == 0: ax.set_ylabel("LST [hr]")
            #ax.yaxis_date()
            #ax.yaxis.set_major_formatter(hfmt)
            #

    if not flatten:
        fig.subplots_adjust(left=0.07)
        fig.subplots_adjust(right=0.875)
        cbar_ax = fig.add_axes([0.9, 0.125, 0.025, 0.75])
        cbar = fig.colorbar(im, cax=cbar_ax)

        #plt.subplot(2,3,3)
        #cbar = plt.colorbar()
        cbar.set_label("Temperature [K]")
        cbar.ax.tick_params(axis='y', pad=2)
        #plt.tight_layout()

    if save:
        plt.savefig(os.path.basename(filename)[:-3] + ".png")
    if not no_show:
        plt.show()

    if dump:
        dump_data["lsts"] = lst_stamps
        dump_data["utcs"] = np.array([str(pytime) for pytime in utc_stamps])
        dump_data["frequencies"] = f_leda
        dump_data["options"] = "Flag=" + str(flag) + " Merge=" + str(
            merge) + " Flatten=" + str(flatten) + " All LSTSs=" + str(all_lsts)
        hickle.dump(dump_data, os.path.basename(filename)[:-3] + ".hkl")
                     figsize=(15, 15),
                     savename=savename,
                     show=False)

    # ===========================================================================
    # Save mask

    print("\tSaving mask as hickle binary ...")

    # convert to int32 for memory efficiency
    nuclei = nuclei.astype(np.int32)

    # see: https://github.com/telegraphic/hickle
    savename = mask_save_path + imname.split(".")[0] + ".hkl"
    with open(savename, 'w') as f:
        hkl.dump(nuclei, f)

    # ===========================================================================
    # Divide into (potentially overlapping) FOVs and save

    # Get FOV bounds
    (M, N, Depth) = im.shape
    FOV_bounds = get_fov_bounds(M, N, fov_dims=fov_dims, shift_step=shift_step)
    n_fovs = len(FOV_bounds)

    savename_ims_base = input_for_maskrcnn_path_images + imname.split(".")[0]
    savename_masks_base = input_for_maskrcnn_path_labels + imname.split(".")[0]

    # size threshold for exclusion (edge of tile)
    min_pixels = 150
Example #60
0
def make_optflow_dataset(dataset="train"):
    if dataset == "train":
        ID = TRAIN_PEOPLE_ID
    elif dataset == "dev":
        ID = DEV_PEOPLE_ID
    else:
        ID = TEST_PEOPLE_ID

    # Setup parameters for optical flow.
    farneback_params = dict(winsize=20,
                            iterations=1,
                            flags=cv2.OPTFLOW_FARNEBACK_GAUSSIAN,
                            levels=1,
                            pyr_scale=0.5,
                            poly_n=5,
                            poly_sigma=1.1,
                            flow=None)

    frames_idx = parse_sequence_file()

    data = []

    for category in CATEGORIES:
        # Get all files in current category's folder.
        folder_path = os.path.join(category)
        filenames = sorted(os.listdir(folder_path))

        for filename in filenames:
            filepath = os.path.join(category, filename)

            # Get id of person in this video.
            person_id = int(filename.split("_")[0][6:])
            if person_id not in ID:
                continue

            vid = imageio.get_reader(filepath, "ffmpeg")

            flow_x = []
            flow_y = []

            prev_frame = None
            # Add each frame to correct list.
            for i, frame in enumerate(vid):
                # Boolean flag to check if current frame contains human.
                ok = False
                for seg in frames_idx[filename]:
                    if i >= seg[0] and i <= seg[1]:
                        ok = True
                        break
                if not ok:
                    continue

                # Convert to grayscale.
                frame = Image.fromarray(np.array(frame))
                frame = frame.convert("L")
                frame = np.array(frame.getdata(), dtype=np.uint8).reshape(
                    (120, 160))
                frame = np.array(Image.fromarray(frame).resize((60, 80)))

                if prev_frame is not None:
                    # Calculate optical flow.
                    flows = cv2.calcOpticalFlowFarneback(
                        prev_frame, frame, **farneback_params)
                    subsampled_x = np.zeros((30, 40), dtype=np.float32)
                    subsampled_y = np.zeros((30, 40), dtype=np.float32)

                    for r in range(30):
                        for c in range(40):
                            subsampled_x[r, c] = flows[r * 2, c * 2, 0]
                            subsampled_y[r, c] = flows[r * 2, c * 2, 1]

                    flow_x.append(subsampled_x)
                    flow_y.append(subsampled_y)

                prev_frame = frame

            data.append({
                "filename": filename,
                "category": category,
                "flow_x": flow_x,
                "flow_y": flow_y
            })

    hkl.dump(data, "%s_flow.hkl" % dataset)