def read_data(is_in_data_file):
    # READ DATA IF AVAILABLE
    if (is_in_data_file):
        with open('data/data', 'rb') as f:
            data = pickle.load(f)
        return data

    power_data = sp.delete(sp.genfromtxt("data/Power_history.csv", delimiter=","), 0, 1).flatten()
    weather_data = sp.stack([x.flatten() for x in
                             sp.delete([sp.genfromtxt("data/" + filename, delimiter=",")
                                        for filename in filenames], [0, 1], 2)])
    weather_data = sp.delete(weather_data, sp.s_[:18], 1)

    # PREPROCESSING
    # REDUCE BROKEN DATA
    weather_data = sp.stack([x[~sp.isnan(x)] for x in weather_data])
    weather_data = sp.stack([x[~sp.isnan(power_data)] for x in weather_data])
    power_data = power_data[~sp.isnan(power_data)]

    data = sp.vstack([weather_data, power_data])
    data = data.transpose()

    # SELECT FILES TO BE INCLUDED IN COMPUTATION, POWER DATA ARE ALWAYS AT DATA[-1] !!!
    data = preproces(data, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    # WRITING OUTPUT DATA TO FILE 'DATA'
    with open('data/data', 'wb') as f:
        pickle.dump(data, f)
    return data
def get_HRRR_data(filename):
    grbs = pygrib.open(filename)
    msgs = [str(grb) for grb in grbs]
    string = 'Geopotential Height:gpm'
    temp = [msg for msg in msgs
            if msg.find(string) > -1 and msg.find('isobaricInhPa') > -1]
    # loop variable renamed to avoid shadowing the module alias `s`
    pressure_levels_Pa = s.array([int(msg.split(' ')[3]) for msg in temp])
    geo_pot_height_grbs = grbs.select(name='Geopotential Height',
                                      typeOfLevel='isobaricInhPa', level=lambda l: l > 0)
    temperature_grbs = grbs.select(name='Temperature',
                                   typeOfLevel='isobaricInhPa', level=lambda l: l > 0)
    rh_grbs = grbs.select(name='Relative humidity',
                          typeOfLevel='isobaricInhPa', level=lambda l: l > 0)
    lat, lon = geo_pot_height_grbs[0].latlons()
    geo_pot_height = s.stack([grb.values for grb in geo_pot_height_grbs])
    temperature = s.stack([grb.values for grb in temperature_grbs])
    rh = s.stack([grb.values for grb in rh_grbs])
    return lat, lon, geo_pot_height, temperature, rh, pressure_levels_Pa
def unpack_segment(Seg, num_lags=200, intercept=True):
    """Unpack a neo.Segment into a target matrix Y (analog signals), a lagged
    design matrix X built from the binned event trains, and the lag times."""
    lags = sp.arange(-num_lags / 2, num_lags / 2, 1, dtype='int32')

    Y = sp.stack([asig.magnitude.flatten() for asig in Seg.analogsignals], axis=1)

    t_start = Seg.analogsignals[0].t_start.rescale('ms')
    t_stop = Seg.analogsignals[0].t_stop.rescale('ms')
    dt = Seg.analogsignals[0].sampling_period.rescale('ms')

    X = []
    for event in Seg.events:
        st = neo.core.SpikeTrain(event.times, t_stop=t_stop)
        bst = ele.conversion.BinnedSpikeTrain(st, binsize=dt, t_start=t_start, t_stop=t_stop)
        reg = bst.to_array().flatten()
        for lag in lags:
            X.append(sp.roll(reg, lag))
    X = sp.stack(X, axis=1)

    if intercept:
        X = sp.concatenate([sp.ones((X.shape[0], 1)), X], 1)

    t_lags = lags * dt.magnitude
    return Y, X, t_lags
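# A minimal, self-contained sketch of the lagged-regressor construction used above,
# assuming plain numpy arrays in place of the neo/elephant objects: one binned event
# train is rolled over a symmetric window of lags and the shifted copies become the
# columns of the design matrix, plus an optional intercept column.
import numpy as np

num_lags = 6
lags = np.arange(-num_lags // 2, num_lags // 2, dtype='int32')
reg = np.array([0, 0, 1, 0, 0, 0, 0, 1, 0, 0], dtype=float)   # toy binned event train
X = np.stack([np.roll(reg, lag) for lag in lags], axis=1)     # shape (n_samples, n_lags)
X = np.concatenate([np.ones((X.shape[0], 1)), X], axis=1)     # intercept column
print(X.shape)  # (10, 7)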
def gen_sample(f, vnoise, xnoise):
    true_vx = f(t)
    true_x = cumtrapz(true_vx, t)
    true_x = sp.hstack([[0], true_x])
    noisy_vx = f(t + sp.random.randn(*t.shape) * xnoise) + sp.random.randn(*t.shape) * vnoise
    noisy_x = true_x + sp.random.randn(*t.shape) * xnoise
    return sp.stack([true_x, true_vx]).T, sp.stack([noisy_x, noisy_vx]).T
def gen_sample(v, vnoise_sigma, xnoise_mu1, xnoise_mu2, xnoise_sigma1, xnoise_sigma2):
    true_vx = v * sp.ones_like(t)  # Velocity is taken as constant
    # Trapezoidal rule integration of velocity into position
    true_x = cumtrapz(true_vx, t, initial=0)
    # Velocity only has Gaussian noise (this might have to be changed)
    noisy_vx = true_vx + sp.random.randn(*t.shape) * vnoise_sigma
    # Position has bimodal noise
    noise_dist = bimodal_gaussian(xnoise_mu1, xnoise_mu2, xnoise_sigma1, xnoise_sigma2, -10, 10, 150)
    noisy_x = true_x + noise_dist.sample(*t.shape)
    return sp.stack([true_x, true_vx]).T, sp.stack([noisy_x, noisy_vx]).T
def feros_velocity_correction(spec, create_fits=False, rv=False, out=''):
    """Radial velocity correction for a FEROS spectrum.

    Parameters
    ----------
    spec : file
        Fits file with FEROS spectra reduced with CERES.
    create_fits : bool, optional
        True to save a fits file with the result.

    Returns
    -------
    wavelength : array_like
        An array with the rest frame wavelengths.
    flux : array_like
        An array with the corresponding fluxes.

    """
    # Read fits file
    hdul = fits.open(spec)
    # Extract RV
    if not rv:
        rv = hdul[0].header['RV'] * u.km / u.s
    rv = rv.to(u.m / u.s)
    # Create gamma
    beta = rv / const.c
    gamma = 1 + beta.value
    # Extract wavelength per order
    wave = hdul[0].data[0, :, :]
    # Extract flux per order
    flux = hdul[0].data[9, :, :]
    orders = wave.shape[0]
    wave_rest = copy.deepcopy(wave)
    # Move spectra to rest frame
    for o in range(orders):
        wave_rest[o, :] /= gamma
    if create_fits:
        # Create new fits file
        if not out:
            out = spec.split('.fits')[0]
        else:
            date = hdul[0].header['HIERARCH SHUTTER START DATE'].split('-')
            ut = hdul[0].header['HIERARCH SHUTTER START UT'].split(':')
            out += hdul[0].header['HIERARCH TARGET NAME'] + '_'
            for d in date:
                out += d
            out += '_UT'
            for time_part in ut:  # renamed to avoid shadowing the astropy.units alias `u`
                out += time_part
        out += '_rest_frame.fits'
        hdu = fits.PrimaryHDU(sp.stack((wave_rest, flux)))
        try:
            hdu.writeto(out)
        except OSError:
            os.remove(out)
            hdu.writeto(out)
    hdul.close()
    return wave_rest, flux
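# A minimal, self-contained sketch of the rest-frame correction performed above,
# assuming astropy for units and constants; the radial velocity and wavelength values
# are illustrative, not taken from any real FEROS file.
import numpy as np
import astropy.units as u
import astropy.constants as const

rv = (25.0 * u.km / u.s).to(u.m / u.s)      # radial velocity, e.g. from the header
gamma = 1 + (rv / const.c).value            # first-order Doppler factor
wave_obs = np.linspace(5000.0, 5010.0, 5)   # observed wavelengths (Angstrom)
wave_rest = wave_obs / gamma                # shifted to the rest frame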
def events2span(Data, entry_event, exit_event):
    # NOTE: the arguments are currently overridden by hardcoded event names
    entry_event = "TRIAL_ENTRY_EVENT"
    exit_event = "ITI_STATE"

    data_entry = Data.groupby("name").get_group(entry_event)
    data_exit = Data.groupby("name").get_group(exit_event)

    if data_entry.shape[0] == data_exit.shape[0]:
        # easy peasy
        Span = pd.DataFrame(sp.stack([data_entry['t'].values, data_exit['t'].values], axis=1),
                            columns=['t_on', 't_off'])
        Span['dt'] = Span['t_off'] - Span['t_on']
        return Span

    if data_entry.shape[0] != data_exit.shape[0]:
        print("problems occur: unequal number of entry and exits")
        ts = []
        for tup in data_entry.itertuples():
            t_on = tup.t
            t_max = data_exit.iloc[-1]['t']
            try:
                t_off = bhv.time_slice(data_exit, t_on, t_max, 't').iloc[0]['t']
                ts.append((t_on, t_off))
            except IndexError:
                # thrown when last is on
                pass

        Span = pd.DataFrame(ts, columns=['t_on', 't_off'])
        Span['dt'] = Span['t_off'] - Span['t_on']
        return Span
def exportCV(x, y):
    global ncv
    curr = dataCube[y, x, :]
    cv = sp.stack((E, curr), axis=-1)
    path = filePath + '_' + str(ncv) + '_x' + str(x) + '_y' + str(y) + '.txt'
    sp.savetxt(path, cv, delimiter='\t')
    ncv += 1
def get_prf_data(pixel_values, pixel_stddev, pixel_offsets, error_threshold=0.1):
    """
    Return the PRF measurements for (a subset of) the image.

    Args:
        pixel_values(2-D float array):    The calibrated pixel responses from
            the image to include in the plot.

        pixel_stddev(2-D float array):    The estimated standard deviation of
            `pixel_values`.

        pixel_offsets:    The slice of the return value of find_pixel_offsets()
            corresponding to `pixel_values`.

    Returns:
        (2-D float array, 2-D float array, 2-D float array, 2-D float array):

            * The x-offsets of the points at which PRF measurements are
              available.

            * The y-offsets of the points at which PRF measurements are
              available.

            * The measured normalized PRF at the available offsets.

            * The estimated errors of the PRF measurements.
    """
    prf_measurements = (
        (pixel_values - pixel_offsets['zero_point'])
        / pixel_offsets['norm']
    )
    prf_errors = pixel_stddev / pixel_offsets['norm']

    #False positive
    #pylint: disable=assignment-from-no-return
    include = scipy.logical_and(scipy.isfinite(prf_measurements),
                                scipy.isfinite(prf_errors))
    include = scipy.logical_and(include, prf_errors < error_threshold)
    #pylint: enable=assignment-from-no-return

    return scipy.stack((
        pixel_offsets['x_off'][include],
        pixel_offsets['y_off'][include],
        prf_measurements[include],
        prf_errors[include]
    ))
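# A minimal usage sketch for the function above. The structured dtype for
# `pixel_offsets` is an assumption inferred from the field names it accesses
# ('zero_point', 'norm', 'x_off', 'y_off'); it is not the real output of
# find_pixel_offsets(), and the snippet still relies on the old `scipy` namespace
# that the function itself uses.
import numpy as np

shape = (4, 4)
pixel_offsets = np.zeros(shape, dtype=[('zero_point', float), ('norm', float),
                                       ('x_off', float), ('y_off', float)])
pixel_offsets['norm'] = 1.0
pixel_offsets['x_off'], pixel_offsets['y_off'] = np.meshgrid(np.arange(4) - 1.5,
                                                             np.arange(4) - 1.5)
pixel_values = np.random.rand(*shape)
pixel_stddev = np.full(shape, 0.01)

prf_points = get_prf_data(pixel_values, pixel_stddev, pixel_offsets)
print(prf_points.shape)  # (4, n_included_pixels)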
def exportCV(x, y):
    global ncv
    curr = dataCube[y, x, trim1:Nt - trim2] / Asc - y0
    darkCurr = darkCube[y, x, :]
    photoCurr = photoCube[y, x, :]
    cv = sp.stack((E[trim1:Nt - trim2], curr, darkCurr, photoCurr), axis=-1)
    path = cvsPath + baseName + '_' + str(ncv) + '_x' + str(x) + '_y' + str(y) + '.txt'
    sp.savetxt(path, cv, delimiter='\t')
    ncv += 1
def full_image(i=None):
    global _full
    if _full is None:
        path = Path('cache/full.npy')
        if not path.exists():
            ims = [_full_image(i) for i in range(1, COUNTS['full'] + 1)]
            sp.save(path, sp.stack(ims))
        _full = sp.load(path)
    ims = _full[i - 1] if i is not None else _full
    return ims
def generate_index_map(nonzero_locs, shape):
    r"""
    Determines the i,j,k indices of the flattened array
    """
    #
    logger.info('creating index map of non-zero values...')
    x_c = sp.unravel_index(nonzero_locs, shape)[0].astype(sp.int16)
    y_c = sp.unravel_index(nonzero_locs, shape)[1].astype(sp.int16)
    z_c = sp.unravel_index(nonzero_locs, shape)[2].astype(sp.int16)
    index_map = sp.stack((x_c, y_c, z_c), axis=1)
    #
    return index_map
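# A tiny, self-contained illustration of the mapping above: flattened indices of the
# non-zero voxels are converted back to (i, j, k) triples with unravel_index and stacked
# column-wise. Plain numpy is used here; the original snippet aliases scipy as `sp`.
import numpy as np

data = np.zeros((2, 3, 4), dtype=bool)
data[0, 1, 2] = data[1, 2, 3] = True
nonzero_locs = np.ravel_multi_index(np.nonzero(data), data.shape)
index_map = np.stack(np.unravel_index(nonzero_locs, data.shape), axis=1)
print(index_map)  # [[0 1 2], [1 2 3]]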
def quadrant_image(i=None):
    global _quadrant
    if _quadrant is None:
        path = Path('cache/quadrant.npy')
        if not path.exists():
            ims = [
                _quadrant_image(i) for i in range(1, COUNTS['quadrant'] + 1)
            ]
            sp.save(path, sp.stack(ims))
        _quadrant = sp.load(path)
    ims = _quadrant[i - 1] if i is not None else _quadrant
    return ims
def bumpmaps(category, number=None):
    """Uses a NN to generate a perfect image, and caches the result so it'll
    be fast to load next time"""
    if number is None:
        return sp.stack([
            bumpmaps(category, n)
            for n in tqdm(range(1, tools.COUNTS[category] + 1))
        ])

    path = Path(f'cache/nn/output/{category}/{number}.npy')
    if not path.exists():
        path.parent.mkdir(exist_ok=True, parents=True)
        losses, model = load(*MODEL)
        bumps = evaluate(category, number, model)[1]
        sp.save(path, bumps)
    return sp.load(path)
def test_simple_mv(self):
    '''
    Test a simple multivariate example (to the best of our abilities given we
    don't have a ground truth for this example).
    :return:
    '''
    # load the second OX reference data set.
    with open(os.path.join(self.datapath, "2/params.json")) as f:
        params = json.loads(f.read())

    yy = sp.array(parse_ox_csv(os.path.join(self.datapath, "2/raw_data.csv")),
                  dtype=np.float64).transpose()
    y = sp.reshape(yy, (101, 25, 1))[1:]

    # now we'll make a few copies of the reference data and stack them along the third axis.
    y = sp.squeeze(sp.stack((y, y, y, y, y), axis=2))

    rows = []
    for i, group in enumerate(y):
        for entry in group:
            rows.append([i, "A"] + entry.tolist())

    panel_series = PanelSeries.from_list(rows)

    # run the filtering
    Z = sp.matrix(params["translation_matrix"])
    F = sp.matrix(params["transition_matrix"])
    a0 = sp.matrix(params["a0"]).reshape(-1, 1)
    Q0 = sp.matrix(params["Q0"])
    Q = sp.matrix(params["Q"])
    sigma = sp.eye(5)

    F = block_diag(F, F, F, F, F)
    Z = block_diag(Z, Z, Z, Z, Z)
    a0 = sp.vstack((a0, a0, a0, a0, a0))
    Q0 = block_diag(Q0, Q0, Q0, Q0, Q0)
    Q = block_diag(Q, Q, Q, Q, Q)

    rsk_filter = RSK(F, Z)
    fitted_means = rsk_filter.fit(panel_series, a0, Q0, Q, sigma=sigma)

    # check that all means are equal
    for row in fitted_means:
        assert np.allclose(row[0:2], row[2:4]), "Measurements differ unexpectedly."
def test_compare_ox_multi(self):
    '''
    Compare against the multivariate OX reference implementation.
    :return:
    '''
    with open(os.path.join(self.datapath, "3/params.json")) as f:
        params = json.loads(f.read())

    yy = sp.array(parse_ox_csv(os.path.join(self.datapath, "3/raw_data.csv")),
                  dtype=np.float64)

    # unstack yy
    subarrays = [yy[i * 15:i * 15 + 15, ] for i in range(10)]
    y = sp.stack(tuple(subarrays), axis=0)

    rows = []
    for i, group in enumerate(y):
        for entry in group:
            rows.append([i, "A"] + entry.tolist())

    panel_series = PanelSeries.from_list(rows)

    alpha = sp.matrix(parse_ox_csv(os.path.join(self.datapath, "3/alpha.csv")),
                      dtype=np.float64)[:, 1:]
    ox_means = sp.array(parse_ox_csv(os.path.join(self.datapath, "3/means.csv")),
                        dtype=np.float64).transpose()[1:]

    py_means, py_cov = panel_series.means(), panel_series.cov()

    # check means
    assert sp.allclose(ox_means, sp.vstack(py_means)), "Python means do not match OX means"

    # check alphas
    rsk_filter = RSK(sp.matrix(params["transition_matrix"]),
                     sp.matrix(params["translation_matrix"]))
    rsk_alpha, alpha_filter, alpha_smooth, V, V_filter, V_smooth, _ = rsk_filter._fit(
        panel_series,
        sp.matrix(params["a0"]),
        sp.matrix(params["Q0"]),
        sp.matrix(params["Q"]),
        sigma=sp.matrix(params["sigma"]))

    a1 = alpha.transpose()
    a2 = np.squeeze(rsk_alpha)[1:]
    assert sp.allclose(a1, a2), "Alpha does not match OX alpha"
def median_combine(spec_list, nord, targ_name, ra, dec, plx):
    """Median combine rest frame spectra.

    Parameters
    ----------
    spec_list : array_like
        Array with spectra files.
    nord : int
        Number of echelle orders.
    targ_name : str
        Target's name.
    ra : float
        Target's right ascension in degrees.
    dec : float
        Target's declination in degrees.
    plx : float
        Target's parallax.

    """
    wavelengths = []
    fluxes = []
    for o in tqdm(range(nord), desc='Order'):
        combiner = []
        for spec in spec_list:
            hdul = fits.open(spec)
            flux = hdul[0].data[1, o, :]
            wave = hdul[0].data[0, o, :]
            combiner.append(flux)
            hdul.close()
        combiner = sp.array(combiner)
        combined = sp.median(combiner, axis=0)
        fluxes.append(combined)
        wavelengths.append(wave)
    final_waves = sp.vstack(wavelengths)
    final_fluxes = sp.vstack(fluxes)
    final_out = sp.stack([final_waves, final_fluxes])
    out = targ_name + '_stacked.fits'
    hdr = fits.Header()
    hdr['NAME'] = targ_name
    hdr['PLX'] = plx
    hdr['RA (deg)'] = ra
    hdr['DEC (deg)'] = dec
    hdu = fits.PrimaryHDU(final_out, header=hdr)
    hdu.writeto(out)
def onclick(event):
    global X, Y, w1

    def xx():
        x = sp.sort(X)
        for i, s in enumerate(list(sp.asarray(X).argsort())):
            if x[i] == x[i - 1]:
                return X.remove(X[s]), Y.remove(Y[s]), xx()

    if event.button == 1 and event.inaxes:
        X.append(event.xdata)
        Y.append(event.ydata)
        w1.set_data(X, Y)
        fig.canvas.draw()
        plt.show()
    else:
        plt.disconnect(cid)
        xx()
        nx = len(X)
        x = sp.poly1d([1, 0])
        L = 0
        for i in sp.arange(nx):
            pom = 1
            for j in sp.hstack((sp.arange(i), sp.arange(i + 1, nx))):
                pom *= (x - X[j]) / (X[i] - X[j])
            L += pom * Y[i]
        print(
            "\nThe Lagrange polynomial interpolating {} points has the form:\n\tx\ty\n {}\nL:\n {} "
            .format(nx, sp.stack((X, Y), axis=1), L))
        xw = sp.linspace(min(X), max(X), 1000)
        yw = L(xw)
        w2.set_data(xw, yw)
        ax.set_title("Lagrange interpolation of {} points".format(nx))
        ax.set_xlabel('x')
        ax.set_ylabel('y')
        ax.axis('equal')
        ax.legend()
        fig.canvas.draw()
def sample(self, *shape):
    bimodal_partial_cdf = cumtrapz(self.bimodal_pdf, initial=0, axis=0)

    # First sample in the x coordinate
    samplesx = self.ppfx(sp.random.rand(*shape))
    # Next sample in the y bin
    bin_index = self.ppfix(samplesx)

    # compute samples inside the y bin
    def compute_sample(ysample, xsample, binindex):
        upper_index = sp.int32(sp.ceil(binindex))
        lower_index = sp.int32(sp.floor(binindex))
        ppy_upper = interpolate.interp1d(bimodal_partial_cdf[:, upper_index], self.y_eval_space)
        ppy_lower = interpolate.interp1d(bimodal_partial_cdf[:, lower_index], self.y_eval_space)
        a = bimodal_partial_cdf[:, upper_index]
        b = bimodal_partial_cdf[:, lower_index]
        samples_upper = ppy_upper(ysample * (max(a) - min(a)) * 0.9999 + min(a) * 1.001)
        samples_lower = ppy_lower(ysample * (max(b) - min(b)) * 0.9999 + min(b) * 1.001)
        # Lerp over the lower and upper
        a = self.x_eval_space[upper_index]
        b = self.x_eval_space[lower_index]
        return samples_lower + (samples_upper - samples_lower) / (a - b) * (xsample - b)

    # Vectorize and sample in the y bin
    samplesy = sp.random.rand(*shape)
    compute_samples = sp.vectorize(compute_sample)
    samplesy = compute_samples(samplesy, samplesx, bin_index)

    # Stack the values
    samples = sp.stack([samplesx, samplesy])
    return samples.T
def load_image_data(image_file, invert):
    r"""
    Loads an image from a *.tiff stack and creates an array from it. The
    fracture is assumed to be black and the solid is white.
    """
    logger.info('loading image...')
    img_data = Image.open(image_file)
    #
    # creating full image array
    logger.info('creating image array...')
    data_array = []
    for frame in range(img_data.n_frames):
        img_data.seek(frame)
        frame = sp.array(img_data, dtype=bool).transpose()
        if invert:
            frame = ~frame  # because fracture is black, solid is white
        data_array.append(frame)
    #
    data_array = sp.stack(data_array, axis=2)
    logger.debug(' image dimensions: {} {} {}'.format(*data_array.shape))
    #
    return data_array
def __new__(cls, image, dtype=bool, *args, **kwargs):
    r"""
    Reads image data, fracture pixels are assumed to be truthy (i.e. > 0)
    and rock pixels false (i.e. == 0)
    """
    #
    # either reads the image data from a file or image is a scipy array
    try:
        image = Image.open(image)
        logger.debug('loaded image from file or file like object')
        image_data = []
        for frame in range(image.n_frames):
            image.seek(frame)
            frame = sp.array(image, dtype=dtype).transpose()
            image_data.append(frame)
        #
        # stacking frames into a single 3 dimensional array
        image_data = sp.stack(image_data, axis=2)
    except AttributeError:
        logger.debug('initialized image from data array')
        image_data = sp.array(image, ndmin=3, dtype=dtype)
    #
    # returning a conversion of regular ndarray into my subclass
    return sp.asarray(image_data).view(cls)
vnoise_mu = (VNOISE_MU[1] - VNOISE_MU[0]) * sp.random.rand(N_SAMPLES) + VNOISE_MU[0]
vnoise_sigma = (VNOISE_SCALE[1] - VNOISE_SCALE[0]) * sp.random.rand(N_SAMPLES) + VNOISE_SCALE[0]
xnoise_mu1 = (XNOISE_MU1[1] - XNOISE_MU1[0]) * sp.random.rand(N_SAMPLES) + XNOISE_MU1[0]

left_right = 2 * ((sp.random.rand(N_SAMPLES) > 0.5) - 0.5)
left_right[-1] = 1
left_right[-2] = -1
left_right[-3] = 1
left_right[-4] = -1

xnoise_mu2 = left_right * ((XNOISE_MU2[1] - XNOISE_MU2[0]) * sp.random.rand(N_SAMPLES) + XNOISE_MU2[0])
xnoise_scale1 = (XNOISE_SCALE1[1] - XNOISE_SCALE1[0]) * sp.random.rand(N_SAMPLES) + XNOISE_SCALE1[0]
xnoise_scale2 = (XNOISE_SCALE2[1] - XNOISE_SCALE2[0]) * sp.random.rand(N_SAMPLES) + XNOISE_SCALE2[0]

batch_generation_inputs = zip(vnoise_mu, vnoise_sigma, xnoise_mu1, xnoise_mu2, xnoise_scale1, xnoise_scale2)
y_batch, x_batch = list(zip(*[gen_sample(*generator) for generator in batch_generation_inputs]))
batch_y = sp.stack(y_batch)
batch_x = sp.stack(x_batch)
print(batch_y.shape, batch_x.shape)

if False:
    plt.figure(figsize=(14, 16))
    for batch_idx in range(N_PLOTS):
        noisy_x = batch_x[batch_idx, :, 0]
        noisy_vx = batch_x[batch_idx, :, 1]
        true_x = batch_y[batch_idx, :, 0]
        true_vx = batch_y[batch_idx, :, 1]

        plt.subplot(20 + (N_PLOTS) * 100 + batch_idx * 2 + 1)
        if batch_idx == 0:
            plt.title('Location x')
        plt.plot(t, true_x, lw=2, label='true')
        plt.plot(t, noisy_x, lw=1,
                 label=r'measured ($\mu =$ [%3.2f, %3.2f], $\sigma =$ [%3.2f, %3.2f])'\
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    conn_map = list(product([0, -1, 1], [0, -1, 1], [0, -1, 1]))
    conn_map = sp.array(conn_map, dtype=int)
    conn_map = conn_map[1:]
    #
    # creating slice list to process data chunks
    slice_list = [slice(0, 10000)]
    for i in range(slice_list[0].stop, index_map.shape[0], slice_list[0].stop):
        slice_list.append(slice(i, i + slice_list[0].stop))
    slice_list[-1] = slice(slice_list[-1].start, index_map.shape[0])
    #
    conns = sp.ones((0, 2), dtype=sp.uint32)
    logger.debug(' number of slices to process: {}'.format(len(slice_list)))
    for sect in slice_list:
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, conn_map.shape[0], axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, conn_map.shape[0], axis=0)])
        #
        # removing neighbors with negative indices
        mask = ~inds[:, 0:3] < 0  # (~x) < 0 is equivalent to x >= 0
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        mask = (inds[:, 0] < x_dim, inds[:, 1] < y_dim, inds[:, 2] < z_dim)
        mask = sp.stack(mask, axis=1)
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing indices with zero-weight connection
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3), data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # appending section connectivity data to conns array
            conns = sp.append(conns, inds.astype(sp.uint32), axis=0)
    #
    # using scipy magic from stackoverflow to remove duplicate connections
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize*conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug(' removed {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
def generate_node_connectivity_array(index_map, data_array):
    r"""
    Generates a node connectivity array based on faces, edges and corner
    adjacency
    """
    #
    logger.info('generating network connections...')
    #
    # setting up some constants
    x_dim, y_dim, z_dim = data_array.shape
    conn_map = list(product([0, -1, 1], [0, -1, 1], [0, -1, 1]))
    #
    conn_map = sp.array(conn_map, dtype=int)
    conn_map = conn_map[1:]
    #
    # creating slice list to process data chunks
    slice_list = [slice(0, 10000)]
    for i in range(slice_list[0].stop, index_map.shape[0], slice_list[0].stop):
        slice_list.append(slice(i, i + slice_list[0].stop))
    slice_list[-1] = slice(slice_list[-1].start, index_map.shape[0])
    #
    conns = sp.ones((0, 2), dtype=data_array.index_int_type)
    logger.debug('\tnumber of slices to process: {}'.format(len(slice_list)))
    percent = 10
    for n, sect in enumerate(slice_list):
        # getting coordinates of nodes and their neighbors
        nodes = index_map[sect]
        inds = sp.repeat(nodes, conn_map.shape[0], axis=0)
        inds += sp.tile(conn_map, (nodes.shape[0], 1))
        #
        # calculating the flattened index of the central nodes and storing
        nodes = sp.ravel_multi_index(sp.hsplit(nodes, 3), data_array.shape)
        inds = sp.hstack([inds, sp.repeat(nodes, conn_map.shape[0], axis=0)])
        #
        # removing neighbors with negative indices
        mask = ~inds[:, 0:3] < 0  # (~x) < 0 is equivalent to x >= 0
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing neighbors with indices outside of bounds
        mask = (inds[:, 0] < x_dim, inds[:, 1] < y_dim, inds[:, 2] < z_dim)
        mask = sp.stack(mask, axis=1)
        inds = inds[sp.sum(mask, axis=1) == 3]
        # removing indices with zero-weight connection
        mask = data_array[inds[:, 0], inds[:, 1], inds[:, 2]]
        inds = inds[mask]
        if inds.size:
            # calculating flattened index of remaining neighbor nodes
            nodes = sp.ravel_multi_index(sp.hsplit(inds[:, 0:3], 3), data_array.shape)
            inds = sp.hstack([sp.reshape(inds[:, -1], (-1, 1)), nodes])
            # ensuring conns[0] is always < conns[1] for duplicate removal
            mask = inds[:, 0] > inds[:, 1]
            inds[mask] = inds[mask][:, ::-1]
            # appending section connectivity data to conns array
            conns = sp.append(conns, inds.astype(sp.uint32), axis=0)
        if int(n / len(slice_list) * 100) == percent:
            logger.debug('\tprocessed slice {:5d}, {}% complete'.format(n, percent))
            percent += 10
    #
    # using scipy magic from stackoverflow to remove duplicate connections
    logger.info('removing duplicate connections...')
    dim0 = conns.shape[0]
    conns = sp.ascontiguousarray(conns)
    dtype = sp.dtype((sp.void, conns.dtype.itemsize * conns.shape[1]))
    dim1 = conns.shape[1]
    conns = sp.unique(conns.view(dtype)).view(conns.dtype).reshape(-1, dim1)
    logger.debug('\tremoved {} duplicates'.format(dim0 - conns.shape[0]))
    #
    return conns
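# A small, self-contained demonstration of the duplicate-removal trick used at the end
# of the two functions above: each row is viewed as a single opaque (void) element so
# that np.unique can deduplicate whole rows. Plain numpy is used for the sketch; on
# modern numpy, np.unique(conns, axis=0) achieves the same result directly.
import numpy as np

conns = np.array([[0, 1], [2, 3], [0, 1], [4, 5]], dtype=np.uint32)
conns = np.ascontiguousarray(conns)
void_dtype = np.dtype((np.void, conns.dtype.itemsize * conns.shape[1]))
unique_rows = np.unique(conns.view(void_dtype)).view(conns.dtype).reshape(-1, conns.shape[1])
print(unique_rows)  # [[0 1], [2 3], [4 5]]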
shapes = [
    functools.reduce(lambda x, y: x * y, variable.get_shape())
    for variable in tf.trainable_variables()
]
print('Nparams: ', functools.reduce(lambda x, y: x + y, shapes))

#%% TRAINING
data = []

# Add saver
saver = tf.train.Saver()

for i in range(8):
    file_name = 'Oval_circ1_N' + str(i + 1) + '.txt'
    data.append(read_file(file_name))
    file_name = 'Oval_circ2_N' + str(i + 1) + '.txt'
    data.append(read_file(file_name))

# data contains [t, t_diff, x,y,z,vx,vy,vz, x_t,y_t,z_t,vx_t,vy_t,vz_t]
all_data = sp.stack(data)
batch_x = all_data[:, :, 2:8]
batch_y = all_data[:, :, 8:]

train_batch_x = batch_x[:BATCH_SIZE, :, :]
train_batch_y = batch_y[:BATCH_SIZE, :, :]
test_batch_x = batch_x[BATCH_SIZE:, :, :]
test_batch_y = batch_y[BATCH_SIZE:, :, :]

# Save losses for plotting of progress
dev_loss_plot = []
tra_loss_plot = []
lr_plot = []

with tf.Session(graph=g1) as sess:
    if RESTORE_CHECKPOINT:
        saver.restore(sess, SAVE_DIR + "/model.ckpt")
def compute_contacts(dom, people, dmax):
    """
    This function uses a KDTree method to find the contacts \
    between individuals. Moreover the contacts with the walls \
    are also determined from the wall distance (obtained by the \
    fast-marching method).

    Parameters
    ----------
    dom: Domain
        contains everything for managing the domain
    people: numpy array
        people coordinates and radius : x,y,r
    dmax: float
        threshold value used to consider a contact as \
        active (dij<dmax)

    Returns
    -------
    contacts: numpy array
        all the contacts i,j,dij,eij_x,eij_y such that dij<dmax \
        and i<j (no duplication)
    """
    # lf : the number of points at which the algorithm
    # switches over to brute-force. Has to be positive.
    lf = 100
    if (lf > sys.getrecursionlimit()):
        sys.setrecursionlimit(lf)
    kd = cKDTree(people[:, :2], leafsize=lf)
    ## Find all pairs of points whose distance is at most dmax+2*rmax
    rmax = people[:, 2].max()
    neighbors = kd.query_ball_tree(kd, dmax + 2 * rmax)
    ## Create the contact array : i,j,dij,eij_x,eij_y
    first_elements = sp.arange(people.shape[0])  ## i.e. i
    other_elements = list(map(lambda x: x[1:], neighbors))  ## i.e. all the j values for each i
    lengths = list(map(len, other_elements))
    tt = sp.stack([first_elements, lengths], axis=1)
    I = sp.concatenate(list(map(lambda x: sp.full((x[1], ), x[0]), tt))).astype(int)
    J = sp.concatenate(other_elements).astype(int)
    ind = sp.where(I < J)[0]
    I = I[ind]
    J = J[ind]
    DP = people[J, :2] - people[I, :2]
    Norm = sp.linalg.norm(DP, axis=1, ord=2)
    Dij = Norm - people[I, 2] - people[J, 2]
    ind = sp.where(Dij < dmax)[0]
    Dij = Dij[ind]
    I = I[ind]
    J = J[ind]
    Norm = Norm[ind]
    DP = DP[ind]
    contacts = sp.stack([I, J, Dij, DP[:, 0] / Norm, DP[:, 1] / Norm], axis=1)
    # Add contacts with the walls
    II = sp.floor((people[:, 1] - dom.ymin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    JJ = sp.floor((people[:, 0] - dom.xmin - 0.5 * dom.pixel_size) / dom.pixel_size).astype(int)
    DD = dom.wall_distance[II, JJ] - people[:, 2]
    ind = sp.where(DD < dmax)[0]
    wall_contacts = sp.stack([ind, -1 * sp.ones(ind.shape), DD[ind],
                              dom.wall_grad_X[II[ind], JJ[ind]],
                              dom.wall_grad_Y[II[ind], JJ[ind]]], axis=1)
    contacts = sp.vstack([contacts, wall_contacts])
    return sp.array(contacts)
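# A minimal, self-contained sketch of the neighbour-pair construction used in the
# function above, written with plain numpy (the original uses the older `scipy as sp`
# alias for the same calls); coordinates and search radius are illustrative.
import numpy as np
from scipy.spatial import cKDTree

people = np.array([[0.0, 0.0, 0.5],
                   [1.2, 0.0, 0.5],
                   [5.0, 5.0, 0.5]])              # columns: x, y, r
kd = cKDTree(people[:, :2], leafsize=100)
neighbors = kd.query_ball_tree(kd, r=2.0)         # per-point lists of nearby indices

I = np.concatenate([np.full(len(nb), i, dtype=int) for i, nb in enumerate(neighbors)])
J = np.concatenate([np.asarray(nb, dtype=int) for nb in neighbors])
keep = I < J                                       # one copy of each pair, no self-pairs
DP = people[J[keep], :2] - people[I[keep], :2]
Dij = np.linalg.norm(DP, axis=1) - people[I[keep], 2] - people[J[keep], 2]
contacts = np.stack([I[keep], J[keep], Dij], axis=1)
print(contacts)  # single contact: persons 0 and 1 with a gap of 0.2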
def ovf_to_hdf(self, hdf_name, ovf_files=[], delete_ovfs=False, overwrite=False):
    """Load the data from multiple .ovf files into a chunked .hdf5 file.
    This method is useful when the size of the simulation data is too large to fit into RAM"""

    # check if destination file already exists and whether to overwrite
    if (os.path.isfile(hdf_name)) & (overwrite == False):
        print('file already exists. Set kwarg "overwrite=True" to overwrite existing file. Aborting...')
        return 0

    # calculate the size and shape of the data we are dealing with
    meta, header = self.read_ovf(ovf_files[0], target='meta')
    header_encoded = [n.encode("ascii", "ignore") for n in header]
    data_shape = sp.array([meta['znodes'], meta['ynodes'], meta['xnodes'],
                           meta['valuedim'], len(ovf_files)])
    data_size = sp.prod(data_shape * int(meta['Binary']))
    time = []

    with hd.File(hdf_name, 'w', libver='latest') as f:
        dset = f.create_dataset('mag', data_shape,
                                dtype=sp.dtype('f' + str(int(meta['Binary']))),
                                chunks=True)
        # want to add the meta data in but cannot seem to get it to work
        # There is a problem with data format supported by hdf5

        # go through the ovf files and populate the hdf file with the data
        chunk_time_length = dset.chunks[-1]
        data_shape = dset.shape
        chunk_number = data_shape[-1] / chunk_time_length
        chunk_shape = dset.chunks

        # Close the hdf5 file at every opportunity
        # According to docs: https://support.hdfgroup.org/HDF5/faq/perfissues.html
        # a memory leak can occur when writing to the same file many times in a loop
        print("creating dataset")
        print("shape ", dset.shape)
        print("chunks ", dset.chunks)

    # prepare an array of all the data in one time chunk
    for c in tqdm(range(int(sp.ceil(chunk_number)))):
        temp_arr = sp.zeros((data_shape[0], data_shape[1], data_shape[2],
                             data_shape[3], chunk_shape[-1]))
        # fill the temp array with data from ovf files
        try:
            for n in range(chunk_time_length):
                temp_arr[:, :, :, :, n], meta, raw = self.read_ovf(ovf_files[chunk_time_length * c + n])
                time.append(meta['time'])
        except:
            # This catches the unexpected case where the chunk length in time
            # does not perfectly divide the length of the ovf list
            temp = list(map(self.read_ovf, ovf_files[chunk_time_length * c:]))
            temp_arr = sp.stack([a[0] for a in temp], axis=-1)

        # open hdf5 file, write the time chunk to disk, close hdf5 file
        with hd.File(hdf_name, 'r+', libver="latest") as f:
            f['mag'][:, :, :, :, chunk_time_length * c:chunk_time_length * (c + 1)] = temp_arr

        # optionally delete the ovf files as they are written to hdf5
        if delete_ovfs == True:
            for n in range(chunk_time_length):
                os.remove(ovf_files[chunk_time_length * c + n])

    # Append to the hdf5 file additional meta data
    with hd.File(hdf_name, 'a', libver='latest') as f:
        f.create_dataset('time', data=sp.array(time))
        f.create_dataset('header', (len(header_encoded), 1), 'S30', header_encoded)
        try:
            f.create_dataset('meta', meta.data)
        except:
            print('failed to save metadata to disk')

    # change permissions of the hdf5 data file to be read only
    os.chmod(hdf_name, 0o444)
    return 0
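# A minimal, self-contained sketch of the chunked-write pattern used above, assuming
# h5py (aliased `hd` as in the snippet): a chunked dataset is created once, then filled
# one time-slab at a time so the whole array never has to sit in RAM. The file name and
# shapes are illustrative.
import numpy as np
import h5py as hd

data_shape = (4, 8, 8, 3, 100)                      # z, y, x, component, time
with hd.File('example.hdf5', 'w') as f:
    dset = f.create_dataset('mag', data_shape, dtype='f4', chunks=True)
    step = dset.chunks[-1]                          # time-chunk length chosen by h5py
    for c in range(int(np.ceil(data_shape[-1] / step))):
        n = min(step, data_shape[-1] - c * step)    # handle a partial final chunk
        slab = np.random.rand(*data_shape[:-1], n)  # stand-in for one chunk of ovf data
        dset[..., c * step:c * step + n] = slab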
### COMMON
# the names of the things present in the log
span_names = [name.split('_ON')[0] for name in CodesDf['name'] if name.endswith('_ON')]
event_names = [name.split('_EVENT')[0] for name in CodesDf['name'] if name.endswith('_EVENT')]

Spans = bhv.log2Spans(Data, span_names)
Events = bhv.log2Events(Data, event_names)

### SOME PREPROCESSING
# filter unrealistic licks
bad_licks = sp.logical_or(Spans['LICK']['dt'] < 20, Spans['LICK']['dt'] > 100)
Spans['LICK'] = Spans['LICK'].loc[~bad_licks]

# add lick_event
Lick_Event = pd.DataFrame(sp.stack([['NA'] * Spans['LICK'].shape[0],
                                    Spans['LICK']['t_on'].values,
                                    ['LICK_EVENT'] * Spans['LICK'].shape[0]]).T,
                          columns=['code', 't', 'name'])
Lick_Event['t'] = Lick_Event['t'].astype('float')
Data = Data.append(Lick_Event)
Data = Data.sort_values('t')  # keep the sorted result (sort_values is not in place)

event_names.append("LICK")
Events['LICK'] = bhv.log2Event(Data, 'LICK')
Spans.pop("LICK")
span_names.remove("LICK")

colors = sns.color_palette('hls', n_colors=len(event_names) + len(span_names))[::-1]
cdict = dict(zip(event_names + span_names, colors))

"""
def fit(self, steps=200, eps=1e-5, num_restarts=3):
    """Learn the parameters omega, sigma, eta from training data `train` via EM.

    Args:
        steps: integer, the maximum number of EM steps to take before
            terminating
        eps: float, the tolerance threshold for terminating early from the
            optimization routine. Optimization stops when the change in each
            parameter is no greater than the value specified here.
        num_restarts: integer, the number of restarts to perform

    Returns:
        a pointer to `self`, where the parameter attributes omega, sigma, and
        eta have been learned.
    """
    train = self.train
    N = len(train)
    T = self.T

    # Allocate "p" matrices
    p_aftershock = sp.zeros((N, N))
    p_background = sp.zeros((N, N))

    # Massage data into proper format and compute reusable values for E-step
    # (only needs to be done once)
    # Provides iteration over the data (each pair of rows) without using a
    # `for` loop
    i, j = sp.ogrid[0:N, 0:N]
    t_i = train.iloc[i.reshape(N, )]['t'].values.reshape(N, 1)
    t_j = train.iloc[j.reshape(N, )]['t'].values.reshape(1, N)
    x_i = train.iloc[i.reshape(N, )]['x'].values.reshape(N, 1)
    x_j = train.iloc[j.reshape(N, )]['x'].values.reshape(1, N)
    y_i = train.iloc[i.reshape(N, )]['y'].values.reshape(N, 1)
    y_j = train.iloc[j.reshape(N, )]['y'].values.reshape(1, N)

    distance = (x_j - x_i)**2 + (y_j - y_i)**2
    time_check = t_i < t_j
    origin_check = sp.logical_and((x_i != x_j), (y_i != y_j))
    trigger_check = time_check * origin_check
    t_diff = t_j - t_i

    best_params = None
    best_likelihood = None

    for _ in range(num_restarts):
        # Initialize parameters to random values
        # Parameters for triggering kernel
        omega = sp.absolute(stats.norm.rvs(scale=0.10))  # time decay
        sigma = sp.absolute(sp.randn())  # spatial decay
        # Parameter for background rate
        eta = sp.absolute(sp.randn())

        # Loop until convergence
        for step in tqdm(range(steps)):
            old_parameters = sp.stack([omega, sigma, eta])

            # E-step: Calculate p matrices according to equations (9) and (10)
            # "[P and P^b] contain the probabilities that event i triggered
            # homicide j through either the triggering kernel g or the
            # background rate kernel"
            p_aftershock = trigger_check * omega * sp.exp(-omega * t_diff) \
                * stats.norm.pdf(distance, scale=sigma)
            p_background = origin_check * stats.norm.pdf(distance, scale=eta)

            # Normalize as necessary
            Z = p_aftershock + p_background  # also used for likelihood
            nonzero_Z = Z > 0
            p_aftershock[nonzero_Z] /= Z[nonzero_Z]
            p_background[nonzero_Z] /= T * Z[nonzero_Z]

            # M-step: Update parameters
            aftershock_sum = sp.sum(p_aftershock)
            time_gaps = T - t_i
            omega_denom = sp.sum(p_aftershock * (t_j - t_i)) \
                + sp.sum(time_gaps * sp.exp(-omega * time_gaps))
            omega = aftershock_sum / omega_denom
            sigma = sp.sum(p_aftershock * distance) / (2.0 * aftershock_sum)
            eta = sp.sum(p_background * distance) / (2.0 * sp.sum(p_background))

            diff = sp.absolute(old_parameters - sp.stack([omega, sigma, eta])).max()
            if diff < eps:
                print("Convergence met after {} iterations: {}".format(step, sp.sum(Z)))
                break

        # Check the likelihood given the discovered parameter values by
        # summing over Z
        likelihood = sp.sum(Z)
        if best_likelihood is None or likelihood > best_likelihood:
            best_params = omega, sigma, eta
            best_likelihood = likelihood

    self.p_aftershock, self.p_background = p_aftershock, p_background
    self.omega, self.sigma, self.eta = best_params
    return self
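# A small, self-contained illustration of the broadcasting pattern used in the E-step
# above: column and row views of the event times produce full N x N pairwise matrices
# without an explicit double loop. Plain numpy; the values are illustrative.
import numpy as np

t = np.array([0.0, 1.0, 3.0])
N = len(t)
t_i = t.reshape(N, 1)          # column: candidate "parent" event i
t_j = t.reshape(1, N)          # row: candidate "offspring" event j
t_diff = t_j - t_i             # pairwise time gaps, shape (N, N)
time_check = t_i < t_j         # True only where event i precedes event j
print(t_diff)
print(time_check)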
asig = Seg.analogsignals[0]

fig, axes = plt.subplots(ncols=nEvents, figsize=[6, 2.5])
t_slice = (-2, 2) * pq.s

for i, event in enumerate(Events[:-1]):
    asig_slices = []
    for t in event.times:
        try:
            asig_slice = asig.time_slice(t + t_slice[0], t + t_slice[1])
            tvec = asig_slice.times - t
            # axes[i].plot(tvec, asig_slice, 'k', alpha=0.25, lw=1)
            asig_slices.append(asig_slice)
        except ValueError:
            pass

    # average:
    avg = sp.stack([asig_slice.magnitude for asig_slice in asig_slices], axis=1).mean(axis=1)
    axes[i].plot(tvec, avg, 'r')
    axes[i].plot(Kernels.times, Kernels[:, i], 'C%i' % i)
    axes[i].set_xlabel('time (s)')

axes[0].set_ylabel('signal (au)')
fig.suptitle('event triggered average')
sns.despine(fig)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])

# %%
"""
## ## ## ## ### ## ## ## ## ## ## ## ##
T = 20
N = 100
M = 200
target_dir = os.path.join('data', 'bouncing_balls_ds0p1')
os.makedirs(target_dir, exist_ok=True)
nrof_balls = 1

for j in range(M):
    print('.', end='')
    sys.stdout.flush()
    dat = scipy.empty((N), dtype=object)
    for i in range(N):
        dat[i] = bounce_vec(res=res, n=nrof_balls, T=T, diffusion_stdev=diffusion_stdev)
    data = np.reshape(scipy.stack(dat), (N, T, res, res))
    utils.save_pickle(os.path.join(target_dir, 'train_%03d.pkl' % j), data)
print('\nDone')

N = 100
M = 10
dat = scipy.empty((N), dtype=object)
for j in range(M):
    for i in range(N):
        dat[i] = bounce_vec(res=res, n=nrof_balls, T=T, diffusion_stdev=diffusion_stdev)
    data = np.reshape(scipy.stack(dat), (N, T, res, res))
    utils.save_pickle(os.path.join(target_dir, 'test_%03d.pkl' % j), data)