def ev_map(ddir, fn_out, var, mask=None, min_dist=14, rp=rps_out, out_kwargs={}):
    chunks2 = {'latlon': 500, 'time': -1}
    fns = glob.glob(join(ddir, f'{var}*.nc'))
    # combine, stack and mask data
    da = xr.open_mfdataset(fns, combine='by_coords')[var]
    if mask is None:
        mask = xr.ufuncs.isfinite(da.isel(time=0)).load()
    da.coords['mask'] = mask
    da_stacked = da.stack(latlon=('lat', 'lon'))
    da_stacked = da_stacked.where(da_stacked['mask'], drop=True).chunk(chunks2)
    # get AM
    peaks_am_stacked = get_peaks(
        da_stacked, min_dist=min_dist, dim='time', chunks=chunks2).groupby('time.year').max('time')
    peaks_am_stacked = peaks_am_stacked.rename({'year': 'time'})
    peaks_am_stacked.name = f'{var}_am'
    # fit gumbel
    with ProgressBar():
        ds_rp_stacked = xlm_fit(peaks_am_stacked, fdist=distr.gum, rp=rp, nmin=30)
    ds_rp_stacked = ds_rp_stacked.rename({f'{var}_am': f'{var}_ev'})
    # merge and write
    xr.merge([peaks_am_stacked, ds_rp_stacked]).unstack().reindex_like(mask).to_netcdf(
        fn_out, **out_kwargs)
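# Hypothetical call of ev_map (a sketch; the data directory, output file name,
# variable name, return periods and out_kwargs below are illustrative
# assumptions, not taken from the original script).
rps_out = [2, 5, 10, 25, 50, 100]
ev_map(ddir='/path/to/netcdf_dir', fn_out='discharge_ev.nc', var='discharge',
       min_dist=14, rp=rps_out, out_kwargs={'engine': 'netcdf4'})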
def work(params):
    worker_id = params[0]
    config = params[1]
    results = []
    global PEAKS
    PEAKS = peaks.get_peaks(config["PEAKS"], config["CHROMOSOME_LENGTH"], 64)
    for sample_num in range(config["MAX_SAMPLES"]):
        server = jsonrpclib.Server(config["SERVER"])  # Create every time to prevent timeouts
        if int(server.found(None)):
            break
        else:
            gen_data = evolve(sample_num, config)
            if gen_data[0]:
                server.found_it(None)
            results.append([worker_id] + gen_data[1])
    return results
def flood_day(ts, min_dist=1, dim='time'):
    """Returns a yearly time series with the flood day and magnitude,
    i.e. the day of the year (DOY) of the annual maxima (AM) in ts.

    Parameters
    ----------
    ts : xarray DataArray
        time series with daily time step
    min_dist : int, optional
        minimum distance between peaks [days] (the default is 1)
    dim : str, optional
        name of time dimension in ts (the default is 'time')

    Returns
    -------
    xarray Dataset
        AM and DOY of annual maxima per year [julian calendar day]
    """
    # make sure the first (last) year starts (ends) at the start (end) of the year and has a daily time step
    # t = ts[dim].to_index()
    # tstart = t[0] if (t[0].month == 1) and (t[0].day == 1) else date(t[0].year + 1, 1, 1)
    # tend = t[-1] if (t[-1].month == 12) and (t[-1].day == 31) else date(t[-1].year - 1, 12, 31)
    # ts = ts.sel(**{dim: pd.date_range(tstart, tend)}, method='nearest')
    grp = '{}.year'.format(dim)
    # get peaks only with optional min dist argument
    peaks = get_peaks(ts, min_dist=min_dist, dim=dim)
    # get annual maxima
    am = peaks.groupby(grp).max(dim=dim).chunk({'year': -1})
    am.name = 'annual_max'
    am.attrs.update(description='annual maximum', unit=ts.attrs.get('unit', 'unknown'))
    # now argmax for DOY of annual maxima
    # NOTE: argmax on dask chunks returns zeros with xr version 0.10.8 and dask version 0.18.2
    # doy = ts.groupby(grp).reduce(np.argmax, dim=dim)
    # fillna to avoid "all-NaNs error"
    doy = peaks.fillna(-np.inf).groupby(grp).argmax(dim=dim)
    # deal with all-NaN years and start counting from 1 (Jan 1st) instead of python zero indexing
    doy = xr.where(xr.ufuncs.isfinite(am), doy + 1, np.nan).chunk({'year': -1})
    doy.name = 'doy'
    doy.attrs.update(description='annual maximum day of the year', unit='julian calendar day')
    return xr.merge([am, doy])
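# Example usage of flood_day (a minimal sketch; the synthetic daily series,
# the variable name and the 'unit' attribute are illustrative assumptions,
# and xarray with dask is assumed available since the function chunks its output).
import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2000-01-01', '2009-12-31', freq='D')
q = xr.DataArray(np.random.rand(time.size), coords={'time': time}, dims='time',
                 name='discharge', attrs={'unit': 'm3/s'})
ds_fd = flood_day(q, min_dist=14)
print(ds_fd['annual_max'].values)  # magnitude of the annual maximum per year
print(ds_fd['doy'].values)         # day of year on which it occurred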
y, n = 0, 0
with open(sys.argv[3]) as fp:
    for line in fp:
        c += 1
        chrom, start, end, offsets, rs, strand = line.strip().split('\t')[:6]
        expression = [int(float(i)) for i in line.strip().split('\t')[6].split(",")]
        mcmc = [float(i) for i in line.strip().split('\t')[7].split(",")]
        seq = genome_seq[chrom][int(start):int(end)]
        if strand == "-":
            expression.reverse()
            seq = revcomp(seq)
        print(chrom, start, end, strand, rs)
        peaks = get_peaks(mcmc, max(expression), 0.08)
        for peak in peaks:
            peak.plot(len(expression))
        x = range(0, len(expression), WINDOWS)
        if len(mcmc) != len(x):
            x = range(0, len(expression) + 1, WINDOWS)
        plt.plot(x, mcmc, linewidth=6, color='k')
        plt.plot(expression)
        for i in range(30, len(seq) - 30):
            motif = seq[i - len(tp_pwm):i + len(fp_pwm)]
            score = (score_motif(pwm, motif) - min_score) / (max_score - min_score)
            if score > .8:
                plt.scatter([i], [max(expression)], marker='o', c=color(score))
        plt.autoscale(tight=True)
        plt.show(block=False)
vars_ = ['h', 'Hskewsurge', 'Q']
rm = {'Hskewsurge_day': 'Hskewsurge'}
# read data
ds = xr.open_zarr(fn_in).sel(scen=scenarios).rename(rm)
ds['h'] = ds['WSE'] - ds['z0']
ds = ds[vars_].sel(scen='surge').drop('scen')
# window max driver
ds_wdwmax = ds.rolling(
    time=wdw * 2 + 1, min_periods=1, center=True
).construct('window').max('window').astype(np.float32)
# get AM h peaks
peaks = get_peaks(
    ds['h'], min_dist=30, dim='time').reset_coords(drop=True).reindex_like(ds).compute()
hpeaks = peaks.where(np.isfinite(peaks), -np.inf)
hpeaks_yr = hpeaks.groupby('time.year')
hpeaks_am = hpeaks_yr == hpeaks_yr.max('time')
hpeaks_doy = hpeaks['time'].dt.dayofyear
# combine h AM peaks, drivers and return periods and keep only an. max.
ds_am_h = xr.merge([
    hpeaks_doy,
    ds_wdwmax,
]).where(hpeaks_am, -np.inf).groupby('time.year').max('time')
ds_am_h = ds_am_h.transpose('year', 'ensemble', 'index').compute()
# write to file
ds_am_h.attrs.update(
chunks = {'ensemble': -1, 'scen': -1, 'time': -1, 'index': 100}
# I/O
root = r'/scratch/compound_hotspots'
ddir = join(root, 'data', '4-postprocessed')
# IN
fn_in = join(ddir, 'rivmth_reanalysis.zarr')
fn_out = join(ddir, 'rivmth_ev.nc')
# read
# ----
da_wse = xr.open_zarr(fn_in)['WSE'].sel(scen=scenarios)
# AM peaks & extreme value analysis (fit gumbel)
print(basename(fn_out))
peaks_am = get_peaks(
    da_wse, min_dist=min_dist, dim='time').fillna(-np.inf).groupby('time.year').max('time')
peaks_am = peaks_am.rename({'year': 'time'}).chunk(chunks)
ds_rp = xlm_fit(peaks_am, fdist=distr.gum, rp=rps_out)
ds_out = xr.merge([
    peaks_am.to_dataset().rename({'WSE': 'WSE_am'}),
    ds_rp.rename({'WSE': 'WSE_ev'}),
])
ds_out.chunk(chunks).to_netcdf(fn_out)
# confidence intervals using bootstrap
ds = xr.open_dataset(fn_out, chunks=chunks)
fn_out = fn_out.replace('.nc', f'_ci_p{alpha/2*100:02.0f}.nc')
print(basename(fn_out))
with ProgressBar():
    ds_rp_ci = xlm_fit_ci(ds['WSE_am'].chunk(chunks),
import random
import time

import peaks
from deap import base
from deap import creator
from deap import tools

experiment = "%d-p%d" % (512, 512)
experiment_id = experiment + "-%d" % round(time.time(), 0)
datafile = open(experiment_id + ".dat", "a")


def evalPeaks(individual):
    return peaks.p_peaks(individual, pks),


pks = peaks.get_peaks(number=256, bits=512, seed=64)

creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bool, 512)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# Operator registering
toolbox.register("evaluate", evalPeaks)
toolbox.register("mate", tools.cxTwoPoints)
def cale_workflow(input_file,
                  mask=None,
                  output_dir=None,
                  prefix=None,
                  roi_data_dir=None,
                  ns_data_dir=None,
                  macm_data_dir=None,
                  rsfc_data_dir=None,
                  con_data_dir=None,
                  work_dir=None):
    if prefix is None:
        prefix = op.basename(input_file).split('.')[0] + "_"

    if output_dir is None:
        output_dir = op.join(op.abspath(input_file), 'prefix')
    if op.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    if ns_data_dir is None:
        ns_data_dir = '.'

    if con_data_dir is None:
        con_data_dir = '.'

    if roi_data_dir is None:
        roi_data_dir = '.'
    if not op.isdir(roi_data_dir):
        os.makedirs(roi_data_dir)

    if macm_data_dir is None:
        macm_data_dir = '.'
    if not op.isdir(macm_data_dir):
        os.makedirs(macm_data_dir)

    if rsfc_data_dir is None:
        rsfc_data_dir = '.'
    if not op.isdir(rsfc_data_dir):
        os.makedirs(rsfc_data_dir)

    if work_dir is None:
        work_dir = op.join('/scratch', prefix)
    if op.isdir(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir)

    """
    Import input file.
    If a text file or spreadsheet, run ALE workflow.
    Otherwise, start cALE workflow.
    """
    file_ext = '.'.join(op.basename(input_file).split('.')[1:])
    sheet_ext = ['txt', 'csv', 'tsv']
    img_ext = ['nii', 'nii.gz']

    og_ale_dir = op.join(output_dir, 'original', 'ale')
    os.makedirs(og_ale_dir)

    if file_ext in sheet_ext:
        """
        Run ALE workflow first.
        """
        ale_sleuth_workflow(input_file,
                            sleuth_file2=None,
                            output_dir=og_ale_dir,
                            prefix=prefix,
                            n_iters=10000,
                            v_thr=0.001,
                            fwhm=None,
                            n_cores=-1)
        img_file = op.join(
            og_ale_dir,
            prefix + "_logp_level-cluster_corr-FWE_method-permutation.nii.gz")
    elif file_ext in img_ext:
        shutil.copy(input_file, og_ale_dir)
        img_file = op.join(og_ale_dir, op.basename(input_file))
    else:
        print('Spreadsheets must be of type .txt, .csv, or .tsv. '
              'Image files must be of type .nii or .nii.gz.')

    """
    Identify cluster peaks.
""" peaks_df = get_peaks(img_file, og_ale_dir) og_roi_dir = op.join(output_dir, 'original', 'rois') os.makedirs(og_roi_dir) #run connectivity workflow for each set of coordinates in the dataframe for i, row in peaks_df.iterrows(): roi_prefix = '{x}_{y}_{z}'.format(x=row['x'], y=row['y'], z=row['z']) # See if file already exists in ROI directory roi_fn = op.join(roi_data_dir, roi_prefix + '.nii.gz') if not op.isfile(roi_fn): make_sphere(row['x'], row['y'], row['z'], roi_data_dir) shutil.copy(roi_fn, og_roi_dir) connectivity_workflow(roi_fn, op.join(output_dir, 'original'), roi_data_dir, macm_data_dir, rs_data_dir, con_data_dir) com_ale_dir = op.join(output_dir, 'complementary', 'ale') os.makedirs(com_ale_dir) #sum consensus connectivity maps cale(og_con_dir, com_ale_dir) cale_fn = glob(op.join(com_ale_dir, 'cALE_thresh-*.nii.gz')) #identify cluster peaks in cALE image com_peaks_df = get_peaks(cale_fn, work_dir) com_roi_dir = op.join(output_dir, 'complementary', 'rois') os.makedirs(com_roi_dir) #run connectivity workflow for each set of coordinates in the dataframe for i, row in com_peaks_df.iterrows(): roi_prefix = '{x}_{y}_{z}'.format(x=row['x'], y=row['y'], z=row['z']) # See if file already exists in ROI directory roi_fn = op.join(roi_data_dir, roi_prefix + '.nii.gz') if not op.isfile(roi_fn): make_sphere(row['x'], row['y'], row['z'], roi_data_dir) shutil.copy(roi_fn, com_roi_dir) connectivity_workflow(roi_fn, op.join(output_dir, 'complementary'), roi_data_dir, macm_data_dir, rs_data_dir, con_data_dir) #do clustering of complementary macms and rsfcs com_macm_dir = op.join(output_dir, 'complementary', 'macm') com_rsfc_dir = op.join(output_dir, 'complementary', 'rsfc') for nclust in range(2,9,1): for tmp_conn in ['macm', 'rsfc'']: tmp_dir = op.join(output_dir, 'complementary', tmp_conn) clustering_workflow(tmp_dir, nclust, 'hierarchical')
import random
import time

import peaks
from deap import base
from deap import creator
from deap import tools

experiment = "%d-p%d" % (512, 512)
experiment_id = experiment + "-%d" % round(time.time(), 0)
datafile = open(experiment_id + ".dat", "a")


def evalPeaks(individual):
    return peaks.p_peaks(individual, pks),


pks = peaks.get_peaks(number=512, bits=256, seed=64)

creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bool, 256)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
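# Minimal sketch of how the toolbox above is typically driven (assumed usage;
# the evolutionary main loop and the population size are not part of this
# excerpt and are shown here only for illustration).
pop = toolbox.population(n=100)
for ind in pop:
    ind.fitness.values = toolbox.evaluate(ind)  # evalPeaks returns a 1-tuple
best = tools.selBest(pop, k=1)[0]
print(best.fitness.values)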