Example no. 1
def ev_map(ddir,
           fn_out,
           var,
           mask=None,
           min_dist=14,
           rp=rps_out,
           out_kwargs={}):
    chunks2 = {'latlon': 500, 'time': -1}
    fns = glob.glob(join(ddir, f'{var}*.nc'))
    # combine, stack and mask data
    da = xr.open_mfdataset(fns, combine='by_coords')[var]
    if mask is None:
        mask = xr.ufuncs.isfinite(da.isel(time=0)).load()
    da.coords['mask'] = mask
    da_stacked = da.stack(latlon=('lat', 'lon'))
    da_stacked = da_stacked.where(da_stacked['mask'], drop=True).chunk(chunks2)
    # get AM
    peaks_am_stacked = get_peaks(
        da_stacked, min_dist=min_dist, dim='time',
        chunks=chunks2).groupby('time.year').max('time')
    peaks_am_stacked = peaks_am_stacked.rename({'year': 'time'})
    peaks_am_stacked.name = f'{var}_am'
    # fit gumbel
    with ProgressBar():
        ds_rp_stacked = xlm_fit(peaks_am_stacked,
                                fdist=distr.gum,
                                rp=rp,
                                nmin=30)
        ds_rp_stacked = ds_rp_stacked.rename({f'{var}_am': f'{var}_ev'})
        # merge and write
        xr.merge([peaks_am_stacked,
                  ds_rp_stacked]).unstack().reindex_like(mask).to_netcdf(
                      fn_out, **out_kwargs)
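A minimal usage sketch for ev_map (illustrative only): the data directory, variable name, return periods and output settings below are assumptions, and note that rps_out has to exist before ev_map is defined because it is used as a default argument.

import numpy as np

rps_out = np.array([2, 5, 10, 25, 50, 100])  # assumed return periods [years]

ev_map(ddir='/data/runoff',              # hypothetical folder with <var>*.nc files
       fn_out='runoff_ev.nc',            # hypothetical output NetCDF
       var='runoff',
       min_dist=14,
       rp=rps_out,
       out_kwargs={'engine': 'netcdf4'})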
Example no. 2
def work(params):
    worker_id = params[0]
    config = params[1]
    results = []
    global PEAKS
    PEAKS = peaks.get_peaks(config["PEAKS"],config["CHROMOSOME_LENGTH"],64)
    for sample_num in range(config["MAX_SAMPLES"]):
        server = jsonrpclib.Server(config["SERVER"]) #Create every time to prevent timeouts
        if int(server.found(None)):
            break
        else:
            gen_data = evolve(sample_num, config)
            if gen_data[0]:
                server.found_it(None)
            results.append([worker_id] + gen_data[1])
    return results
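A minimal sketch of how such a worker could be dispatched in parallel with a multiprocessing pool; the config values and server URL below are placeholders, and work, evolve and peaks are assumed to be importable from the surrounding project.

from multiprocessing import Pool

# hypothetical configuration; keys mirror those read inside work()
config = {
    "PEAKS": 256,
    "CHROMOSOME_LENGTH": 512,
    "MAX_SAMPLES": 1000,
    "SERVER": "http://localhost:8000",
}

if __name__ == "__main__":
    n_workers = 4
    with Pool(n_workers) as pool:
        # each worker gets its id plus a copy of the config
        all_results = pool.map(work, [(i, config) for i in range(n_workers)])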
Example no. 3
def work(params):
    worker_id = params[0]
    config = params[1]
    results = []
    global PEAKS
    PEAKS = peaks.get_peaks(config["PEAKS"], config["CHROMOSOME_LENGTH"], 64)
    for sample_num in range(config["MAX_SAMPLES"]):
        server = jsonrpclib.Server(
            config["SERVER"])  #Create every time to prevent timeouts
        if int(server.found(None)):
            break
        else:
            gen_data = evolve(sample_num, config)
            if gen_data[0]:
                server.found_it(None)
            results.append([worker_id] + gen_data[1])
    return results
def flood_day(ts, min_dist=1, dim='time'):
    """Returns a yearly time series with the flood day and magnitude.
    i.e. the day of the year (DOY) of the annual maxima (AM) in ts.
    
    Parameters
    ----------
    ts : xarray DataArray
        time series with daily time step
    dim : str, optional
        name of time dimension in ts (the default is 'time')
    
    Returns
    -------
    xarray DataSet
        AM 
        DOY of annual maxima per year [julian calander day]
        
    """
    # make sure the first (last) year starts (ends) at the start (end) of the year and has a daily time step
    # t = ts[dim].to_index()
    # tstart =  t[0] if (t[0].month == 1) and (t[0].day == 1) else date(t[0].year+1, 1, 1)
    # tend =  t[-1] if (t[-1].month == 12) and (t[-1].day == 31) else date(t[-1].year-1, 12, 31)
    # ts = ts.sel(**{dim: pd.date_range(tstart, tend, )}, method='nearest')
    grp = '{}.year'.format(dim)
    # get peaks only with optional min dist argument
    peaks = get_peaks(ts, min_dist=min_dist, dim=dim)
    # get annual maxima
    am = peaks.groupby(grp).max(dim=dim).chunk({'year': -1})
    am.name = 'annual_max'
    am.attrs.update(description='annual maximum',
                    unit=ts.attrs.get('unit', 'unknown'))
    # now argmax for the DOY of the annual maxima
    # NOTE: argmax on dask chunks returns zeros with xr version 0.10.8 and dask version 0.18.2
    # doy = ts.groupby(grp).reduce(np.argmax, dim=dim)
    # fillna to avoid "all-NaN slice" errors
    doy = peaks.fillna(-np.inf).groupby(grp).argmax(dim=dim)
    # deal with all-NaN years and start counting from 1 (Jan 1st) instead of python zero indexing
    doy = xr.where(xr.ufuncs.isfinite(am), doy + 1, np.nan).chunk({'year': -1})
    doy.name = 'doy'
    doy.attrs.update(description='annual maximum day of the year',
                     unit='julian calendar day')
    return xr.merge([am, doy])
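A minimal usage sketch on a synthetic daily series, assuming get_peaks from the same module is importable; the data and attributes below are purely illustrative.

import numpy as np
import pandas as pd
import xarray as xr

# synthetic 10-year daily series, only to illustrate the call
time = pd.date_range('2000-01-01', '2009-12-31', freq='D')
ts = xr.DataArray(np.random.rand(time.size), coords={'time': time}, dims='time',
                  attrs={'unit': 'm3/s'})

ds_am = flood_day(ts, min_dist=14)   # Dataset with 'annual_max' and 'doy' per year
print(ds_am['doy'].values)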
Example no. 5
y, n, c = 0, 0, 0
with open(sys.argv[3]) as fp:
    for line in fp:
        c += 1
        chrom, start, end, offsets, rs, strand = line.strip().split('\t')[:6]
        expression = [int(float(i)) for i in line.strip().split('\t')[6].split(",")]
        mcmc = [float(i) for i in line.strip().split('\t')[7].split(",")]

        seq = genome_seq[chrom][int(start):int(end)]
        if strand == "-":
            expression.reverse()
            seq = revcomp(seq)

        print(chrom, start, end, strand, rs)

        peaks = get_peaks(mcmc, max(expression), 0.08)
        for peak in peaks:
            peak.plot(len(expression))

        x = range(0, len(expression), WINDOWS)
        if len(mcmc) != len(x): x = range(0, len(expression)+1, WINDOWS)
        plt.plot(x, mcmc, linewidth=6, color= 'k')
        plt.plot(expression)
        for i in range(30, len(seq) - 30):
            motif = seq[i - len(tp_pwm): i + len(fp_pwm)]
            score = (score_motif(pwm, motif) - min_score) / (max_score - min_score)
            if score > .8:
                plt.scatter([i], [max(expression)], marker = 'o', c = color(score))

        plt.autoscale(tight = True)
        plt.show(block = False)
vars_ = ['h', 'Hskewsurge', 'Q']
rm = {'Hskewsurge_day': 'Hskewsurge'}

# read data
ds = xr.open_zarr(fn_in).sel(scen=scenarios).rename(rm)
ds['h'] = ds['WSE'] - ds['z0']
ds = ds[vars_].sel(scen='surge').drop('scen')

# window max driver
ds_wdwmax = ds.rolling(time=wdw * 2 + 1, min_periods=1,
                       center=True).construct('window').max('window').astype(
                           np.float32)

# get AM h peaks
peaks = get_peaks(
    ds['h'], min_dist=30,
    dim='time').reset_coords(drop=True).reindex_like(ds).compute()
hpeaks = peaks.where(np.isfinite(peaks), -np.inf)
hpeaks_yr = hpeaks.groupby('time.year')
hpeaks_am = hpeaks_yr == hpeaks_yr.max('time')
hpeaks_doy = hpeaks['time'].dt.dayofyear

# combine the DOY of the h AM peaks with the window-max drivers and keep only the annual maxima
ds_am_h = xr.merge([
    hpeaks_doy,
    ds_wdwmax,
]).where(hpeaks_am, -np.inf).groupby('time.year').max('time')
ds_am_h = ds_am_h.transpose('year', 'ensemble', 'index').compute()

# write to file
ds_am_h.attrs.update(
Example no. 7
chunks = {'ensemble': -1, 'scen': -1, 'time': -1, 'index': 100}
#I/O
root = r'/scratch/compound_hotspots'
ddir = join(root, 'data', '4-postprocessed')
# IN
fn_in = join(ddir, 'rivmth_reanalysis.zarr')
fn_out = join(ddir, 'rivmth_ev.nc')

# read
# ----
da_wse = xr.open_zarr(fn_in)['WSE'].sel(scen=scenarios)

# AM peaks & extreme value analysis (fit gumbel)
print(basename(fn_out))
peaks_am = get_peaks(
    da_wse, min_dist=min_dist,
    dim='time').fillna(-np.inf).groupby('time.year').max('time')
peaks_am = peaks_am.rename({'year': 'time'}).chunk(chunks)
ds_rp = xlm_fit(peaks_am, fdist=distr.gum, rp=rps_out)
ds_out = xr.merge([
    peaks_am.to_dataset().rename({'WSE': 'WSE_am'}),
    ds_rp.rename({'WSE': 'WSE_ev'}),
])
ds_out.chunk(chunks).to_netcdf(fn_out)

# confidence intervals using bootstrap
ds = xr.open_dataset(fn_out, chunks=chunks)
fn_out = fn_out.replace('.nc', f'_ci_p{alpha/2*100:02.0f}.nc')
print(basename(fn_out))
with ProgressBar():
    ds_rp_ci = xlm_fit_ci(ds['WSE_am'].chunk(chunks),
Example no. 8
import random
import time

import peaks

from deap import base
from deap import creator
from deap import tools

experiment = "%d-p%d" % (512, 512)
experiment_id = experiment + "-%d" % round(time.time(), 0)
datafile = open(experiment_id + ".dat", "a")


def evalPeaks(individual):
    return peaks.p_peaks(individual, pks),


pks = peaks.get_peaks(number=256, bits=512, seed=64)

creator.create("FitnessMax", base.Fitness, weights=(1.0, ))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bool, 512)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Operator registering
toolbox.register("evaluate", evalPeaks)
toolbox.register("mate", tools.cxTwoPoints)
Example no. 9
    for line in fp:
        c += 1
        chrom, start, end, offsets, rs, strand = line.strip().split('\t')[:6]
        expression = [
            int(float(i)) for i in line.strip().split('\t')[6].split(",")
        ]
        mcmc = [float(i) for i in line.strip().split('\t')[7].split(",")]

        seq = genome_seq[chrom][int(start):int(end)]
        if strand == "-":
            expression.reverse()
            seq = revcomp(seq)

        print(chrom, start, end, strand, rs)

        peaks = get_peaks(mcmc, max(expression), 0.08)
        for peak in peaks:
            peak.plot(len(expression))

        x = range(0, len(expression), WINDOWS)
        if len(mcmc) != len(x): x = range(0, len(expression) + 1, WINDOWS)
        plt.plot(x, mcmc, linewidth=6, color='k')
        plt.plot(expression)
        for i in range(30, len(seq) - 30):
            motif = seq[i - len(tp_pwm):i + len(fp_pwm)]
            score = (score_motif(pwm, motif) - min_score) / (max_score -
                                                             min_score)
            if score > .8:
                plt.scatter([i], [max(expression)], marker='o', c=color(score))

        plt.autoscale(tight=True)
Example no. 10
def cale_workflow(input_file, mask=None, output_dir=None, prefix=None,
                  roi_data_dir=None, ns_data_dir=None, macm_data_dir=None,
                  rsfc_data_dir=None, con_data_dir=None, work_dir=None):

    if prefix is None:
        prefix = op.basename(input_file).split('.')[0] + "_"

    if output_dir is None:
        output_dir = op.join(op.dirname(op.abspath(input_file)), prefix)
    if op.isdir(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir)

    if ns_data_dir is None:
        ns_data_dir = '.'

    if con_data_dir is None:
        con_data_dir = '.'

    if roi_data_dir is None:
        roi_data_dir = '.'
    if not op.isdir(roi_data_dir):
        os.makedirs(roi_data_dir)

    if macm_data_dir is None:
        macm_data_dir = '.'
    if not op.isdir(macm_data_dir):
        os.makedirs(macm_data_dir)

    if rsfc_data_dir is None:
        rsfc_data_dir = '.'
    if not op.isdir(rsfc_data_dir):
        os.makedirs(rsfc_data_dir)

    if work_dir is None:
        work_dir = op.join('/scratch', prefix)
    if op.isdir(work_dir):
        shutil.rmtree(work_dir)
    os.makedirs(work_dir)


    """
    Import input file.
    If a text file or spreadsheet, run ALE workflow.
    Otherwise, start cALE workflow.
    """

    file_ext = '.'.join(op.basename(input_file).split('.')[1:])
    sheet_ext = ['txt', 'csv', 'tsv']
    img_ext = ['nii', 'nii.gz']

    og_ale_dir = op.join(output_dir, 'original', 'ale')
    os.makedirs(og_ale_dir)

    if file_ext in sheet_ext:
        """
        Run ALE workflow first.
        """
        ale_sleuth_workflow(input_file, sleuth_file2=None, output_dir=og_ale_dir,
                                prefix=prefix, n_iters=10000, v_thr=0.001,
                                fwhm=None, n_cores=-1)
        img_file = op.join(og_ale_dir, prefix + "_logp_level-cluster_corr-FWE_method-permutation.nii.gz")
    elif file_ext in img_ext:
        shutil.copy(input_file, og_ale_dir)
        img_file = op.join(og_ale_dir, op.basename(input_file))
    else:
        print('Spreadsheets must be of type .txt, .csv, or .tsv. '
              'Image files must be of type .nii or .nii.gz.')
        return


    """
    Identify cluster peaks.
    """
    peaks_df = get_peaks(img_file, og_ale_dir)

    og_roi_dir = op.join(output_dir, 'original', 'rois')
    os.makedirs(og_roi_dir)

    #run connectivity workflow for each set of coordinates in the dataframe
    for i, row in peaks_df.iterrows():

        roi_prefix = '{x}_{y}_{z}'.format(x=row['x'], y=row['y'], z=row['z'])

        # See if file already exists in ROI directory
        roi_fn = op.join(roi_data_dir, roi_prefix + '.nii.gz')
        if not op.isfile(roi_fn):
            make_sphere(row['x'], row['y'], row['z'], roi_data_dir)

        shutil.copy(roi_fn, og_roi_dir)

        connectivity_workflow(roi_fn, op.join(output_dir, 'original'),
                              roi_data_dir, macm_data_dir, rsfc_data_dir,
                              con_data_dir)

    com_ale_dir = op.join(output_dir, 'complementary', 'ale')
    os.makedirs(com_ale_dir)

    #sum consensus connectivity maps
    cale(og_con_dir, com_ale_dir)

    cale_fn = glob(op.join(com_ale_dir, 'cALE_thresh-*.nii.gz'))
    #identify cluster peaks in cALE image
    com_peaks_df = get_peaks(cale_fn, work_dir)

    com_roi_dir = op.join(output_dir, 'complementary', 'rois')
    os.makedirs(com_roi_dir)

    #run connectivity workflow for each set of coordinates in the dataframe
    for i, row in com_peaks_df.iterrows():

        roi_prefix = '{x}_{y}_{z}'.format(x=row['x'], y=row['y'], z=row['z'])

        # See if file already exists in ROI directory
        roi_fn = op.join(roi_data_dir, roi_prefix + '.nii.gz')
        if not op.isfile(roi_fn):
            make_sphere(row['x'], row['y'], row['z'], roi_data_dir)

        shutil.copy(roi_fn, com_roi_dir)

        connectivity_workflow(roi_fn, op.join(output_dir, 'complementary'),
                              roi_data_dir, macm_data_dir, rsfc_data_dir,
                              con_data_dir)

    #do clustering of complementary macms and rsfcs
    com_macm_dir = op.join(output_dir, 'complementary', 'macm')
    com_rsfc_dir = op.join(output_dir, 'complementary', 'rsfc')

    for nclust in range(2, 9):

        for tmp_conn in ['macm', 'rsfc']:
            tmp_dir = op.join(output_dir, 'complementary', tmp_conn)
            clustering_workflow(tmp_dir, nclust, 'hierarchical')
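A minimal, hypothetical invocation of the workflow; every path below is a placeholder rather than a real project location.

# illustrative only: all paths are placeholders
cale_workflow('/data/meta/coordinates.txt',
              output_dir='/results/cale_run',
              prefix='cale_run',
              roi_data_dir='/data/rois',
              ns_data_dir='/data/neurosynth',
              macm_data_dir='/data/macm',
              rsfc_data_dir='/data/rsfc',
              con_data_dir='/data/consensus',
              work_dir='/scratch/cale_run')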
Example no. 11
import random
import time
import peaks

from deap import base
from deap import creator
from deap import tools

experiment = "%d-p%d" % (512,512)
experiment_id = experiment + "-%d" % round(time.time(),0)
datafile = open(experiment_id+".dat","a")


def evalPeaks(individual):
    return peaks.p_peaks(individual, pks),

pks = peaks.get_peaks(number=512,bits=256,seed=64)




creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()
# Attribute generator
toolbox.register("attr_bool", random.randint, 0, 1)
# Structure initializers
toolbox.register("individual", tools.initRepeat, creator.Individual,
                 toolbox.attr_bool, 256)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)