Example #1
File: chunk.py Project: martindurant/dask
def argtopk(a_plus_idx, k, axis, keepdims):
    """ Chunk and combine function of argtopk

    Extract the indices of the k largest elements from a on the given axis.
    If k is negative, extract the indices of the -k smallest elements instead.
    Note that, unlike in the parent function, the returned elements
    are not sorted internally.
    """
    assert keepdims is True
    axis = axis[0]

    if isinstance(a_plus_idx, list):
        a_plus_idx = list(flatten(a_plus_idx))
        a = np.concatenate([ai for ai, _ in a_plus_idx], axis)
        idx = np.concatenate([broadcast_to(idxi, ai.shape)
                              for ai, idxi in a_plus_idx], axis)
    else:
        a, idx = a_plus_idx

    if abs(k) >= a.shape[axis]:
        return a_plus_idx

    idx2 = np.argpartition(a, -k, axis=axis)
    k_slice = slice(-k, None) if k > 0 else slice(-k)
    idx2 = idx2[tuple(k_slice if i == axis else slice(None)
                      for i in range(a.ndim))]
    return take_along_axis(a, idx2, axis), take_along_axis(idx, idx2, axis)
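
The core pattern here, separated from the dask chunk bookkeeping, is np.argpartition followed by np.take_along_axis. A minimal NumPy-only sketch on toy data (values and names chosen here purely for illustration):

import numpy as np

a = np.array([[5, 1, 9, 3],
              [2, 8, 4, 7]])
k = 2

# Partition so that the k largest entries of each row land in the last k columns.
idx2 = np.argpartition(a, -k, axis=1)[:, -k:]
topk_vals = np.take_along_axis(a, idx2, axis=1)

print(topk_vals)  # the two largest values per row, in arbitrary order, e.g. [[5 9] [7 8]]
print(idx2)       # their column indices in a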
Example #2
    def agregar(símismo, nuevos, edad=0, etapas=None):

        # Clear the ages of cohorts whose population is zero
        símismo._edades[símismo._pobs == 0] = 0

        if etapas is None:
            rbn = slice(None)
            nuevos = símismo._proc_matr_datos(nuevos)
        else:
            rbn = símismo.rebanar(etapas)

        # The current ages and populations of the stages
        pobs = símismo._pobs[rbn]
        edades = símismo._edades[rbn]

        eje_coh = símismo.eje_coh()

        # The indices of the days whose cohorts have the minimum age. If more than one day (cohort) has the
        # minimum age, the first one is taken.
        í_cohs = np.expand_dims(np.argmin(edades, axis=eje_coh), axis=eje_coh)

        # The ages of the cohorts with the minimum ages.
        eds_mín = np.take_along_axis(edades, í_cohs, axis=eje_coh)

        # The populations corresponding to these minimum ages.
        pobs_coresp = np.take_along_axis(pobs, í_cohs, axis=eje_coh)

        # Where there is no existing population, reset the age.
        eds_mín = np.where(pobs_coresp == 0, [0], eds_mín)

        # Compute the weight of the existing ages, according to their existing populations (to combine with the new
        # cohort if it has to be merged with an existing cohort).
        peso_ed_ya = np.divide(pobs_coresp, np.add(nuevos, pobs_coresp))
        peso_ed_ya[np.isnan(peso_ed_ya)] = 0

        # The average ages. If there was no need to combine cohorts, this is simply the age of the new cohort.
        eds_prom = np.add(np.multiply(eds_mín, peso_ed_ya), np.multiply(edad, np.subtract(1, peso_ed_ya)))

        # Store the updated ages at the appropriate indices
        np.put_along_axis(edades, í_cohs, eds_prom, axis=eje_coh)

        # Store the updated populations at the appropriate indices
        np.put_along_axis(pobs, í_cohs, nuevos + pobs_coresp, axis=eje_coh)

        símismo._pobs[rbn] = pobs
        símismo._edades[rbn] = edades
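
The read-modify-write pattern in agregar (pick one slot per row with argmin, read it with np.take_along_axis, write the update back with np.put_along_axis) can be shown on a toy array. A minimal sketch with made-up numbers, not tied to the class above:

import numpy as np

edades = np.array([[3., 0., 5.],
                   [2., 4., 1.]])   # ages: rows = stages, columns = cohorts

# Index of the youngest cohort per row, with the axis kept so take/put line up.
i_cohs = np.expand_dims(np.argmin(edades, axis=1), axis=1)

eds_min = np.take_along_axis(edades, i_cohs, axis=1)   # -> [[0.], [1.]]
np.put_along_axis(edades, i_cohs, eds_min + 1., axis=1)
print(edades)   # only the selected slots changed: [[3. 1. 5.], [2. 4. 2.]]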
Example #3
File: chunk.py Project: martindurant/dask
def argtopk_aggregate(a_plus_idx, k, axis, keepdims):
    """ Final aggregation function of argtopk

    Invoke argtopk one final time, sort the results internally, drop the data
    and return the index only.
    """
    assert keepdims is True
    a, idx = argtopk(a_plus_idx, k, axis, keepdims)
    axis = axis[0]

    idx2 = np.argsort(a, axis=axis)
    idx = take_along_axis(idx, idx2, axis)
    if k < 0:
        return idx
    return idx[tuple(slice(None, None, -1) if i == axis else slice(None)
                     for i in range(idx.ndim))]
Example #4
 def random_sample(
     self,
     inputs,
     n,
     topk=None,
     topp=None,
     states=None,
     temperature=1,
     min_ends=1
 ):
     """随机采样n个结果
     说明:非None的topk表示每一步只从概率最高的topk个中采样;而非None的topp
          表示每一步只从概率最高的且概率之和刚好达到topp的若干个token中采样。
     返回:n个解码序列组成的list。
     """
     inputs = [np.array([i]) for i in inputs]
     output_ids = self.first_output_ids
     results = []
     for step in range(self.maxlen):
         probas, states = self.predict(
             inputs, output_ids, states, temperature, 'probas'
         )  # compute the current probabilities
         probas /= probas.sum(axis=1, keepdims=True)  # make sure they are normalized
         if step == 0:  # after the first prediction step, repeat the results n times
             probas = np.repeat(probas, n, axis=0)
             inputs = [np.repeat(i, n, axis=0) for i in inputs]
             output_ids = np.repeat(output_ids, n, axis=0)
         if topk is not None:
             k_indices = probas.argpartition(-topk, axis=1)[:, -topk:]  # keep only the top-k
             probas = np.take_along_axis(probas, k_indices, axis=1)  # top-k probabilities
             probas /= probas.sum(axis=1, keepdims=True)  # renormalize
         if topp is not None:
             p_indices = probas.argsort(axis=1)[:, ::-1]  # sort from high to low
             probas = np.take_along_axis(probas, p_indices, axis=1)  # sorted probabilities
             cumsum_probas = np.cumsum(probas, axis=1)  # cumulative probabilities
             flag = np.roll(cumsum_probas >= topp, 1, axis=1)  # mark the part beyond topp
             flag[:, 0] = False  # together with np.roll above, this shifts the mask by one position
             probas[flag] = 0  # zero out everything beyond the cutoff
             probas /= probas.sum(axis=1, keepdims=True)  # renormalize
         sample_func = lambda p: np.random.choice(len(p), p=p)  # sampling function, draws by probability
         sample_ids = np.apply_along_axis(sample_func, 1, probas)  # perform the sampling
         sample_ids = sample_ids.reshape((-1, 1))  # align shapes
         if topp is not None:
             sample_ids = np.take_along_axis(
                 p_indices, sample_ids, axis=1
             )  # map back to the original token ids
         if topk is not None:
             sample_ids = np.take_along_axis(
                 k_indices, sample_ids, axis=1
             )  # map back to the original token ids
         output_ids = np.concatenate([output_ids, sample_ids], 1)  # update the outputs
         is_end = output_ids[:, -1] == self.end_id  # whether each sequence now ends with the end token
         end_counts = (output_ids == self.end_id).sum(1)  # count the end tokens seen so far
         if output_ids.shape[1] >= self.minlen:  # minimum-length check
             flag = is_end & (end_counts >= min_ends)  # mark completed sequences
             if flag.any():  # if any sequences are completed
                 for ids in output_ids[flag]:  # store the completed sequences
                     results.append(ids)
                 flag = (flag == False)  # mark the unfinished sequences
                 inputs = [i[flag] for i in inputs]  # keep only the unfinished inputs
                 output_ids = output_ids[flag]  # keep only the unfinished candidates
                 end_counts = end_counts[flag]  # keep only the unfinished end counts
                 if len(output_ids) == 0:
                     break
     # if there are still unfinished sequences, add them to the results directly
     for ids in output_ids:
         results.append(ids)
     # return the results
     return results
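
The top-p (nucleus) filtering above sorts the probabilities, masks the tail with a cumsum/np.roll trick, and maps the sampled positions back to the original ids with np.take_along_axis. A self-contained sketch of just that filtering step on a toy probability matrix (all values made up):

import numpy as np

probas = np.array([[0.5, 0.1, 0.3, 0.1],
                   [0.05, 0.7, 0.05, 0.2]])
topp = 0.8

p_indices = probas.argsort(axis=1)[:, ::-1]                 # sort high to low
sorted_p = np.take_along_axis(probas, p_indices, axis=1)
flag = np.roll(np.cumsum(sorted_p, axis=1) >= topp, 1, axis=1)
flag[:, 0] = False                                          # always keep at least one token
sorted_p[flag] = 0                                          # drop the tail beyond topp
sorted_p /= sorted_p.sum(axis=1, keepdims=True)

# Sample one position per row, then map back to the original column ids.
sample_pos = np.array([[np.random.choice(len(p), p=p)] for p in sorted_p])
sample_ids = np.take_along_axis(p_indices, sample_pos, axis=1)
print(sample_ids)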
Example #5
def main():
    load_start = dt.datetime.now()
    #Try parsing arguments using argparse
    parser = argparse.ArgumentParser(
        description='wrf non-parallel convective diagnostics processor')
    parser.add_argument("-m", help="Model name", required=True)
    parser.add_argument("-r",
                        help="Region name (default is aus)",
                        default="aus")
    parser.add_argument("-t1", help="Time start YYYYMMDDHH", required=True)
    parser.add_argument("-t2", help="Time end YYYYMMDDHH", required=True)
    parser.add_argument(
        "-e",
        help=
        "CMIP5 experiment name (not required if using era5, erai or barra)",
        default="")
    parser.add_argument(
        "--barpa_forcing_mdl",
        help="BARPA forcing model (erai or ACCESS1-0). Default erai.",
        default="erai")
    parser.add_argument(
        "--ens",
        help="CMIP5 ensemble name (not required if using era5, erai or barra)",
        default="r1i1p1")
    parser.add_argument("--group",
                        help="CMIP6 modelling group name",
                        default="")
    parser.add_argument("--project",
                        help="CMIP6 modelling intercomparison project",
                        default="CMIP")
    parser.add_argument("--ver6hr",
                        help="Version on al33 for 6hr data",
                        default="")
    parser.add_argument("--ver3hr",
                        help="Version on al33 for 3hr data",
                        default="")
    parser.add_argument("--issave",
                        help="Save output (True or False, default is False)",
                        default="False")
    parser.add_argument(
        "--ub4",
        help=
        "Where to get era5 data. Default True for ub4 project, otherwise rt52",
        default="True")
    parser.add_argument(
        "--outname",
        help=
        "Name of saved output. In the form *outname*_*t1*_*t2*.nc. Default behaviour is the model name",
        default=None)
    parser.add_argument(
        "--is_dcape",
        help="Should DCAPE be calculated? (1 or 0. Default is 1)",
        default=1)
    parser.add_argument(
        "--al33",
        help=
        "Should data be gathered from al33? Default is False, and data is gathered from r87. If True, then group is required",
        default="False")
    parser.add_argument(
        "--delta_t",
        help=
        "Time step spacing for ERA5 data, in hours. Default is one the minimum spacing (1 hour)",
        default="1")
    parser.add_argument(
        "--era5_interp",
        help=
        "Horizontally interpolate model data before calculating convective parameters",
        default="False")
    args = parser.parse_args()

    #Parse arguments from cmd line and set up inputs (date region model)
    model = args.m
    region = args.r
    t1 = args.t1
    t2 = args.t2
    issave = args.issave
    ub4 = args.ub4
    al33 = args.al33
    if args.outname is None:
        out_name = model
    else:
        out_name = args.outname
    is_dcape = args.is_dcape
    barpa_forcing_mdl = args.barpa_forcing_mdl
    experiment = args.e
    ensemble = args.ens
    group = args.group
    project = args.project
    ver6hr = args.ver6hr
    ver3hr = args.ver3hr
    delta_t = int(args.delta_t)
    era5_interp = args.era5_interp
    if region == "sa_small":
        start_lat = -38
        end_lat = -26
        start_lon = 132
        end_lon = 142
    elif region == "aus":
        start_lat = -44.525
        end_lat = -9.975
        start_lon = 111.975
        end_lon = 156.275
    elif region == "global":
        start_lat = -70
        end_lat = 70
        start_lon = -180
        end_lon = 179.75
    else:
        raise ValueError("INVALID REGION\n")
    domain = [start_lat, end_lat, start_lon, end_lon]
    try:
        time = [
            dt.datetime.strptime(t1, "%Y%m%d%H"),
            dt.datetime.strptime(t2, "%Y%m%d%H")
        ]
    except ValueError:
        raise ValueError("INVALID START OR END TIME. SHOULD BE YYYYMMDDHH\n")
    if era5_interp == "True":
        era5_interp = True
    elif era5_interp == "False":
        era5_interp = False
    else:
        raise ValueError("\n INVALID era5_interp...SHOULD BE True OR False")
    if ub4 == "True":
        ub4 = True
    elif ub4 == "False":
        ub4 = False
    else:
        raise ValueError("\n INVALID ub4...SHOULD BE True OR False")
    if issave == "True":
        issave = True
    elif issave == "False":
        issave = False
    else:
        raise ValueError("\n INVALID ISSAVE...SHOULD BE True OR False")
    if al33 == "True":
        al33 = True
    elif al33 == "False":
        al33 = False
    else:
        raise ValueError("\n INVALID al33...SHOULD BE True OR False")

    #Load data
    print("LOADING DATA...")
    if model == "erai":
        ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,\
         cp,tp,wg10,mod_cape,lon,lat,date_list = \
         read_erai(domain,time)
        cp = cp.astype("float32", order="C")
        tp = tp.astype("float32", order="C")
        mod_cape = mod_cape.astype("float32", order="C")
    elif model == "era5":
        if ub4:
            ta,temp1,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,\
             cp,wg10,mod_cape,lon,lat,date_list = \
             read_era5(domain,time,delta_t=delta_t)
        else:
            ta,temp1,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,\
             cp,tp,wg10,mod_cape,lon,lat,date_list = \
             read_era5_rt52(domain,time,delta_t=delta_t)
        cp = cp.astype("float32", order="C")
        tp = tp.astype("float32", order="C")
        mod_cape = mod_cape.astype("float32", order="C")
        wap = np.zeros(hgt.shape)
    elif model == "barra":
        ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \
         read_barra(domain,time)
    elif model == "barra_fc":
        ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \
         read_barra_fc(domain,time)
    elif model == "barpa":
        ta,hur,hgt,terrain,p,ps,ua,va,uas,vas,tas,ta2d,wg10,lon,lat,date_list = \
         read_barpa(domain, time, experiment, barpa_forcing_mdl, ensemble)
        wap = np.zeros(hgt.shape)
        temp1 = None
    elif model == "barra_ad":
        wg10,temp2,ta,temp1,hur,hgt,terrain,p,ps,wap,ua,va,uas,vas,tas,ta2d,lon,lat,date_list = \
         read_barra_ad(domain, time, False)
    elif model in ["ACCESS1-0","ACCESS1-3","GFDL-CM3","GFDL-ESM2M","CNRM-CM5","MIROC5",\
         "MRI-CGCM3","IPSL-CM5A-LR","IPSL-CM5A-MR","GFDL-ESM2G","bcc-csm1-1","MIROC-ESM",\
         "BNU-ESM"]:
        #Check that t1 and t2 are in the same year
        year = np.arange(int(t1[0:4]), int(t2[0:4]) + 1)
        ta, hur, hgt, terrain, p_3d, ps, ua, va, uas, vas, tas, ta2d, tp, lon, lat, \
            date_list = read_cmip(model, experiment, \
            ensemble, year, domain, cmip_ver=5, al33=al33, group=group, ver6hr=ver6hr, ver3hr=ver3hr)
        wap = np.zeros(hgt.shape)
        wg10 = np.zeros(ps.shape)
        mod_cape = np.zeros(ps.shape)
        p = np.zeros(p_3d[0, :, 0, 0].shape)
        #date_list = pd.to_datetime(date_list).to_pydatetime()
        temp1 = None
        tp = tp.astype("float32", order="C")
    elif model in ["ACCESS-ESM1-5", "ACCESS-CM2"]:
        year = np.arange(int(t1[0:4]), int(t2[0:4]) + 1)
        ta, hur, hgt, terrain, p_3d, ps, ua, va, uas, vas, tas, ta2d, lon, lat, \
            date_list = read_cmip(model, experiment,\
            ensemble, year, domain, cmip_ver=6, group=group, project=project)
        wap = np.zeros(hgt.shape)
        wg10 = np.zeros(ps.shape)
        p = np.zeros(p_3d[0, :, 0, 0].shape)
        #date_list = pd.to_datetime(date_list).to_pydatetime()
        temp1 = None
    else:
        raise ValueError("Model not recognised")
    del temp1
    ta = ta.astype("float32", order="C")
    hur = hur.astype("float32", order="C")
    hgt = hgt.astype("float32", order="C")
    terrain = terrain.astype("float32", order="C")
    p = p.astype("float32", order="C")
    ps = ps.astype("float32", order="C")
    wap = wap.astype("float32", order="C")
    ua = ua.astype("float32", order="C")
    va = va.astype("float32", order="C")
    uas = uas.astype("float32", order="C")
    vas = vas.astype("float32", order="C")
    tas = tas.astype("float32", order="C")
    ta2d = ta2d.astype("float32", order="C")
    wg10 = wg10.astype("float32", order="C")
    lon = lon.astype("float32", order="C")
    lat = lat.astype("float32", order="C")

    gc.collect()

    param = np.array([
        "mu_cape", "mu_cin", "muq", "s06", "s0500", "lr700_500", "mhgt",
        "ta500", "tp"
    ])

    if model in ["erai", "era5"]:
        param = np.concatenate([param, ["mod_cape"]])

    #Option to interpolate to the ERA5 grid
    if era5_interp:
        #Interpolate model data to the ERA5 grid
        from era5_read import get_lat_lon_rt52 as get_era5_lat_lon
        era5_lon, era5_lat = get_era5_lat_lon()
        era5_lon_ind = np.where((era5_lon >= domain[2])
                                & (era5_lon <= domain[3]))[0]
        era5_lat_ind = np.where((era5_lat >= domain[0])
                                & (era5_lat <= domain[1]))[0]
        era5_lon = era5_lon[era5_lon_ind]
        era5_lat = era5_lat[era5_lat_ind]
        terrain = interp_era5(terrain, lon, lat, era5_lon, era5_lat, d3=False)
        #Set output array
        output_data = np.zeros(
            (ps.shape[0], era5_lat.shape[0], era5_lon.shape[0], len(param)))
    else:
        output_data = np.zeros(
            (ps.shape[0], ps.shape[1], ps.shape[2], len(param)))

    #Assign p levels to a 3d array, with same dimensions as input variables (ta, hgt, etc.)
    #If the 3d p-lvl array already exists, then declare the variable "mdl_lvl" as true.
    try:
        p_3d
        mdl_lvl = True
        full_p3d = p_3d
    except NameError:
        mdl_lvl = False
        if era5_interp:
            p_3d = np.moveaxis(np.tile(p,[era5_lat.shape[0],era5_lon.shape[0],1]),[0,1,2],[1,2,0]).\
                astype(np.float32)
        else:
            p_3d = np.moveaxis(np.tile(p,[ta.shape[2],ta.shape[3],1]),[0,1,2],[1,2,0]).\
                astype(np.float32)

    print("LOAD TIME..." + str(dt.datetime.now() - load_start))
    tot_start = dt.datetime.now()

    for t in np.arange(0, ta.shape[0]):
        cape_start = dt.datetime.now()

        if era5_interp:
            ta_t = interp_era5(ta[t], lon, lat, era5_lon, era5_lat, d3=True)
            hur_t = interp_era5(hur[t], lon, lat, era5_lon, era5_lat, d3=True)
            hgt_t = interp_era5(hgt[t], lon, lat, era5_lon, era5_lat, d3=True)
            ps_t = interp_era5(ps[t], lon, lat, era5_lon, era5_lat, d3=False)
            wap_t = interp_era5(wap[t], lon, lat, era5_lon, era5_lat, d3=True)
            ua_t = interp_era5(ua[t], lon, lat, era5_lon, era5_lat, d3=True)
            va_t = interp_era5(va[t], lon, lat, era5_lon, era5_lat, d3=True)
            uas_t = interp_era5(uas[t], lon, lat, era5_lon, era5_lat, d3=False)
            vas_t = interp_era5(vas[t], lon, lat, era5_lon, era5_lat, d3=False)
            tas_t = interp_era5(tas[t], lon, lat, era5_lon, era5_lat, d3=False)
            ta2d_t = interp_era5(ta2d[t],
                                 lon,
                                 lat,
                                 era5_lon,
                                 era5_lat,
                                 d3=False)
            tp_t = interp_era5(tp[t], lon, lat, era5_lon, era5_lat, d3=False)
            mod_cape_t = interp_era5(mod_cape[t],
                                     lon,
                                     lat,
                                     era5_lon,
                                     era5_lat,
                                     d3=False)
        else:
            ta_t = ta[t]
            hur_t = hur[t]
            hgt_t = hgt[t]
            ps_t = ps[t]
            wap_t = wap[t]
            ua_t = ua[t]
            va_t = va[t]
            uas_t = uas[t]
            vas_t = vas[t]
            tas_t = tas[t]
            ta2d_t = ta2d[t]
            tp_t = tp[t]
            mod_cape_t = mod_cape[t]
        print(date_list[t])
        output = np.zeros((1, ps_t.shape[0], ps_t.shape[1], len(param)))

        if mdl_lvl:
            if era5_interp:
                p_3d = interp_era5(full_p3d[t],
                                   lon,
                                   lat,
                                   era5_lon,
                                   era5_lat,
                                   d3=True)
            else:
                p_3d = full_p3d[t]

        dp = get_dp(hur=hur_t, ta=ta_t, dp_mask=False)

        #Insert surface arrays, creating new arrays with "sfc" prefix
        sfc_ta = np.insert(ta_t, 0, tas_t, axis=0)
        sfc_hgt = np.insert(hgt_t, 0, terrain, axis=0)
        sfc_dp = np.insert(dp, 0, ta2d_t, axis=0)
        sfc_p_3d = np.insert(p_3d, 0, ps_t, axis=0)
        sfc_ua = np.insert(ua_t, 0, uas_t, axis=0)
        sfc_va = np.insert(va_t, 0, vas_t, axis=0)
        sfc_wap = np.insert(wap_t, 0, np.zeros(vas_t.shape), axis=0)

        #Sort by ascending p
        a,temp1,temp2 = np.meshgrid(np.arange(sfc_p_3d.shape[0]) , np.arange(sfc_p_3d.shape[1]),\
          np.arange(sfc_p_3d.shape[2]))
        sort_inds = np.flip(np.lexsort([np.swapaxes(a, 1, 0), sfc_p_3d],
                                       axis=0),
                            axis=0)
        sfc_hgt = np.take_along_axis(sfc_hgt, sort_inds, axis=0)
        sfc_dp = np.take_along_axis(sfc_dp, sort_inds, axis=0)
        sfc_p_3d = np.take_along_axis(sfc_p_3d, sort_inds, axis=0)
        sfc_ua = np.take_along_axis(sfc_ua, sort_inds, axis=0)
        sfc_va = np.take_along_axis(sfc_va, sort_inds, axis=0)
        sfc_ta = np.take_along_axis(sfc_ta, sort_inds, axis=0)

        #Calculate q and wet bulb for pressure level arrays with surface values
        sfc_ta_unit = units.units.degC * sfc_ta
        sfc_dp_unit = units.units.degC * sfc_dp
        sfc_p_unit = units.units.hectopascals * sfc_p_3d
        hur_unit = mpcalc.relative_humidity_from_dewpoint(ta_t*units.units.degC, dp*units.units.degC)*\
         100*units.units.percent
        q_unit = mpcalc.mixing_ratio_from_relative_humidity(hur_unit,\
         ta_t*units.units.degC,np.array(p_3d)*units.units.hectopascals)
        sfc_hur_unit = mpcalc.relative_humidity_from_dewpoint(sfc_ta_unit, sfc_dp_unit)*\
         100*units.units.percent
        sfc_q_unit = mpcalc.mixing_ratio_from_relative_humidity(sfc_hur_unit,\
         sfc_ta_unit,sfc_p_unit)
        sfc_theta_unit = mpcalc.potential_temperature(sfc_p_unit, sfc_ta_unit)
        sfc_thetae_unit = mpcalc.equivalent_potential_temperature(
            sfc_p_unit, sfc_ta_unit, sfc_dp_unit)
        sfc_thetae = np.array(mpcalc.equivalent_potential_temperature(ps_t*units.units.hectopascals,tas_t*units.units.degC,\
              ta2d_t*units.units.degC))
        sfc_q = np.array(sfc_q_unit)
        sfc_hur = np.array(sfc_hur_unit)
        #sfc_wb = np.array(wrf.wetbulb( sfc_p_3d*100, sfc_ta+273.15, sfc_q, units="degC"))

        #Use getcape.f90
        #cape_gb_mu1, cape_gb_mu4 = getcape_driver(sfc_p_3d, sfc_ta, sfc_dp, ps_t)

        #Now get most-unstable CAPE (max CAPE in vertical, ensuring parcels used are AGL)
        cape3d = wrf.cape_3d(sfc_p_3d,sfc_ta+273.15,\
          sfc_q,sfc_hgt,\
          terrain,ps_t,\
          True,meta=False, missing=0)
        cape = cape3d.data[0]
        cin = cape3d.data[1]
        lfc = cape3d.data[2]
        lcl = cape3d.data[3]
        el = cape3d.data[4]
        #Mask values which are below the surface and above 350 hPa AGL
        cape[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan
        cin[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan
        lfc[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan
        lcl[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan
        el[(sfc_p_3d > ps_t) | (sfc_p_3d < (ps_t - 350))] = np.nan
        #Get maximum (in the vertical), and get cin, lfc, lcl for the same parcel
        mu_cape_inds = np.tile(np.nanargmax(cape, axis=0),
                               (cape.shape[0], 1, 1))
        mu_cape = np.take_along_axis(cape, mu_cape_inds, 0)[0]
        mu_cin = np.take_along_axis(cin, mu_cape_inds, 0)[0]
        mu_lfc = np.take_along_axis(lfc, mu_cape_inds, 0)[0]
        mu_lcl = np.take_along_axis(lcl, mu_cape_inds, 0)[0]
        mu_el = np.take_along_axis(el, mu_cape_inds, 0)[0]
        muq = np.take_along_axis(sfc_q, mu_cape_inds, 0)[0] * 1000

        #Calculate other parameters
        #Thermo
        thermo_start = dt.datetime.now()
        lr700_500 = get_lr_p(ta_t, p_3d, hgt_t, 700, 500)
        melting_hgt = get_t_hgt(sfc_ta, np.copy(sfc_hgt), 0, terrain)
        melting_hgt = np.where((melting_hgt < 0) | (np.isnan(melting_hgt)), 0,
                               melting_hgt)
        ta500 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 500)
        ta925 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 925)
        ta850 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 850)
        ta700 = get_var_p_lvl(np.copy(sfc_ta), sfc_p_3d, 700)
        rho = mpcalc.density(
            np.array(sfc_p_3d) * (units.units.hectopascal),
            sfc_ta * units.units.degC, sfc_q_unit)
        rho925 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 925))
        rho850 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 850))
        rho700 = np.array(get_var_p_lvl(np.array(rho), sfc_p_3d, 700))
        #Winds
        winds_start = dt.datetime.now()
        s06 = get_shear_hgt(sfc_ua, sfc_va, np.copy(sfc_hgt), 0, 6000, terrain)
        s0500 = get_shear_p(ua_t,
                            va_t,
                            p_3d,
                            "sfc",
                            np.array([500]),
                            p_3d,
                            uas=uas_t,
                            vas=vas_t)[0]

        #WAP
        if model in ["erai", "era5"]:
            sfc_w = mpcalc.vertical_velocity( wap_t * (units.units.pascal / units.units.second),\
             np.array(p_3d) * (units.units.hectopascal), \
             ta_t * units.units.degC,  q_unit)
            w925 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 925))
            w850 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 850))
            w700 = np.array(get_var_p_lvl(np.array(sfc_w), p_3d, 700))

        #Convergence
        if era5_interp:
            x, y = np.meshgrid(era5_lon, era5_lat)
        else:
            x, y = np.meshgrid(lon, lat)
        dx, dy = mpcalc.lat_lon_grid_deltas(x, y)
        u925 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 925))
        u850 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 850))
        u700 = np.array(get_var_p_lvl(np.copy(sfc_ua), sfc_p_3d, 700))
        v925 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 925))
        v850 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 850))
        v700 = np.array(get_var_p_lvl(np.copy(sfc_va), sfc_p_3d, 700))
        conv925 = -1e5 * np.array(
            mpcalc.divergence(u925 * (units.units.meter / units.units.second),
                              v925 * (units.units.meter / units.units.second),
                              dx, dy))
        conv850 = -1e5 * np.array(
            mpcalc.divergence(u850 * (units.units.meter / units.units.second),
                              v850 * (units.units.meter / units.units.second),
                              dx, dy))
        conv700 = -1e5 * np.array(
            mpcalc.divergence(u700 * (units.units.meter / units.units.second),
                              v700 * (units.units.meter / units.units.second),
                              dx, dy))

        #CS6
        mucs6 = mu_cape * np.power(s06, 1.67)

        #Fill output
        output = fill_output(output, t, param, ps, "mu_cape", mu_cape)
        output = fill_output(output, t, param, ps, "mu_cin", mu_cin)
        output = fill_output(output, t, param, ps, "muq", muq)
        output = fill_output(output, t, param, ps, "s06", s06)
        output = fill_output(output, t, param, ps, "s0500", s0500)
        output = fill_output(output, t, param, ps, "lr700_500", lr700_500)
        output = fill_output(output, t, param, ps, "ta500", ta500)
        output = fill_output(output, t, param, ps, "mhgt", melting_hgt)
        output = fill_output(output, t, param, ps, "tp", tp_t)
        if (model == "erai") | (model == "era5"):
            output = fill_output(output, t, param, ps, "mod_cape", mod_cape_t)

        output_data[t] = output

    print("SAVING DATA...")
    param_out = []
    for param_name in param:
        temp_data = output_data[:, :, :, np.where(param == param_name)[0][0]]
        param_out.append(temp_data)

    #If the mhgt variable is zero everywhere, then it is likely that data has not been read.
    #In this case, set all values for that time step to missing (NaN).
    for t in np.arange(param_out[0].shape[0]):
        if param_out[np.where(param == "mhgt")[0][0]][t].max() == 0:
            for p in np.arange(len(param_out)):
                param_out[p][t] = np.nan

    if issave:
        if era5_interp:
            save_netcdf(region, model, out_name, date_list, era5_lat, era5_lon, param, param_out, \
             out_dtype = "f4", compress=True)
        else:
            save_netcdf(region, model, out_name, date_list, lat, lon, param, param_out, \
             out_dtype = "f4", compress=True)

    print(dt.datetime.now() - tot_start)
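
In the "Sort by ascending p" block, one set of sort indices is built from the pressure array and applied to every co-located field with np.take_along_axis, which keeps the vertical profiles aligned. A simplified sketch of the same idea, using a plain argsort in place of the lexsort tiebreaker (toy arrays, hypothetical values):

import numpy as np

# (level, lat, lon) stacks where the prepended surface level is out of order
sfc_p_3d = np.array([[[1000.]], [[500.]], [[850.]]])    # hPa
sfc_ta   = np.array([[[  25.]], [[ -20.]], [[  10.]]])  # degC

sort_inds = np.argsort(sfc_p_3d, axis=0)                # indices for ascending pressure
sfc_p_3d = np.take_along_axis(sfc_p_3d, sort_inds, axis=0)
sfc_ta   = np.take_along_axis(sfc_ta,   sort_inds, axis=0)

print(sfc_p_3d[:, 0, 0])   # [ 500.  850. 1000.]
print(sfc_ta[:, 0, 0])     # [-20.  10.  25.]  temperatures follow their levels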
Example #6
    print('#'*80)
    print('Test mesh properties')
    print('#'*80)
    for name, mesh in mesh_generator():
        check_properties(name, mesh)

    # fix triangle orientation
    print('#'*80)
    print('Fix triangle orientation')
    print('#'*80)
    for name, mesh in mesh_generator():
        mesh.compute_vertex_normals()
        triangles = np.asarray(mesh.triangles)
        rnd_idx = np.random.rand(*triangles.shape).argsort(axis=1)
        rnd_idx[0] = (0, 1, 2)
        triangles = np.take_along_axis(triangles, rnd_idx, axis=1)
        mesh.triangles = Vector3iVector(triangles)
        draw_geometries([mesh])
        success = mesh.orient_triangles()
        print('%s oriented: %s' % (name, 'yes' if success else 'no'))
        draw_geometries([mesh])

    # intersection tests
    print('#'*80)
    print('Intersection tests')
    print('#'*80)
    np.random.seed(30)
    bbox = create_mesh_box(20,20,20).translate((-10,-10,-10))
    meshes = [create_mesh_box() for _ in range(20)]
    meshes.append(create_mesh_sphere())
    meshes.append(create_mesh_cone())
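
The shuffle above draws an independent random permutation per row with np.random.rand(...).argsort(axis=1) and applies it with np.take_along_axis. A small NumPy-only sketch of that per-row shuffle (no Open3D needed):

import numpy as np

triangles = np.arange(12).reshape(4, 3)        # stand-in for a (n_triangles, 3) index array
rnd_idx = np.random.rand(*triangles.shape).argsort(axis=1)  # one random permutation per row
rnd_idx[0] = (0, 1, 2)                         # keep the first row in its original order
shuffled = np.take_along_axis(triangles, rnd_idx, axis=1)
print(shuffled)   # each row keeps the same three entries, in a shuffled order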
Example #7
    def get_vector_library(self, reciprocal_radius):
        """Calculates a library of diffraction vectors and pairwise inter-vector
        angles for a library of crystal structures.

        Parameters
        ----------
        reciprocal_radius : float
            The maximum g-vector magnitude to be included in the library.

        Returns
        -------
        vector_library : :class:`DiffractionVectorLibrary`
            Mapping of phase identifier to phase information in dictionary
            format.
        """
        # Define DiffractionVectorLibrary object to contain results
        vector_library = DiffractionVectorLibrary()
        # Get structures from structure library
        structure_library = self.structures.struct_lib
        # Iterate through phases in library.
        for phase_name in structure_library.keys():
            # Get diffpy.structure object associated with phase
            structure = structure_library[phase_name][0]
            # Get reciprocal lattice points within reciprocal_radius
            recip_latt = structure.lattice.reciprocal()
            miller_indices, coordinates, distances = get_points_in_sphere(
                recip_latt, reciprocal_radius)

            # Create pair_indices for selecting all point pair combinations
            num_indices = len(miller_indices)
            pair_a_indices, pair_b_indices = np.mgrid[:num_indices, :
                                                      num_indices]

            # Only select one of the permutations and don't pair an index with
            # itself (select above diagonal)
            upper_indices = np.triu_indices(num_indices, 1)
            pair_a_indices = pair_a_indices[upper_indices].ravel()
            pair_b_indices = pair_b_indices[upper_indices].ravel()

            # Mask off origin (0, 0, 0)
            origin_index = num_indices // 2
            pair_a_indices = pair_a_indices[pair_a_indices != origin_index]
            pair_b_indices = pair_b_indices[pair_b_indices != origin_index]

            pair_indices = np.vstack([pair_a_indices, pair_b_indices])

            # Create library entries
            angles = get_angle_cartesian_vec(coordinates[pair_a_indices],
                                             coordinates[pair_b_indices])
            pair_distances = distances[pair_indices.T]
            # Ensure longest vector is first
            len_sort = np.fliplr(pair_distances.argsort(axis=1))
            # phase_index_pairs is a list of [hkl1, hkl2]
            phase_index_pairs = np.take_along_axis(
                miller_indices[pair_indices.T],
                len_sort[:, :, np.newaxis],
                axis=1)
            # phase_measurements is a list of [len1, len2, angle]
            phase_measurements = np.column_stack(
                (np.take_along_axis(pair_distances, len_sort, axis=1), angles))

            # Only keep unique triplets
            unique_measurements, unique_measurement_indices = np.unique(
                phase_measurements, axis=0, return_index=True)
            vector_library[phase_name] = {
                'indices': phase_index_pairs[unique_measurement_indices],
                'measurements': unique_measurements
            }

        # Pass attributes to diffraction library from structure library.
        vector_library.identifiers = self.structures.identifiers
        vector_library.structures = self.structures.structures
        vector_library.reciprocal_radius = reciprocal_radius

        return vector_library
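
The "Ensure longest vector is first" step reuses one (n_pairs, 2) index array twice: directly on the pair distances, and broadcast with np.newaxis to reorder whole rows of the paired Miller indices. A minimal sketch of that double use (toy values):

import numpy as np

pair_distances = np.array([[1.0, 2.5],
                           [3.0, 0.5]])                  # (n_pairs, 2) vector lengths
hkl_pairs = np.array([[[1, 0, 0], [1, 1, 0]],
                      [[2, 0, 0], [0, 0, 1]]])           # (n_pairs, 2, 3) Miller indices

len_sort = np.fliplr(pair_distances.argsort(axis=1))     # longest vector first, per pair

sorted_lens = np.take_along_axis(pair_distances, len_sort, axis=1)
# Broadcasting the (n_pairs, 2) indices to (n_pairs, 2, 1) reorders whole hkl rows.
sorted_hkls = np.take_along_axis(hkl_pairs, len_sort[:, :, np.newaxis], axis=1)

print(sorted_lens)   # [[2.5 1. ] [3.  0.5]]
print(sorted_hkls)   # the two hkl rows of the first pair are swapped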
Example #8
File: pitch.py Project: lostanlen/librosa
def yin(
    y,
    *,
    fmin,
    fmax,
    sr=22050,
    frame_length=2048,
    win_length=None,
    hop_length=None,
    trough_threshold=0.1,
    center=True,
    pad_mode="constant",
):
    """Fundamental frequency (F0) estimation using the YIN algorithm.

    YIN is an autocorrelation based method for fundamental frequency estimation [#]_.
    First, a normalized difference function is computed over short (overlapping) frames of audio.
    Next, the first minimum in the difference function below ``trough_threshold`` is selected as
    an estimate of the signal's period.
    Finally, the estimated period is refined using parabolic interpolation before converting
    into the corresponding frequency.

    .. [#] De Cheveigné, Alain, and Hideki Kawahara.
        "YIN, a fundamental frequency estimator for speech and music."
        The Journal of the Acoustical Society of America 111.4 (2002): 1917-1930.

    Parameters
    ----------
    y : np.ndarray [shape=(..., n)]
        audio time series. Multi-channel is supported.
    fmin : number > 0 [scalar]
        minimum frequency in Hertz.
        The recommended minimum is ``librosa.note_to_hz('C2')`` (~65 Hz)
        though lower values may be feasible.
    fmax : number > 0 [scalar]
        maximum frequency in Hertz.
        The recommended maximum is ``librosa.note_to_hz('C7')`` (~2093 Hz)
        though higher values may be feasible.
    sr : number > 0 [scalar]
        sampling rate of ``y`` in Hertz.
    frame_length : int > 0 [scalar]
        length of the frames in samples.
        By default, ``frame_length=2048`` corresponds to a time scale of about 93 ms at
        a sampling rate of 22050 Hz.
    win_length : None or int > 0 [scalar]
        length of the window for calculating autocorrelation in samples.
        If ``None``, defaults to ``frame_length // 2``
    hop_length : None or int > 0 [scalar]
        number of audio samples between adjacent YIN predictions.
        If ``None``, defaults to ``frame_length // 4``.
    trough_threshold : number > 0 [scalar]
        absolute threshold for peak estimation.
    center : boolean
        If ``True``, the signal `y` is padded so that frame
        ``D[:, t]`` is centered at `y[t * hop_length]`.
        If ``False``, then ``D[:, t]`` begins at ``y[t * hop_length]``.
        Defaults to ``True``, which simplifies the alignment of ``D`` onto a
        time grid by means of ``librosa.core.frames_to_samples``.
    pad_mode : string or function
        If ``center=True``, this argument is passed to ``np.pad`` for padding
        the edges of the signal ``y``. By default (``pad_mode="constant"``),
        ``y`` is padded on both sides with zeros.
        If ``center=False``,  this argument is ignored.
        .. see also:: `np.pad`

    Returns
    -------
    f0: np.ndarray [shape=(..., n_frames)]
        time series of fundamental frequencies in Hertz.

        If multi-channel input is provided, f0 curves are estimated separately for each channel.

    See Also
    --------
    librosa.pyin :
        Fundamental frequency (F0) estimation using probabilistic YIN (pYIN).

    Examples
    --------
    Computing a fundamental frequency (F0) curve from an audio input

    >>> y = librosa.chirp(fmin=440, fmax=880, duration=5.0)
    >>> librosa.yin(y, fmin=440, fmax=880)
    array([442.66354675, 441.95299983, 441.58010963, ...,
        871.161732  , 873.99001454, 877.04297681])
    """

    if fmin is None or fmax is None:
        raise ParameterError('both "fmin" and "fmax" must be provided')

    # Set the default window length if it is not already specified.
    if win_length is None:
        win_length = frame_length // 2

    if win_length >= frame_length:
        raise ParameterError(
            "win_length={} cannot exceed given frame_length={}".format(
                win_length, frame_length))

    # Set the default hop if it is not already specified.
    if hop_length is None:
        hop_length = frame_length // 4

    # Check that audio is valid.
    util.valid_audio(y, mono=False)

    # Pad the time series so that frames are centered
    if center:
        padding = [(0, 0) for _ in y.shape]
        padding[-1] = (frame_length // 2, frame_length // 2)
        y = np.pad(y, padding, mode=pad_mode)

    # Frame audio.
    y_frames = util.frame(y, frame_length=frame_length, hop_length=hop_length)

    # Calculate minimum and maximum periods
    min_period = max(int(np.floor(sr / fmax)), 1)
    max_period = min(int(np.ceil(sr / fmin)), frame_length - win_length - 1)

    # Calculate cumulative mean normalized difference function.
    yin_frames = _cumulative_mean_normalized_difference(
        y_frames, frame_length, win_length, min_period, max_period)

    # Parabolic interpolation.
    parabolic_shifts = _parabolic_interpolation(yin_frames)

    # Find local minima.
    is_trough = util.localmin(yin_frames, axis=-2)
    is_trough[..., 0, :] = yin_frames[..., 0, :] < yin_frames[..., 1, :]

    # Find minima below peak threshold.
    is_threshold_trough = np.logical_and(is_trough,
                                         yin_frames < trough_threshold)

    # Absolute threshold.
    # "The solution we propose is to set an absolute threshold and choose the
    # smallest value of tau that gives a minimum of d' deeper than
    # this threshold. If none is found, the global minimum is chosen instead."
    target_shape = list(yin_frames.shape)
    target_shape[-2] = 1

    global_min = np.argmin(yin_frames, axis=-2)
    yin_period = np.argmax(is_threshold_trough, axis=-2)

    global_min = global_min.reshape(target_shape)
    yin_period = yin_period.reshape(target_shape)

    no_trough_below_threshold = np.all(~is_threshold_trough,
                                       axis=-2,
                                       keepdims=True)
    yin_period[no_trough_below_threshold] = global_min[
        no_trough_below_threshold]

    # Refine peak by parabolic interpolation.

    yin_period = (
        min_period + yin_period +
        np.take_along_axis(parabolic_shifts, yin_period, axis=-2))[..., 0, :]

    # Convert period to fundamental frequency.
    f0 = sr / yin_period
    return f0
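
The final refinement gathers one parabolic shift per frame along the lag axis (axis=-2), using an index array that keeps that axis so the shapes line up. A stripped-down 2-D sketch of that gather (librosa not required, values made up):

import numpy as np

# (lags, frames) toy stand-ins for yin_frames and parabolic_shifts
yin_frames = np.array([[0.9, 0.2],
                       [0.1, 0.8],
                       [0.5, 0.05]])
parabolic_shifts = np.array([[0.01, 0.02],
                             [0.03, 0.04],
                             [0.05, 0.06]])

# One lag index per frame, keeping the lag axis so shapes line up for the gather.
best_lag = np.expand_dims(np.argmin(yin_frames, axis=-2), axis=-2)   # shape (1, n_frames)
shift = np.take_along_axis(parabolic_shifts, best_lag, axis=-2)[0]
print(best_lag)   # [[1 2]]
print(shift)      # [0.03 0.06]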
Example #9
 def generate_curve(self, truth, proposals, scores, interpolate=True):
     """
     Generates PR curves given true query and proposal poses.
     Can select interpolation of precision, where precision values
     are replaced with maximum precision for all recall values
     greater or equal to current recall.
     """
     t_errs, R_errs = self._compute_errors(truth, proposals)
     scores_u = np.unique(scores)
     max_score = np.max(scores_u)
     self.scores = np.linspace(
         np.min(scores_u) - 1e-3, max_score + 1e-3, endpoint=True, num=1000
     )
     if self.model in ["Single", "Seq Match", "Graph"]:
         # ensures iterating through list means higher
         # model confidence
         self.scores = np.flip(self.scores)
     self.precisions = np.ones_like(self.scores)
     self.recalls = np.zeros_like(self.scores)
     self.F1 = np.zeros_like(self.scores)
     for i, score_thres in enumerate(self.scores):
         if self.model in ["Seq Match", "Single"]:
             localized = scores < score_thres
             t_err = t_errs
             R_err = R_errs
         else:
             ind_loc = self._localize_indices(scores, score_thres)
             # identify traverses where threshold met
             localized = ind_loc != -1
             t_err = np.squeeze(
                 np.take_along_axis(t_errs, ind_loc[:, np.newaxis], 1)
             )
             R_err = np.squeeze(
                 np.take_along_axis(R_errs, ind_loc[:, np.newaxis], 1)
             )
         correct = np.logical_and(t_err < self.t, R_err < self.R)
         # only count traverses with a proposal
         correct = np.logical_and(correct, localized)
         # compute precision and recall
         # index of -1 means not localized in max seq len
         nLocalized = np.count_nonzero(localized)
         nCorrect = np.count_nonzero(correct)
         if nLocalized > 0:
             # if none localized, precision = 1 by default
             self.precisions[i] = nCorrect / nLocalized
             if nCorrect + len(localized) - nLocalized > 0:
                 self.recalls[i] = nCorrect / (
                     nCorrect + len(localized) - nLocalized
                 )
     # flip curves for increasing recall
     self.precisions = np.flip(self.precisions)
     self.recalls = np.flip(self.recalls)
     self.scores = np.flip(self.scores)
     # ensure recalls are nondecreasing
     self.recalls, inds = np.unique(self.recalls, return_index=True)
     self.precisions = self.precisions[inds]
     self.scores = self.scores[inds]
     # chop off curve when recall first reaches 1
     ind_min = np.min(np.argwhere(self.recalls >= 1.0))
     self.recalls = self.recalls[: ind_min + 1]
     self.precisions = self.precisions[: ind_min + 1]
     self.scores = self.scores[: ind_min + 1]
     # interpolate precision, take max precision for
     # recall greater than raw recall
     if interpolate:
         for i in range(len(self.precisions)):
             self.precisions[i] = np.max(self.precisions[i:])
     return None
Example #10
def survival_score(front, ideal_point):
    m, n = front.shape
    crowd_dist = np.zeros(m)

    if m < n:
        p = 1
        normalization = np.max(front, axis=0)
        return crowd_dist, p, normalization

    # shift the ideal point to the origin
    front = front - ideal_point

    # Detect the extreme points and normalize the front
    extreme = find_corner_solutions(front)
    front, normalization = normalize(front, extreme)

    # set the distance for the extreme solutions
    crowd_dist[extreme] = np.inf
    selected = np.full(m, False)
    selected[extreme] = True

    # approximate p(norm)
    d = point_2_line_distance(front, np.zeros(n), np.ones(n))
    d[extreme] = np.inf
    index = np.argmin(d)
    # selected(index) = true
    # crowd_dist(index) = Inf
    p = np.log(n) / np.log(1.0 / np.mean(front[index, :]))

    if np.isnan(p) or p <= 0.1:
        p = 1.0
    elif p > 20:
        p = 20.0  # avoid numpy underflow

    nn = np.linalg.norm(front, p, axis=1)
    distances = minkowski_matrix(front, front, p=p)
    distances = distances / nn[:, None]

    neighbors = 2
    remaining = np.arange(m)
    remaining = list(remaining[~selected])
    for i in range(m - np.sum(selected)):
        mg = np.meshgrid(np.arange(selected.shape[0])[selected], remaining)
        D_mg = distances[tuple(
            mg)]  # avoid Numpy's future deprecation of array special indexing

        if D_mg.shape[1] > 1:
            # equivalent to mink(distances(remaining, selected),neighbors,2); in Matlab
            maxim = np.argpartition(D_mg, neighbors - 1, axis=1)[:, :neighbors]
            tmp = np.sum(np.take_along_axis(D_mg, maxim, axis=1), axis=1)
            index: int = np.argmax(tmp)
            d = tmp[index]
        else:
            index = D_mg[:, 0].argmax()
            d = D_mg[index, 0]

        best = remaining.pop(index)
        selected[best] = True
        crowd_dist[best] = d

    return crowd_dist, p, normalization
Example #11
def calculate_detection_probability(n_min_det_muts, panel_size, n_muts_cancer, hge_tumors,
                                    n_hge_normal, seq_err, sample_fraction, pval_th=None, required_mt_frags=None):
    """
    Calculate the probability to detect a tumor if there are hge_tumor hGE circulating in the entire bloodstream
    :param n_min_det_muts: number of minimally called mutations required for a positive cancer detection test
    :param panel_size: sequencing panel size
    :param n_muts_cancer: number of mutations covered by the panel that are clonally present in the tumor
    :param hge_tumors: array_like numbers of haploid genome equivalents (hGE) circulating in the entire bloodstream
    :param n_hge_normal: number of normal hGE circulating in the entire bloodstream
                        (will be multiplied by two to account for diploid genomes)
    :param seq_err: sequencing error rate per basepair
    :param sample_fraction: fraction of the bloodstream that is sampled
    :param pval_th: p-value threshold to call an individual mutation in the panel
    :param required_mt_frags: minimum number of mutated fragments required to call mutation at a given position
    :return: probability that the test will be positive
    """
    # note: every cell contains one hGE (either considering maternal or paternal copy); hence two genomes
    n_genomes_total = 2 * n_hge_normal + 2 * hge_tumors
    tumor_vaf = hge_tumors / n_genomes_total
    normal_vaf = 1 - tumor_vaf

    mt_prob = sample_fraction * ((normal_vaf * seq_err) + (tumor_vaf * (1 - seq_err)))
    seq_err_prob = sample_fraction * seq_err

    if pval_th is None and required_mt_frags is None:
        err_str = ('Either a p-value threshold or a number of required mutant fragments is needed '
                   'to compute detection probability.')
        logger.error(err_str)
        raise RuntimeError(err_str)

    elif pval_th is not None and required_mt_frags is not None:
        err_str = ('Only a p-value threshold or a number of required mutant fragments should be given '
                   'to compute detection probability. Not both.')
        logger.error(err_str)
        raise RuntimeError(err_str)

    elif required_mt_frags is not None:

        # probability to observe required_mt_frags or more mutant fragments of each of the minimally called mutations
        # for detection
        mt_prob_mt_pos = binom.sf(k=required_mt_frags - 1, n=n_genomes_total, p=mt_prob)

        # probability to observe required_mt_frags or more mutant fragments at positions not mutated in the tumor
        mt_prob_wt_pos = binom.sf(k=required_mt_frags - 1, n=n_genomes_total, p=seq_err_prob)

        # sum probabilities of combinations in which n_min_det_muts mutations can be detected
        probs = np.zeros((n_min_det_muts + 1, len(hge_tumors)))
        for det_muts in range(n_min_det_muts + 1):

            # observe a mutation with at least required_mt_frags fragments at a mutated position in the tumor
            if det_muts == n_min_det_muts:
                probs[det_muts, :] = binom.sf(k=det_muts - 1, n=n_muts_cancer, p=mt_prob_mt_pos)
            else:
                probs[det_muts, :] = binom.pmf(k=det_muts, n=n_muts_cancer, p=mt_prob_mt_pos)

            # observe a mutation with at least required_mt_frags fragments at a position not mutated in the tumor
            probs[det_muts, :] *= binom.sf(k=n_min_det_muts - det_muts - 1, n=panel_size - n_muts_cancer,
                                           p=mt_prob_wt_pos)

        # sum each column which denotes the probability that at least X mutations are detected
        det_prob = np.sum(probs, axis=0)
        logger.debug(f'Probability to observe at least {required_mt_frags} mutant fragments at the {n_min_det_muts}th '
                     + f'most mutated basepair: {det_prob}')

        return det_prob

    else:
        # extreme maximum of mutated fragments that could be expected under any conditions
        n_max_frags = int(round(max(binom.ppf(1.0 - 1e-10, n=n_genomes_total, p=seq_err_prob)))) + 2

        prob_k_more = np.zeros((n_max_frags, len(hge_tumors)))
        pvals = np.zeros_like(prob_k_more)

        for k in range(n_max_frags, 0, -1):
            # probability to observe k or more mutant fragments of each of the minimally called mutations for detection
            mt_prob_mt_pos = binom.sf(k=k - 1, n=n_genomes_total, p=mt_prob)

            # probability to observe k or more mutant fragments at positions not mutated in the tumor
            mt_prob_wt_pos = binom.sf(k=k - 1, n=n_genomes_total, p=seq_err_prob)

            # sum probabilities of combinations in which n_min_det_muts mutations can be detected
            probs = np.zeros((n_min_det_muts + 1, len(hge_tumors)))
            for det_muts in range(n_min_det_muts + 1):

                # observe a mutation with at least k fragments at a mutated position in the tumor
                if det_muts == n_min_det_muts:
                    probs[det_muts, :] = binom.sf(k=det_muts - 1, n=n_muts_cancer, p=mt_prob_mt_pos)
                else:
                    probs[det_muts, :] = binom.pmf(k=det_muts, n=n_muts_cancer, p=mt_prob_mt_pos)

                # observe a mutation with at least k fragments at a position not mutated in the tumor
                probs[det_muts, :] *= binom.sf(k=n_min_det_muts - det_muts - 1, n=panel_size - n_muts_cancer,
                                               p=mt_prob_wt_pos)

            # sum each column which denotes the probability that at least X mutations are detected
            prob_k_more[k - 1, :] = np.sum(probs, axis=0)
            logger.debug(f'Probability to observe at least {k} mutant fragments at the {n_min_det_muts}th '
                         + f'most mutated basepair: {np.mean(prob_k_more[k - 1, :]):.3e}')

            # probability to observe k or more mutant fragments at a basepair due to sequencing errors
            pvals[k - 1, :] = binom.sf(k=k - 1, n=n_genomes_total, p=seq_err_prob)

        # detection probability is equivalent to probability that p-value less or equal to p-value threshold is observed
        # take the minimal number of mutated fragments required that achieve a p-value lower or equal to the threshold
        required_mt_frags = np.argmin(pvals > pval_th, axis=0)
        # logger.info(f'{n_min_det_muts} muts required for detection requires: mean {np.mean(required_mt_frags)}, '
        #             + f'median {np.median(required_mt_frags)} mutant fragments.')
        det_prob = np.take_along_axis(prob_k_more, np.expand_dims(required_mt_frags, axis=0), axis=0)[0, :]

        return det_prob, required_mt_frags + 1
Example #12
    def run_top_k_test(self,
                       layer_class,
                       k,
                       batch_size,
                       num_queries,
                       num_candidates,
                       indices_dtype,
                       use_exclusions,
                       random_seed=42,
                       check_export=True):

        layer = layer_class(k=k)

        rng = np.random.RandomState(random_seed)
        candidates = rng.normal(size=(num_candidates, 4)).astype(np.float32)
        query = rng.normal(size=(num_queries, 4)).astype(np.float32)

        candidate_indices = np.arange(num_candidates).astype(
            indices_dtype if indices_dtype is not None else np.int32)

        exclude = rng.randint(0, num_candidates, size=(num_queries, 5))

        scores = np.dot(query, candidates.T)

        # Set scores of candidates chosen for exclusion to a low value.
        adjusted_scores = scores.copy()
        if use_exclusions:
            exclude_identifiers = candidate_indices[exclude]
            for row_idx, row in enumerate(exclude):
                for col_idx in set(row):
                    adjusted_scores[row_idx, col_idx] -= 1000.0
        else:
            exclude_identifiers = None

        # Get indices based on adjusted scores, but retain actual scores.
        indices = np.argsort(-adjusted_scores, axis=1)[:, :k]
        expected_top_scores = np.take_along_axis(scores, indices, 1)
        expected_top_indices = candidate_indices[indices]

        candidates = tf.data.Dataset.from_tensor_slices(candidates).batch(
            batch_size)

        if indices_dtype is not None:
            identifiers = tf.data.Dataset.from_tensor_slices(
                candidate_indices).batch(batch_size)
        else:
            identifiers = None

        # Call twice to ensure the results are repeatable.
        for _ in range(2):
            if use_exclusions:
                layer.index(candidates, identifiers)
                top_scores, top_indices = layer.query_with_exclusions(
                    query, exclude_identifiers)
            else:
                layer.index(candidates, identifiers)
                top_scores, top_indices = layer(query)

        self.assertAllEqual(top_scores.shape, expected_top_scores.shape)
        self.assertAllEqual(top_indices.shape, expected_top_indices.shape)
        self.assertAllClose(top_scores, expected_top_scores)

        self.assertAllEqual(top_indices.numpy().astype(indices_dtype),
                            expected_top_indices)

        if not check_export:
            return

        # Save and restore to check export.
        path = os.path.join(self.get_temp_dir(), "layer")
        layer.save(
            path,
            options=tf.saved_model.SaveOptions(namespace_whitelist=["Scann"]))
        restored = tf.keras.models.load_model(path)

        if use_exclusions:
            _, restored_top_indices = restored.query_with_exclusions(
                query, exclude_identifiers)
        else:
            _, restored_top_indices = restored(query)

        self.assertAllEqual(restored_top_indices.numpy().astype(indices_dtype),
                            expected_top_indices)
Example #13
    np.add.at(ret, hash_poses, 1)
    return ret


minsketch = np.apply_along_axis(count_index, -1, hash_poses_matrix)

print("fs, rs: {} {}".format(np.average(np.array(fs)),
                             np.average(np.array(rs))))
print(np.max(minsketch))
mslist = minsketch.tolist()
#[print(w) for w in minsketch[0]]

normal = [random.uniform(-1000, 1000) for _ in range(100)]
outlier = [random.uniform(-200, 1000) for _ in range(100)]
test_data = np.asarray([normal, outlier])

test_data = [np.take(test_data, dims, axis=1) for dims in chosen_dims]
test_data = [(te - emins[i]) / (emins[i]) + (emaxs[i])
             for i, te in enumerate(test_data)]
test_data = [(np.expand_dims(os[i], 0) + te) / fs[i]
             for i, te in enumerate(test_data)]
test_data_hashed = np.asarray(
    [[np.apply_along_axis(hfunc, 1, te) for hfunc in hashes]
     for te in test_data])

scores = np.take_along_axis(minsketch, test_data_hashed, -1)
scores = scores.min(axis=1)
scores = np.log2(scores + 1)
scores = np.average(scores, axis=0)
print(scores)
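
The lookup at the end reads each test point's counter from every hash row of the sketch in one np.take_along_axis call along the last axis; the minimum over hash rows is then the count-min estimate. A toy 2-D version of that read path (values made up):

import numpy as np

# Two hash rows, eight buckets each (a miniature count-min sketch).
minsketch = np.array([[5, 0, 2, 7, 1, 0, 3, 4],
                      [1, 6, 0, 2, 9, 3, 0, 5]])

# Bucket index of each test point under each hash function: shape (n_hashes, n_points).
hashed = np.array([[3, 6],
                   [4, 0]])

counts = np.take_along_axis(minsketch, hashed, axis=-1)   # per-hash counters
print(counts)              # [[7 3] [9 1]]
print(counts.min(axis=0))  # count-min estimate per point: [7 1]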
Example #14
 def value_inference(self):
     x = self.x.val
     indices = self.indices.val
     axis = self.axis.val
     return np.take_along_axis(x, indices, axis)
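
Since this op is a thin wrapper, it is a convenient place to restate the basic np.take_along_axis contract: the index array has the same number of dimensions as the input, and values are gathered along the chosen axis. A minimal sketch:

import numpy as np

x = np.array([[10, 40, 20],
              [60, 50, 30]])
indices = np.argsort(x, axis=1)          # same ndim as x

print(np.take_along_axis(x, indices, axis=1))
# [[10 20 40]
#  [30 50 60]]
print(np.take_along_axis(x, indices[:, :1], axis=1))   # just the per-row minimum
# [[10]
#  [30]]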
Example #15
with tf.Graph().as_default():
    w0 = tf.get_variable('dense_1_vars/weights',
                         shape=(200, h1_dim))  # define variables that we want
    w1 = tf.get_variable('dense_1_vars/weights_1', shape=(h1_dim, h2_dim))
    w2 = tf.get_variable('dense_1_vars/weights_2', shape=(h2_dim, 1))
    b0 = zeros(h1_dim, 'dense_1_vars/bias')
    b1 = zeros(h2_dim, 'dense_1_vars/bias_1')
    b2 = zeros(1, 'dense_1_vars/bias_2')
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, 'F:/volume/0217graphsage/0106/model_output/model')
        node_pred = custom_Dense(w0, w1, w2, b0, b1, b2)
        i_prediction = sess.run(tf.nn.sigmoid(node_pred(x))).reshape(size, -1)

    # examine the predictions
    print(len(np.where(i_prediction > 0.5)[0]))  # 315 values are above 0.5, 157 above 0.9
    # find where the answer indices are
    ans_where = np.where(np.in1d(candidate_ids, y))  # embedding indices of the answers
    # print(i_prediction.reshape(-1)[ans_where])  # predicted values for the answers
    print(len(np.where(
        i_prediction.reshape(-1)[ans_where] > 0.5)[0]))  # how many of the answers the NN scores above 0.5

    new_sorter = i_prediction.argsort(
        axis=1)[:, -150:]  # sort and select first 150
    new_sorter = np.flip(new_sorter, axis=1)
    batch_classes = np.tile(candidate_ids,
                            (size, 1))  # shape: N * len(candidate_ids)
    classes = np.take_along_axis(batch_classes, new_sorter, axis=1)
    # count hits
    print(np.where(np.in1d(classes.reshape(-1), y))[0].shape)
Example #16
    def sort(self, axis=-1, kind=None, order=None):
        """Sort an array in-place. Refer to `numpy.sort` for full documentation."""
        # TODO: probably possible to do this faster than going through argsort!
        indices = self.argsort(axis, kind=kind, order=order)
        self[:] = np.take_along_axis(self, indices, axis=axis)
예제 #17
0
def _generate_lookup_table(recip_latt,
                           reciprocal_radius: float, 
                           unique: bool=True):
    """Generate a look-up table with all combinations of indices,
    including their reciprocal distances and the angle between
    them.   

    Parameters
    ----------
    recip_latt : :class:`diffpy.structure.lattice.Lattice`
        Reciprocal lattice
    reciprocal_radius : float
        The maximum g-vector magnitude to be included in the library.
    unique : bool
        Return a unique list of phase measurements

    Returns
    -------
    indices : np.array
        Nx2x3 numpy array containing the miller indices for 
        reflection1, reflection2
    measurements : np.array
        Nx3 numpy array containing len1, len2, angle

    """
    miller_indices, coordinates, distances = get_points_in_sphere(
        recip_latt,
        reciprocal_radius)

    # Create pair_indices for selecting all point pair combinations
    num_indices = len(miller_indices)
    pair_a_indices, pair_b_indices = np.mgrid[:num_indices, :num_indices]

    # Only select one of the permutations and don't pair an index with
    # itself (select above diagonal)
    upper_indices = np.triu_indices(num_indices, 1)
    pair_a_indices = pair_a_indices[upper_indices].ravel()
    pair_b_indices = pair_b_indices[upper_indices].ravel()

    # Mask off origin (0, 0, 0)
    origin_index = num_indices // 2
    pair_a_indices = pair_a_indices[pair_a_indices != origin_index]
    pair_b_indices = pair_b_indices[pair_b_indices != origin_index]

    pair_indices = np.vstack([pair_a_indices, pair_b_indices])

    # Create library entries
    angles = get_angle_cartesian_vec(coordinates[pair_a_indices], coordinates[pair_b_indices])
    pair_distances = distances[pair_indices.T]
    # Ensure longest vector is first
    len_sort = np.fliplr(pair_distances.argsort(axis=1))
    # phase_index_pairs is a list of [hkl1, hkl2]
    phase_index_pairs = np.take_along_axis(miller_indices[pair_indices.T], len_sort[:, :, np.newaxis], axis=1)
    # phase_measurements is a list of [len1, len2, angle]
    phase_measurements = np.column_stack((np.take_along_axis(pair_distances, len_sort, axis=1), angles))

    if unique:
        # Only keep unique triplets
        measurements, measurement_indices = np.unique(phase_measurements, axis=0, return_index=True)
        indices = phase_index_pairs[measurement_indices]
    else:
        measurements = phase_measurements
        indices = phase_index_pairs

    return measurements, indices
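
A reduced sketch of the pair-sorting step above, with random stand-in data: sort each pair's two lengths so the longer vector comes first, then apply the same per-pair order to the paired 2x3 index array.

import numpy as np

rng = np.random.default_rng(0)
pair_distances = rng.random((5, 2))                  # len1, len2 for 5 pairs (stand-in)
pair_hkls = rng.integers(-2, 3, size=(5, 2, 3))      # hkl1, hkl2 for 5 pairs (stand-in)
len_sort = np.fliplr(pair_distances.argsort(axis=1))
sorted_lens = np.take_along_axis(pair_distances, len_sort, axis=1)
sorted_hkls = np.take_along_axis(pair_hkls, len_sort[:, :, np.newaxis], axis=1)
print(sorted_lens[0], sorted_hkls[0])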
예제 #18
0
    # # if chan_width.value < 0:
    # #     newchan_width *= -1.
    # spec_axis = np.arange(subcube.spectral_axis[0].value,
    #                       subcube.spectral_axis[-1].value,
    #                       newchan_width) * unit
    # assert spec_axis.size > 0
    # subcube = subcube.spectral_interpolate(spec_axis)

    # err_arr = noise_val * np.ones(subcube.shape[1:])

    peaktemp = subcube.max(axis=0)
    vcent = subcube.moment1()

    peakchans = subcube.argmax(axis=0)
    peakvels = np.take_along_axis(
        subcube.spectral_axis[:, np.newaxis, np.newaxis],
        peakchans[np.newaxis, :, :], 0)
    peakvels = peakvels.squeeze()
    peakvels = peakvels.to(u.km / u.s)

    # peak_name = fifteenA_HI_BCtaper_wEBHIS_HI_file_dict['PeakTemp']
    # peaktemp = Projection.from_hdu(fits.open(peak_name))

    # vcent_name = fourteenA_wEBHIS_HI_file_dict['Moment1']
    # vcent = Projection.from_hdu(fits.open(vcent_name)).to(u.km / u.s)

    # Restrict number of positions to fit.
    mask_peak = peaktemp >= 10 * u.K
    # Must have 5 channels above half the peak, following Braun+09
    mask_halfabovepeak = (subcube.filled_data[:] > 5 * u.K).sum(0) > 5
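
The peak-velocity lookup above in isolation, using plain arrays instead of a spectral-cube object (all sizes invented): argmax over the spectral axis gives a channel map, and take_along_axis with broadcast singleton axes turns it into a velocity map.

import numpy as np

nchan, ny, nx = 8, 3, 4
rng = np.random.default_rng(0)
cube = rng.random((nchan, ny, nx))
spectral_axis = np.linspace(-100.0, 100.0, nchan)    # hypothetical channel velocities
peakchans = cube.argmax(axis=0)                      # (ny, nx)
peakvels = np.take_along_axis(spectral_axis[:, np.newaxis, np.newaxis],
                              peakchans[np.newaxis, :, :], 0).squeeze()
print(peakvels.shape)                                # (ny, nx)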
예제 #19
0
        top10[round_num] = test_metrics['top_10_categorical_accuracy']

        # Generate Accuracy and Throughput Performance Curves
        keras_model = create_keras_model()
        # keras_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(), metrics=[top1,top10])
        state.model.assign_weights_to(keras_model)
        test_preds = keras_model.predict(test_data.lidar_data, batch_size=100)
        test_preds_idx = np.argsort(test_preds, axis=1)
        top_k = np.zeros(100)
        throughput_ratio_at_k = np.zeros(100)
        correct = 0
        for i in range(100):
            correct += np.sum(test_preds_idx[:, -1 - i] == np.argmax(test_data.beam_output, axis=1))
            top_k[i] = correct / test_data.beam_output.shape[0]
            throughput_ratio_at_k[i] = np.sum(np.log2(
                np.max(np.take_along_axis(test_data.beam_output_true, test_preds_idx, axis=1)[:, -1 - i:],
                       axis=1) + 1.0)) / np.sum(np.log2(np.max(test_data.beam_output_true, axis=1) + 1.0))

        sio.savemat('federated_accuracy' + str(round_num) + '.mat', {'accuracy': top_k})
        sio.savemat('federated_throughput' + str(round_num) + '.mat', {'throughput': throughput_ratio_at_k})

    sio.savemat('top1.mat', {'top1': top1})
    sio.savemat('top10.mat', {'top10': top10})

    np.savez("federated.npz", classification=top_k, throughput_ratio=throughput_ratio_at_k)
    accFL = accFL + metrics['train']['top_10_categorical_accuracy'] / MONTECARLO

    print(MONTECARLOi)

print(accFL)
예제 #20
0
def MIG_4_dSprites_cupy(z_samples, z_mean, z_stddev, num_samples=10000,
                        batch_size=10, version=1, gpu=0):
    """
    :param z_samples: [3, 6, 40, 32, 32, z_dim]
    :param z_mean: [3, 6, 40, 32, 32, z_dim]
    :param z_stddev: [3, 6, 40, 32, 32, z_dim]
    :param batch_size:
    :param version: 1 or 2
    :return:
    """

    assert version == 1 or version == 2, "'version' can only be 1 or 2!"
    if version == 1:
        estimate_entropies = estimate_entropies_v1_cupy
    else:
        estimate_entropies = estimate_entropies_v2

    assert z_samples.shape == z_mean.shape == z_stddev.shape, "z_samples.shape: {}, " \
        "z_mean.shape: {}, z_stddev.shape: {}".format(z_samples.shape, z_mean.shape, z_stddev.shape)
    assert len(z_samples.shape) == 6 and z_samples.shape[:-1] == (3, 6, 40, 32, 32), \
        "z_samples.shape: {}".format(z_samples.shape)

    print("Estimate marginal entropy")
    # H(q(z)) estimated with stratified sampling
    # (z_dim, )
    marginal_entropies = estimate_entropies(
        np.reshape(z_samples, [3 * 6 * 40 * 32 * 32, -1]),
        np.reshape(z_mean, [3 * 6 * 40 * 32 * 32, -1]),
        np.reshape(z_stddev, [3 * 6 * 40 * 32 * 32, -1]),
        num_samples=num_samples, batch_size=batch_size, gpu=gpu)
    # (1, z_dim)
    marginal_entropies = np.expand_dims(marginal_entropies, axis=0)

    # (5, z_dim)
    cond_entropies = np.zeros([5, z_samples.shape[-1]], dtype=np.float32)

    print("Estimate conditional entropy for shape")
    for i in range(3):
        cond_entropies_i = estimate_entropies(
            np.reshape(z_samples[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]),
            np.reshape(z_mean[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]),
            np.reshape(z_stddev[i, :, :, :, :, :], [6 * 40 * 32 * 32, -1]),
            num_samples=num_samples, batch_size=batch_size, gpu=gpu)

        # Compute the sum of conditional entropy for each shape value, then take the mean
        cond_entropies[0] += cond_entropies_i / 3.0

    print("Estimate conditional entropy for scale")
    for i in range(6):
        cond_entropies_i = estimate_entropies(
            np.reshape(z_samples[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]),
            np.reshape(z_mean[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]),
            np.reshape(z_stddev[:, i, :, :, :, :], [3 * 40 * 32 * 32, -1]),
            num_samples=num_samples, batch_size=batch_size, gpu=gpu)

        # Compute the sum of conditional entropy for each scale value, then take the mean
        cond_entropies[1] += cond_entropies_i / 6.0

    print("Estimate conditional entropy for rotation")
    for i in range(40):
        cond_entropies_i = estimate_entropies(
            np.reshape(z_samples[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]),
            np.reshape(z_mean[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]),
            np.reshape(z_stddev[:, :, i, :, :, :], [3 * 6 * 32 * 32, -1]),
            num_samples=num_samples, batch_size=batch_size, gpu=gpu)

        # Compute the sum of conditional entropy for each rotation value, then take the mean
        cond_entropies[2] += cond_entropies_i / 40.0

    print("Estimate conditional entropy for pos x")
    for i in range(32):
        cond_entropies_i = estimate_entropies(
            np.reshape(z_samples[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]),
            np.reshape(z_mean[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]),
            np.reshape(z_stddev[:, :, :, i, :, :], [3 * 6 * 40 * 32, -1]),
            num_samples=num_samples, batch_size=batch_size, gpu=gpu)

        # Compute the sum of conditional entropy for each pos x value, then take the mean
        cond_entropies[3] += cond_entropies_i / 32.0

    print("Estimate conditional entropy for pos y")
    for i in range(32):
        cond_entropies_i = estimate_entropies(
            np.reshape(z_samples[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]),
            np.reshape(z_mean[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]),
            np.reshape(z_stddev[:, :, :, :, i, :], [3 * 6 * 40 * 32, -1]),
            num_samples=num_samples, batch_size=batch_size, gpu=gpu)

        # Compute the sum of conditional entropy for each pos y value, then take the mean
        cond_entropies[4] += cond_entropies_i / 32.0

    # (5, z_dim)
    MIs = marginal_entropies - cond_entropies
    # (5, z_dim)
    ids_sorted = np.argsort(MIs, axis=1)[:, ::-1]
    MIs_sorted = np.take_along_axis(MIs, ids_sorted, axis=1)

    factor_entropies = np.log([3, 6, 40, 32, 32])

    # Normalize MI by the entropy of factors
    # (5, z_dim)
    MIs_sorted_normed = MIs_sorted / np.expand_dims(factor_entropies, axis=-1)
    # (5,)
    MIG = MIs_sorted_normed[:, 0] - MIs_sorted_normed[:, 1]

    results = {
        'H_z': marginal_entropies,
        'H_y': factor_entropies,
        'H_z_cond_y': cond_entropies,
        'MI': MIs,
        'MI_sorted': MIs_sorted,
        'MI_sorted_normed': MIs_sorted_normed,
        'MIG': MIG,
    }

    return results
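
The final MIG step, condensed with stand-in numbers: sort the mutual-information matrix row-wise in descending order, normalise by the factor entropies, and take the gap between the two most informative latents.

import numpy as np

rng = np.random.default_rng(0)
MIs = rng.random((5, 10))                            # 5 factors x 10 latents (stand-in)
factor_entropies = np.log([3, 6, 40, 32, 32])
ids_sorted = np.argsort(MIs, axis=1)[:, ::-1]
MIs_sorted = np.take_along_axis(MIs, ids_sorted, axis=1)
MIs_normed = MIs_sorted / factor_entropies[:, np.newaxis]
print(MIs_normed[:, 0] - MIs_normed[:, 1])           # per-factor gap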
예제 #21
0
def find_all_match_idx(query_embeddings: np.ndarray,
                       train_embeddings: np.ndarray,
                       k=0):
    """Find all matches for the test set in the training set at the sametime, using cupy.

    This is solely for optimisation purpose in order to get the code to run faster on machines
    with GPU.

    Args:
        query_embeddings (np.ndarray): Test set embeddings.
        train_embeddings (np.ndarray): Train set embeddings.
        k (int, optional): [description]. Defaults to 0.

    Raises:
        ValueError: The case where "k != 0" is not yet implemented.

    Returns:
        [type]: [description]
    """
    global use_cupy
    print("Using GPU to compute matches!")

    if k != 0:
        #best_matches = cp.argsort(-cp.dot(query_embeddings.T,train_embeddings))
        #match_idx = best_matches[k]
        raise ValueError("The case where k is not 0 must be implemented.")
    else:
        match_idxs = []
        query_chunk_size = 1024
        train_chunk_size = 65536 * 2
        for i in tqdm(
                range(0, math.ceil(len(query_embeddings) / query_chunk_size))):
            query_start = i * query_chunk_size
            query_end = query_start + query_chunk_size
            if query_end > len(query_embeddings):
                query_end = len(query_embeddings)
            cuda_query_embeddings = cp.asarray(
                query_embeddings[query_start:query_end])

            matches = []
            scores = []
            best_match_idx_chunk_score = np.zeros((query_end - query_start, 1))
            best_match_idx_chunk = np.zeros((query_end - query_start, 1),
                                            dtype=np.uint64)
            for j in range(
                    0,
                    math.ceil(train_embeddings.shape[1] / train_chunk_size)):
                train_start = j * train_chunk_size
                train_end = train_start + train_chunk_size
                if train_end > train_embeddings.shape[1]:
                    train_end = train_embeddings.shape[1]
                cuda_train_embeddings = cp.asarray(
                    train_embeddings[:, train_start:train_end])
                similarity = cp.dot(cuda_query_embeddings,
                                    cuda_train_embeddings)
                match_idx_chunk = cp.argmax(similarity, axis=1).get()
                similarity = similarity.get()
                match_idx_chunk_score = np.take_along_axis(similarity,
                                                           np.expand_dims(
                                                               match_idx_chunk,
                                                               axis=1),
                                                           axis=1)
                match_idx_chunk += train_start
                best_match_idx_chunk = np.where(
                    match_idx_chunk_score > best_match_idx_chunk_score,
                    np.expand_dims(match_idx_chunk, axis=1),
                    best_match_idx_chunk).astype(np.uint64)
                best_match_idx_chunk_score = np.where(
                    match_idx_chunk_score > best_match_idx_chunk_score,
                    match_idx_chunk_score, best_match_idx_chunk_score)

                #if use_cupy:
                #match_idx_chunk=match_idx_chunk.get()

                matches.append(match_idx_chunk)
            match_idxs += best_match_idx_chunk.squeeze().tolist()
        return match_idxs
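
The per-chunk gather above, stripped to its core with toy arrays: argmax the similarity rows, then pull the corresponding scores with take_along_axis so the chunk-wise best can be compared against the running best.

import numpy as np

rng = np.random.default_rng(0)
similarity = rng.random((6, 20))                     # queries x train items (stand-in)
best_idx = similarity.argmax(axis=1)
best_scores = np.take_along_axis(similarity, best_idx[:, np.newaxis], axis=1)
print(best_idx, best_scores.ravel())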
예제 #22
0
    def transform(self,
                  neigh_dist,
                  neigh_ind,
                  X=None,
                  assume_sorted: bool = True,
                  *args,
                  **kwargs) -> (np.ndarray, np.ndarray):
        """ Transform distance between test and training data with Mutual Proximity.

        Parameters
        ----------
        neigh_dist: np.ndarray, shape (n_query, n_neighbors)
            Distance matrix of test objects (rows) against their individual
            k nearest neighbors among the training data (columns).

        neigh_ind: np.ndarray, shape (n_query, n_neighbors)
            Neighbor indices corresponding to the values in neigh_dist

        X: ignored

        assume_sorted: bool, default = True
            Assume input matrices are sorted according to neigh_dist.
            If False, these are partitioned here.

            NOTE: The returned matrices are never sorted.

        Returns
        -------
        hub_reduced_dist, neigh_ind
            Local scaling distances, and corresponding neighbor indices

        Notes
        -----
        The returned distances are NOT sorted! If you use this class directly,
        you will need to sort the returned matrices according to hub_reduced_dist.
        Classes from :mod:`skhubness.neighbors` do this automatically.
        """
        check_is_fitted(self, 'r_dist_train_')

        n_test, n_indexed = neigh_dist.shape

        if n_indexed == 1:
            warnings.warn(
                f'Cannot perform hubness reduction with a single neighbor per query. '
                f'Skipping hubness reduction, and returning untransformed distances.'
            )
            return neigh_dist, neigh_ind

        # increment to include the k-th element in slicing
        k = self.k + 1

        # Find distances to the k-th neighbor (standard LS) or the k neighbors (NICDM)
        if assume_sorted:
            r_dist_test = neigh_dist[:, :k]
        else:
            kth = np.arange(self.k)
            mask = np.argpartition(neigh_dist, kth=kth)[:, :k]
            r_dist_test = np.take_along_axis(neigh_dist, mask, axis=1)

        # Calculate LS or NICDM
        hub_reduced_dist = np.empty_like(neigh_dist)

        # Optionally show progress of local scaling loop
        if self.verbose:
            range_n_test = tqdm(range(n_test),
                                total=n_test,
                                desc=f'LS {self.method}')
        else:
            range_n_test = range(n_test)

        # Perform standard local scaling...
        if self.method in ['ls', 'standard']:
            r_train = self.r_dist_train_[:, -1]
            r_test = r_dist_test[:, -1]
            for i in range_n_test:
                hub_reduced_dist[i, :] = \
                    1. - np.exp(-1 * neigh_dist[i] ** 2 / (r_test[i] * r_train[neigh_ind[i]]))
        # ...or use non-iterative contextual dissimilarity measure
        elif self.method == 'nicdm':
            r_train = self.r_dist_train_.mean(axis=1)
            r_test = r_dist_test.mean(axis=1)
            for i in range_n_test:
                hub_reduced_dist[i, :] = neigh_dist[i] / np.sqrt(
                    (r_test[i] * r_train[neigh_ind[i]]))
        else:
            raise ValueError(
                f"Internal: Invalid method {self.method}. Try 'ls' or 'nicdm'."
            )

        # Return the hubness reduced distances
        # These must be sorted downstream
        return hub_reduced_dist, neigh_ind
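
A minimal sketch of the assume_sorted=False branch, simplified to the standard argpartition idiom with toy data and a scalar kth: the k smallest distances per row are gathered without a full sort.

import numpy as np

rng = np.random.default_rng(0)
neigh_dist = rng.random((4, 8))
k = 3
mask = np.argpartition(neigh_dist, kth=k, axis=1)[:, :k]
r_dist = np.take_along_axis(neigh_dist, mask, axis=1)   # k smallest per row, unsorted
print(r_dist)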
예제 #23
0
def main(args):
    # =====================================
    # Load config
    # =====================================
    with open(join(args.output_dir, 'config.json')) as f:
        config = json.load(f)
    args.__dict__.update(config)

    # =====================================
    # Dataset
    # =====================================
    data_file = join(RAW_DATA_DIR, "ComputerVision", "dSprites",
                     "dsprites_ndarray_co1sh3sc6or40x32y32_64x64.npz")

    # It is already in the range [0, 1]
    with np.load(data_file, encoding="latin1") as f:
        x_train = f['imgs']
        # 3 shape * 6 scale * 40 rotation * 32 pos X * 32 pos Y
        y_train = f['latents_classes']

    x_train = np.expand_dims(x_train.astype(np.float32), axis=-1)
    num_train = len(x_train)
    print("num_train: {}".format(num_train))

    print("y_train[:10]: {}".format(y_train[:10]))

    # =====================================
    # Instantiate model
    # =====================================
    if args.enc_dec_model == "1Konny":
        encoder = Encoder_1Konny(args.z_dim, stochastic=True)
        decoder = Decoder_1Konny()
        disc_z = DiscriminatorZ_1Konny(num_outputs=2)
    else:
        raise ValueError("Do not support enc_dec_model='{}'!".format(args.enc_dec_model))

    model = FactorVAE([64, 64, 1], args.z_dim,
                      encoder=encoder, decoder=decoder,
                      discriminator_z=disc_z,
                      rec_x_mode=args.rec_x_mode,
                      use_gp0_z_tc=True, gp0_z_tc_mode=args.gp0_z_tc_mode)

    loss_coeff_dict = {
        'rec_x': args.rec_x_coeff,
        'kld_loss': args.kld_loss_coeff,
        'tc_loss': args.tc_loss_coeff,
        'gp0_z_tc': args.gp0_z_tc_coeff,
        'Dz_tc_loss_coeff': args.Dz_tc_loss_coeff,
    }

    model.build(loss_coeff_dict)
    SimpleParamPrinter.print_all_params_tf_slim()

    # =====================================
    # Load model
    # =====================================
    config_proto = tf.ConfigProto(allow_soft_placement=True)
    config_proto.gpu_options.allow_growth = True
    config_proto.gpu_options.per_process_gpu_memory_fraction = 0.9
    sess = tf.Session(config=config_proto)

    model_dir = make_dir_if_not_exist(join(args.output_dir, "model_tf"))
    train_helper = SimpleTrainHelper(log_dir=None, save_dir=model_dir)

    # Load model
    train_helper.load(sess, load_step=args.load_step)

    # =====================================
    # Experiments
    save_dir = make_dir_if_not_exist(join(args.save_dir, "{}_{}".format(args.enc_dec_model, args.run)))
    # =====================================

    np.set_printoptions(threshold=np.nan, linewidth=1000, precision=5, suppress=True)

    num_bins = args.num_bins
    bin_limits = tuple([float(s) for s in args.bin_limits.split(";")])
    data_proportion = args.data_proportion
    num_data = int(data_proportion * num_train)
    assert num_data == num_train, "For dSprites, you must use all data!"
    eps = 1e-8

    # file
    f = open(join(save_dir, 'log[bins={},bin_limits={},data={}].txt'.
                  format(num_bins, bin_limits, data_proportion)), mode='w')

    # print function
    print_ = functools.partial(print_both, file=f)

    print_("num_bins: {}".format(num_bins))
    print_("bin_limits: {}".format(bin_limits))
    print_("data_proportion: {}".format(data_proportion))

    # Compute bins
    # ================================= #
    print_("")
    print_("bin_limits: {}".format(bin_limits))
    assert len(bin_limits) == 2 and bin_limits[0] < bin_limits[1], "bin_limits={}".format(bin_limits)

    bins = np.linspace(bin_limits[0], bin_limits[1], num_bins + 1, endpoint=True)
    print_("bins: {}".format(bins))
    assert len(bins) == num_bins + 1

    bin_widths = [bins[b] - bins[b - 1] for b in range(1, len(bins))]
    print_("bin_widths: {}".format(bin_widths))
    assert len(bin_widths) == num_bins, "len(bin_widths)={} while num_bins={}!".format(len(bin_widths), num_bins)
    assert np.all(np.greater(bin_widths, 0)), "bin_widths: {}".format(bin_widths)

    bin_centers = [(bins[b] + bins[b - 1]) * 0.5 for b in range(1, len(bins))]
    print_("bin_centers: {}".format(bin_centers))
    assert len(bin_centers) == num_bins, "len(bin_centers)={} while num_bins={}!".format(len(bin_centers), num_bins)
    # ================================= #

    # Compute representations
    # ================================= #
    z_data_file = join(save_dir, "z_data[data={}].npz".format(data_proportion))

    if not exists(z_data_file):
        all_z_mean = []
        all_z_stddev = []

        print("")
        print("Compute all_z_mean, all_z_stddev and all_attrs!")

        count = 0
        for batch_ids in iterate_data(num_data, 10 * args.batch_size, shuffle=False):
            x = x_train[batch_ids]

            z_mean, z_stddev = sess.run(
                model.get_output(['z_mean', 'z_stddev']),
                feed_dict={model.is_train: False, model.x_ph: x})

            all_z_mean.append(z_mean)
            all_z_stddev.append(z_stddev)

            count += len(batch_ids)
            print("\rProcessed {} samples!".format(count), end="")
        print()

        all_z_mean = np.concatenate(all_z_mean, axis=0)
        all_z_stddev = np.concatenate(all_z_stddev, axis=0)

        np.savez_compressed(z_data_file, all_z_mean=all_z_mean,
                            all_z_stddev=all_z_stddev)
    else:
        print("{} exists. Load data from file!".format(z_data_file))
        with np.load(z_data_file, "r") as f:
            all_z_mean = f['all_z_mean']
            all_z_stddev = f['all_z_stddev']
    # ================================= #

    print_("")
    all_Q_z_cond_x = []
    for i in range(args.z_dim):
        print_("\nCompute all_Q_z{}_cond_x!".format(i))

        all_Q_s_cond_x = []
        for batch_ids in iterate_data(len(all_z_mean), 500, shuffle=False, include_remaining=True):
            # (batch_size, num_bins)
            q_s_cond_x = normal_density(np.expand_dims(bin_centers, axis=0),
                                        mean=np.expand_dims(all_z_mean[batch_ids, i], axis=-1),
                                        stddev=np.expand_dims(all_z_stddev[batch_ids, i], axis=-1))

            # (batch_size, num_bins)
            max_q_s_cond_x = np.max(q_s_cond_x, axis=-1)
            # print("\nmax_q_s_cond_x: {}".format(np.sort(max_q_s_cond_x)))

            # (batch_size, num_bins)
            deter_s_cond_x = at_bin(all_z_mean[batch_ids, i], bins).astype(np.float32)

            # (batch_size, num_bins)
            Q_s_cond_x = q_s_cond_x * np.expand_dims(bin_widths, axis=0)
            Q_s_cond_x = Q_s_cond_x / np.maximum(np.sum(Q_s_cond_x, axis=1, keepdims=True), eps)
            # print("sort(sum(Q_s_cond_x)) (before): {}".format(np.sort(np.sum(Q_s_cond_x, axis=-1))))

            Q_s_cond_x = np.where(np.expand_dims(np.less(max_q_s_cond_x, 1e-5), axis=-1),
                                  deter_s_cond_x, Q_s_cond_x)
            # print("sort(sum(Q_s_cond_x)) (after): {}".format(np.sort(np.sum(Q_s_cond_x, axis=-1))))

            all_Q_s_cond_x.append(Q_s_cond_x)

        # (num_samples, num_bins)
        all_Q_s_cond_x = np.concatenate(all_Q_s_cond_x, axis=0)
        assert np.all(all_Q_s_cond_x >= 0), "'all_Q_s_cond_x' contains negative values. " \
            "sorted_all_Q_s_cond_x[:30]:\n{}!".format(np.sort(all_Q_s_cond_x[:30], axis=None))
        assert len(all_Q_s_cond_x) == num_train

        all_Q_z_cond_x.append(all_Q_s_cond_x)

    # (z_dim, num_samples, num_bins)
    all_Q_z_cond_x = np.asarray(all_Q_z_cond_x, dtype=np.float32)
    print_("all_Q_z_cond_x.shape: {}".format(all_Q_z_cond_x.shape))
    print_("sum(all_Q_z_cond_x)[:, :10]:\n{}".format(np.sum(all_Q_z_cond_x, axis=-1)[:, :10]))

    # (z_dim, num_bins)
    Q_z = np.mean(all_Q_z_cond_x, axis=1)
    log_Q_z = np.log(np.clip(Q_z, eps, 1-eps))
    print_("Q_z.shape: {}".format(Q_z.shape))
    print_("sum(Q_z): {}".format(np.sum(Q_z, axis=-1)))

    # (z_dim, )
    H_z = -np.sum(Q_z * log_Q_z, axis=-1)

    # Factors
    gt_factors = ['shape', 'scale', 'rotation', 'pos_x', 'pos_y']
    gt_num_values = [3, 6, 40, 32, 32]

    MI_z_y = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32)
    H_z_y = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32)

    ids_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.int32)
    MI_z_y_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32)
    H_z_y_sorted = np.zeros([args.z_dim, len(gt_factors)], dtype=np.float32)

    H_y = []
    RMIG = []
    JEMMI = []

    for k, (factor, num_values) in enumerate(zip(gt_factors, gt_num_values)):
        print_("\n#" + "=" * 50 + "#")
        print_("The {}-th gt factor '{}' has {} values!".format(k, factor, num_values))

        print_("")
        # (num_samples, num_categories)
        # NOTE: We must use k+1 to account for the 'color' attribute, which is always white
        all_Q_yk_cond_x = one_hot(y_train[:, k+1], num_categories=num_values, dtype=np.float32)
        print_("all_Q_yk_cond_x.shape: {}".format(all_Q_yk_cond_x.shape))

        # (num_categories)
        Q_yk = np.mean(all_Q_yk_cond_x, axis=0)
        log_Q_yk = np.log(np.clip(Q_yk, eps, 1-eps))
        print_("Q_yk.shape: {}".format(Q_yk.shape))

        H_yk = -np.sum(Q_yk * log_Q_yk)
        print_("H_yk: {}".format(H_yk))
        H_y.append(H_yk)

        Q_z_yk = np.zeros([args.z_dim, num_bins, num_values], dtype=np.float32)

        # Compute I(zi, yk)
        for i in range(args.z_dim):
            print_("\n#" + "-" * 50 + "#")
            all_Q_zi_cond_x = all_Q_z_cond_x[i]
            assert len(all_Q_zi_cond_x) == len(all_Q_yk_cond_x) == num_train, \
                "all_Q_zi_cond_x.shape: {}, all_Q_yk_cond_x.shape: {}".format(
                    all_Q_zi_cond_x.shape, all_Q_yk_cond_x.shape)

            # (num_bins, num_categories)
            Q_zi_yk = np.matmul(np.transpose(all_Q_zi_cond_x, axes=[1, 0]), all_Q_yk_cond_x)
            Q_zi_yk = Q_zi_yk / num_train
            print_("np.sum(Q_zi_yk): {}".format(np.sum(Q_zi_yk)))
            Q_zi_yk = Q_zi_yk / np.maximum(np.sum(Q_zi_yk), eps)
            print_("np.sum(Q_zi_yk) (normalized): {}".format(np.sum(Q_zi_yk)))

            assert np.all(Q_zi_yk >= 0), "'Q_zi_yk' contains negative values. " \
                "sorted_Q_zi_yk[:10]:\n{}!".format(np.sort(Q_zi_yk, axis=None))

            # (num_bins, num_categories)
            log_Q_zi_yk = np.log(np.clip(Q_zi_yk, eps, 1 - eps))

            print_("")
            print_("Q_zi (default): {}".format(Q_z[i]))
            print_("Q_zi (sum of Q_zi_yk over yk): {}".format(np.sum(Q_zi_yk, axis=-1)))

            print_("")
            print_("Q_yk (default): {}".format(Q_yk))
            print_("Q_yk (sum of Q_zi_yk over zi): {}".format(np.sum(Q_zi_yk, axis=0)))

            MI_zi_yk = Q_zi_yk * (log_Q_zi_yk -
                                  np.expand_dims(log_Q_z[i], axis=-1) -
                                  np.expand_dims(log_Q_yk, axis=0))

            MI_zi_yk = np.sum(MI_zi_yk)
            H_zi_yk = -np.sum(Q_zi_yk * log_Q_zi_yk)

            Q_z_yk[i] = Q_zi_yk
            MI_z_y[i, k] = MI_zi_yk
            H_z_y[i, k] = H_zi_yk

            print_("#" + "-" * 50 + "#")

        # Print statistics for all z
        print_("")
        print_("MI_z_yk:\n{}".format(MI_z_y[:, k]))
        print_("H_z_yk:\n{}".format(H_z_y[:, k]))
        print_("H_z:\n{}".format(H_z))
        print_("H_yk:\n{}".format(H_yk))

        # Compute RMIG and JEMMI
        ids_yk_sorted = np.argsort(MI_z_y[:, k], axis=0)[::-1]
        MI_z_yk_sorted = np.take_along_axis(MI_z_y[:, k], ids_yk_sorted, axis=0)
        H_z_yk_sorted = np.take_along_axis(H_z_y[:, k], ids_yk_sorted, axis=0)

        RMIG_yk = np.divide(MI_z_yk_sorted[0] - MI_z_yk_sorted[1], H_yk)
        JEMMI_yk = np.divide(H_z_yk_sorted[0] - MI_z_yk_sorted[0] + MI_z_yk_sorted[1],
                             H_yk + np.log(num_bins))

        ids_sorted[:, k] = ids_yk_sorted
        MI_z_y_sorted[:, k] = MI_z_yk_sorted
        H_z_y_sorted[:, k] = H_z_yk_sorted

        RMIG.append(RMIG_yk)
        JEMMI.append(JEMMI_yk)

        print_("")
        print_("ids_sorted: {}".format(ids_sorted))
        print_("MI_z_yk_sorted: {}".format(MI_z_yk_sorted))
        print_("RMIG_yk: {}".format(RMIG_yk))
        print_("JEMMI_yk: {}".format(JEMMI_yk))

        z_yk_prob_file = join(save_dir, "z_yk_prob_4_{}[bins={},bin_limits={},data={}].npz".
                              format(factor, num_bins, bin_limits, data_proportion))
        np.savez_compressed(z_yk_prob_file, Q_z_yk=Q_z_yk)
        print_("#" + "=" * 50 + "#")

    results = {
        "MI_z_y": MI_z_y,
        "H_z_y": H_z_y,
        "ids_sorted": ids_sorted,
        "MI_z_y_sorted": MI_z_y_sorted,
        "H_z_y_sorted": H_z_y_sorted,
        "H_z": H_z,
        "H_y": np.asarray(H_y, dtype=np.float32),
        "RMIG": np.asarray(RMIG, dtype=np.float32),
        "JEMMI": np.asarray(JEMMI, dtype=np.float32),
    }
    result_file = join(save_dir, "results[bins={},bin_limits={},data={}].npz".
                       format(num_bins, bin_limits, data_proportion))
    np.savez_compressed(result_file, **results)

    f.close()
예제 #24
0
def evaluate_similarity_metrics(separation: nn.Module, completion: nn.Module,
                                triplet: nn.Module, device, dataset_path: str,
                                scannet_path: str, shapenet_path: str) -> None:
    unique_scan_objects, unique_cad_objects = get_unique_samples(dataset_path)

    batch_size = 1
    scan_dataset: Dataset = data.FileListDataset(
        scannet_path,
        unique_scan_objects,
        ".sdf",
        transformation=data.to_occupancy_grid)
    scan_dataloader = torch.utils.data.DataLoader(dataset=scan_dataset,
                                                  shuffle=False,
                                                  batch_size=batch_size)

    # Evaluate all unique scan embeddings
    embeddings: Dict[str, np.array] = {}
    for name, element in tqdm(scan_dataloader, total=len(scan_dataloader)):
        # Move data to GPU
        element = element.to(device)
        with torch.no_grad():
            scan_foreground, _ = separation(torch.sigmoid(element))
            scan_completed = completion(torch.sigmoid(scan_foreground))
            scan_latent = triplet.embed(torch.sigmoid(scan_completed)).view(-1)

        embeddings[name[0]] = scan_latent.cpu().numpy().squeeze()

    # Evaluate all unique cad embeddings
    cad_dataset: Dataset = data.FileListDataset(
        shapenet_path,
        unique_cad_objects,
        "__0__.df",
        transformation=data.to_occupancy_grid)
    cad_dataloader = torch.utils.data.DataLoader(dataset=cad_dataset,
                                                 shuffle=False,
                                                 batch_size=batch_size)

    for name, element in tqdm(cad_dataloader, total=len(cad_dataloader)):
        # Move data to GPU
        element = element.to(device)
        with torch.no_grad():
            cad_latent = triplet.embed(element).view(-1)

        embeddings[name[0]] = cad_latent.cpu().numpy().squeeze()

    # embedding_vectors = np.load("/mnt/raid/dahnert/joint_embedding_binary/embedding_vectors.npy")
    # embedding_names = json.load(open("/mnt/raid/dahnert/joint_embedding_binary/embedding_names.json"))
    # embeddings = dict(zip(embedding_names, embedding_vectors))

    # Evaluate metrics
    with open(dataset_path) as f:
        samples = json.load(f).get("samples")

        retrieved_correct = 0
        retrieved_total = 0

        ranked_correct = 0
        ranked_total = 0

        selected_categories = [
            "02747177", "02808440", "02818832", "02871439", "02933112",
            "03001627", "03211117", "03337140", "04256520", "04379243", "other"
        ]
        per_category_retrieved_correct = {
            category: 0
            for category in selected_categories
        }
        per_category_retrieved_total = {
            category: 0
            for category in selected_categories
        }

        per_category_ranked_correct = {
            category: 0
            for category in selected_categories
        }
        per_category_ranked_total = {
            category: 0
            for category in selected_categories
        }

        # Iterate over all annotations
        for sample in tqdm(samples, total=len(samples)):
            reference_name = sample["reference"]["name"].replace("/scan/", "")
            reference_embedding = embeddings[reference_name][np.newaxis, :]

            pool_names = np.asarray(
                [p["name"].replace("/cad/", "") for p in sample["pool"]])
            pool_embeddings = [embeddings[p] for p in pool_names]
            pool_embeddings = np.asarray(pool_embeddings)

            # Compute distances in embedding space
            distances = scipy.spatial.distance.cdist(reference_embedding,
                                                     pool_embeddings,
                                                     metric="euclidean")
            sorted_indices = np.argsort(distances, axis=1)
            sorted_distances = np.take_along_axis(distances,
                                                  sorted_indices,
                                                  axis=1)
            sorted_distances = sorted_distances[0]

            predicted_ranking = np.take(pool_names, sorted_indices)[0].tolist()

            ground_truth_names = [
                r["name"].replace("/cad/", "") for r in sample["ranked"]
            ]

            # retrieval accuracy
            sample_retrieved_correct = 1 if metrics.is_correctly_retrieved(
                predicted_ranking, ground_truth_names) else 0
            retrieved_correct += sample_retrieved_correct
            retrieved_total += 1

            # per-category retrieval accuracy
            reference_category = metrics.get_category_from_list(
                metrics.get_category(reference_name), selected_categories)
            per_category_retrieved_correct[
                reference_category] += sample_retrieved_correct
            per_category_retrieved_total[reference_category] += 1

            # ranking quality
            sample_ranked_correct = metrics.count_correctly_ranked_predictions(
                predicted_ranking, ground_truth_names)
            ranked_correct += sample_ranked_correct
            ranked_total += len(ground_truth_names)

            per_category_ranked_correct[
                reference_category] += sample_ranked_correct
            per_category_ranked_total[reference_category] += len(
                ground_truth_names)

        print(
            f"correct: {retrieved_correct}, total: {retrieved_total}, accuracy: {retrieved_correct/retrieved_total}"
        )

        for (category,
             correct), total in zip(per_category_retrieved_correct.items(),
                                    per_category_retrieved_total.values()):
            print(
                f"{category}: {correct:>5d}/{total:>5d} --> {correct/total:4.3f}"
            )

        print(
            f"correct: {ranked_correct}, total: {ranked_total}, accuracy: {ranked_correct/ranked_total}"
        )

        for (category,
             correct), total in zip(per_category_ranked_correct.items(),
                                    per_category_ranked_total.values()):
            print(
                f"{category}: {correct:>5d}/{total:>5d} --> {correct/total:4.3f}"
            )

    return None
예제 #25
0
def process(policy, dataloader, top_k):
    mean_kacc = np.zeros(len(top_k))

    n_samples_processed = 0
    for batch in dataloader:

        if policy['type'] == 'gcnn':
            c, ei, ev, v, n_cs, n_vs, n_cands, cands, best_cands, cand_scores = batch

            pred_scores = policy['model'](
                (c, ei, ev, v, tf.reduce_sum(
                    n_cs, keepdims=True), tf.reduce_sum(n_vs, keepdims=True)),
                tf.convert_to_tensor(False))

            # filter candidate variables
            pred_scores = tf.expand_dims(
                tf.gather(tf.squeeze(pred_scores, 0), cands), 0)

        elif policy['type'] == 'ml-competitor':
            cand_feats, n_cands, best_cands, cand_scores = batch

            # move to numpy
            cand_feats = cand_feats.numpy()
            n_cands = n_cands.numpy()

            # feature normalization
            cand_feats = (cand_feats -
                          policy['feat_shift']) / policy['feat_scale']

            pred_scores = policy['model'].predict(cand_feats)

            # move back to TF
            pred_scores = tf.convert_to_tensor(pred_scores.reshape((1, -1)),
                                               dtype=tf.float32)

        # padding
        pred_scores = padding(pred_scores, n_cands)
        true_scores = padding(tf.reshape(cand_scores, (1, -1)), n_cands)
        true_bestscore = tf.reduce_max(true_scores, axis=-1, keepdims=True)

        assert all(true_bestscore.numpy() == np.take_along_axis(
            true_scores.numpy(), best_cands.numpy().reshape((-1, 1)), axis=1))

        kacc = []
        for k in top_k:
            pred_top_k = tf.nn.top_k(pred_scores, k=k)[1].numpy()
            pred_top_k_true_scores = np.take_along_axis(true_scores.numpy(),
                                                        pred_top_k,
                                                        axis=1)
            kacc.append(
                np.mean(
                    np.any(pred_top_k_true_scores == true_bestscore.numpy(),
                           axis=1)))
        kacc = np.asarray(kacc)

        batch_size = int(n_cands.shape[0])
        mean_kacc += kacc * batch_size
        n_samples_processed += batch_size

    mean_kacc /= n_samples_processed

    return mean_kacc
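
The k-accuracy check above in a toy form: gather the true scores of the predicted top-k candidates and test whether the row's best true score appears among them.

import numpy as np

rng = np.random.default_rng(0)
true_scores = rng.random((3, 6))
pred_scores = rng.random((3, 6))
k = 2
pred_top_k = np.argsort(pred_scores, axis=1)[:, -k:]
top_k_true = np.take_along_axis(true_scores, pred_top_k, axis=1)
best = true_scores.max(axis=1, keepdims=True)
print(np.mean(np.any(top_k_true == best, axis=1)))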
예제 #26
0
def shuffle_along_axis(
    a, axis
):  # Function courtesy of Divakar (https://stackoverflow.com/questions/5040797/shuffling-numpy-array-along-a-given-axis/5044364#5044364)
    idx = np.random.rand(*a.shape).argsort(axis=axis)
    return np.take_along_axis(a, idx, axis=axis)
예제 #27
0
    def simplify_ad(self):
        if len(self.shape) == 0:  # Workaround for scalar-like arrays
            other = self.reshape((1, ))
            other.simplify_ad()
            other = other.reshape(tuple())
            self.coef, self.index = other.coef, other.index
            return
        bad_index = np.iinfo(self.index.dtype).max
        bad_pos = self.coef == 0
        self.index[bad_pos] = bad_index
        ordering = self.index.argsort(axis=-1)
        self.coef = np.take_along_axis(self.coef, ordering, axis=-1)
        self.index = np.take_along_axis(self.index, ordering, axis=-1)

        cum_coef = np.full(self.shape, 0.)
        indices = np.full(self.shape, 0)
        size_ad = self.size_ad
        self.coef = np.moveaxis(self.coef, -1, 0)
        self.index = np.moveaxis(self.index, -1, 0)
        prev_index = np.copy(self.index[0])

        for i in range(size_ad):
            # Note : self.index, self.coef change during iterations
            ind, co = self.index[i], self.coef[i]
            pos_new_index = np.logical_and(prev_index != ind, ind != bad_index)
            pos_old_index = np.logical_not(pos_new_index)
            prev_index[pos_new_index] = ind[pos_new_index]
            cum_coef[pos_new_index] = co[pos_new_index]
            cum_coef[pos_old_index] += co[pos_old_index]
            indices[pos_new_index] += 1
            indices_exp = np.expand_dims(indices, axis=0)
            np.put_along_axis(self.index, indices_exp, prev_index, axis=0)
            np.put_along_axis(self.coef, indices_exp, cum_coef, axis=0)

        indices[self.index[0] == bad_index] = -1
        indices_max = np.max(indices, axis=None)
        size_ad_new = indices_max + 1
        self.coef = self.coef[:size_ad_new]
        self.index = self.index[:size_ad_new]
        if size_ad_new == 0:
            self.coef = np.moveaxis(self.coef, 0, -1)
            self.index = np.moveaxis(self.index, 0, -1)
            return

        coef_end = self.coef[np.maximum(indices_max, 0)]
        index_end = self.index[np.maximum(indices_max, 0)]
        coef_end[indices < indices_max] = 0.
        index_end[indices < indices_max] = -1
        while np.min(indices, axis=None) < indices_max:
            indices = np.minimum(indices_max, 1 + indices)
            indices_exp = np.expand_dims(indices, axis=0)
            np.put_along_axis(self.coef, indices_exp, coef_end, axis=0)
            np.put_along_axis(self.index, indices_exp, index_end, axis=0)

        self.coef = np.moveaxis(self.coef, 0, -1)
        self.index = np.moveaxis(self.index, 0, -1)
        self.coef = self.coef.reshape(self.shape + (size_ad_new, ))
        self.index = self.index.reshape(self.shape + (size_ad_new, ))

        self.index[self.index == -1] = 0  # Corresponding coefficient is zero anyway.
예제 #28
0
def shuffle_along_axis(a, axis):
    idx = np.random.rand(*a.shape).argsort(axis=axis)
    return np.take_along_axis(a, idx, axis=axis)
예제 #29
0
def assign(dts: np.ndarray, gts: np.ndarray, cfg: DetectionCfg) -> np.ndarray:
    """Attempt assignment of each detection to a ground truth label.

    Args:
        dts: Detections of shape (N,).
        gts: Ground truth labels of shape (M,).
        cfg: Detection configuration.

    Returns:
        metrics: Matrix of true/false positive concatenated with true positive errors (N, K + S) where K is the number
            of true positive thresholds used for AP computation and S is the number of true positive errors.
    """

    # Ensure the number of boxes considered per class is at most `MAX_NUM_BOXES`.
    if dts.shape[0] > MAX_NUM_BOXES:
        dts = dts[:MAX_NUM_BOXES]

    n_threshs = len(cfg.affinity_threshs)
    metrics = np.zeros((dts.shape[0], n_threshs + N_TP_ERRORS))

    # Set the true positive metrics to np.nan since error is undefined on false positives.
    metrics[:, n_threshs : n_threshs + N_TP_ERRORS] = np.nan
    if gts.shape[0] == 0:
        return metrics

    affinity_matrix = compute_affinity_matrix(dts, gts, cfg.affinity_fn_type)

    # Get the GT label for each max-affinity GT label, detection pair.
    gt_matches = affinity_matrix.argmax(axis=1)[np.newaxis, :]

    # The affinity matrix is an N by M matrix of the detections and ground truth labels respectively.
    # We want to take the corresponding affinity for each of the initial assignments using `gt_matches`.
    # The following line grabs the max affinity for each detection to a ground truth label.
    affinities = np.take_along_axis(affinity_matrix.T, gt_matches, axis=0).squeeze(0)

    # Find the indices of the "first" detection assigned to each GT.
    unique_gt_matches, unique_dt_matches = np.unique(gt_matches, return_index=True)
    for i, thresh in enumerate(cfg.affinity_threshs):

        # `tp_mask` may need to be defined differently with other affinities.
        tp_mask = affinities[unique_dt_matches] > -thresh
        metrics[unique_dt_matches, i] = tp_mask

        # Only compute true positive error when `thresh` is equal to the tp threshold.
        is_tp_thresh = thresh == cfg.tp_thresh
        # Ensure that there are true positives of the respective class in the frame.
        has_true_positives = np.count_nonzero(tp_mask) > 0

        if is_tp_thresh and has_true_positives:
            dt_tp_indices = unique_dt_matches[tp_mask]
            gt_tp_indices = unique_gt_matches[tp_mask]

            # Form DataFrame of shape (N, D) where D is the number of attributes in `ObjectLabelRecord`.
            dt_df = pd.DataFrame([dt.__dict__ for dt in dts[dt_tp_indices]])
            gt_df = pd.DataFrame([gt.__dict__ for gt in gts[gt_tp_indices]])

            trans_error = dist_fn(dt_df, gt_df, DistFnType.TRANSLATION)
            scale_error = dist_fn(dt_df, gt_df, DistFnType.SCALE)
            orient_error = dist_fn(dt_df, gt_df, DistFnType.ORIENTATION)

            metrics[dt_tp_indices, n_threshs : n_threshs + N_TP_ERRORS] = np.vstack(
                (trans_error, scale_error, orient_error)
            ).T
    return metrics
예제 #30
0
def v_iter_couple(setup,
                  t,
                  EV_tuple,
                  ushift,
                  nbatch=nbatch_def,
                  verbose=False,
                  force_f32=False):

    if verbose: start = default_timer()

    agrid = setup.agrid_c
    sgrid = setup.sgrid_c

    dtype = setup.dtype

    ls = setup.ls_levels
    nls = len(ls)

    # type conversion is here

    zf = setup.exogrid.all_t[t][:, 0]
    zm = setup.exogrid.all_t[t][:, 1]
    zftrend = setup.pars['f_wage_trend'][t]
    zmtrend = setup.pars['m_wage_trend'][t]

    psi = setup.exogrid.all_t[t][:, 2]
    beta = setup.pars['beta_t'][t]
    sigma = setup.pars['crra_power']
    R = setup.pars['R_t'][t]

    nexo = setup.pars['nexo_t'][t]
    shp = (setup.na, nexo, setup.ntheta)

    wf = np.exp(zf + zftrend)
    wm = np.exp(zm + zmtrend)

    dtype_here = np.float32 if force_f32 else dtype

    if EV_tuple is None:
        EVr_by_l, EVc_by_l, EV_fem_by_l, EV_mal_by_l = np.zeros(
            ((4, ) + shp + (nls, )), dtype=dtype)
    else:
        EVr_by_l, EVc_by_l, EV_fem_by_l, EV_mal_by_l = EV_tuple

    # type conversion
    sgrid, sigma, beta = (dtype(x) for x in (sgrid, sigma, beta))

    V_couple, c_opt, s_opt, x_opt = np.empty((4, ) + shp, dtype)
    i_opt, il_opt = np.empty(shp, np.int16), np.empty(shp, np.int16)

    V_all_l = np.empty(shp + (nls, ), dtype=dtype)

    theta_val = dtype(setup.thetagrid)

    # the original problem is max{umult*u(c) + beta*EV}
    # we need to rescale the problem to max{u(c) + beta*EV_resc}

    istart = 0
    ifinish = nbatch if nbatch < nexo else nexo

    # Time the husband contributes to building Q
    mt = 1.0 - setup.mlevel

    # this naturally splits everything into slices

    for ibatch in range(int(np.ceil(nexo / nbatch))):
        #money_i = money[:,istart:ifinish]
        assert ifinish > istart

        money_t = (R * agrid, wf[istart:ifinish], wm[istart:ifinish])
        EV_t = (setup.vsgrid_c, EVr_by_l[:, istart:ifinish, :, :])


        V_pure_i, c_opt_i, x_opt_i, s_opt_i, i_opt_i, il_opt_i, V_all_l_i = \
           v_optimize_couple(money_t,sgrid,EV_t,setup.mgrid,
                             setup.ucouple_precomputed_u,setup.ucouple_precomputed_x,
                                 ls,beta,ushift,dtype=dtype_here,mt=mt)

        V_ret_i = V_pure_i + psi[None, istart:ifinish, None]

        # if dtype_here != dtype type conversion happens here

        V_couple[:, istart:ifinish, :] = V_ret_i  # this estimate of V can be improved
        c_opt[:, istart:ifinish, :] = c_opt_i
        s_opt[:, istart:ifinish, :] = s_opt_i
        i_opt[:, istart:ifinish, :] = i_opt_i
        x_opt[:, istart:ifinish, :] = x_opt_i
        il_opt[:, istart:ifinish, :] = il_opt_i
        V_all_l[:, istart:ifinish, :, :] = V_all_l_i  # we need this for l choice so it is ok

        istart = ifinish
        ifinish = ifinish + nbatch if ifinish + nbatch < nexo else nexo

        if verbose:
            print('Batch {} done at {} sec'.format(ibatch,
                                                   default_timer() - start))

    assert np.all(c_opt > 0)

    psi_r = psi[None, :, None].astype(setup.dtype, copy=False)

    # finally obtain value functions of partners
    uf, um = setup.u_part(c_opt, x_opt, il_opt, theta_val[None, None, :],
                          ushift, psi_r)
    uc = setup.u_couple(c_opt, x_opt, il_opt, theta_val[None, None, :], ushift,
                        psi_r)

    EVf_all, EVm_all, EVc_all = (setup.vsgrid_c.apply_preserve_shape(x)
                                 for x in (EV_fem_by_l, EV_mal_by_l, EVc_by_l))

    V_fem = uf + beta * np.take_along_axis(
        np.take_along_axis(EVf_all, i_opt[..., None], 0), il_opt[..., None],
        3).squeeze(axis=3)
    V_mal = um + beta * np.take_along_axis(
        np.take_along_axis(EVm_all, i_opt[..., None], 0), il_opt[..., None],
        3).squeeze(axis=3)
    V_all = uc + beta * np.take_along_axis(
        np.take_along_axis(EVc_all, i_opt[..., None], 0), il_opt[..., None],
        3).squeeze(axis=3)

    #def r(x): return x.astype(dtype)

    def r(x):
        return x

    assert V_all.dtype == dtype
    assert V_fem.dtype == dtype
    assert V_mal.dtype == dtype
    assert c_opt.dtype == dtype
    assert x_opt.dtype == dtype
    assert s_opt.dtype == dtype

    try:
        assert np.allclose(V_all, V_couple, atol=1e-4, rtol=1e-3)
    except:
        #print('max difference in V is {}'.format(np.max(np.abs(V_all-V_couple))))
        pass

    return r(V_all), r(V_fem), r(V_mal), r(c_opt), r(x_opt), r(
        s_opt), il_opt, r(V_all_l)
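
The nested gather near the end of the snippet above, reduced to shapes only (all sizes invented): for every state, pick the continuation value at the chosen index along axis 0 (i_opt) and then along the last axis (il_opt).

import numpy as np

na, nexo, ntheta, nls = 5, 4, 3, 2
rng = np.random.default_rng(0)
EV = rng.random((na, nexo, ntheta, nls))
i_opt = rng.integers(0, na, size=(na, nexo, ntheta))
il_opt = rng.integers(0, nls, size=(na, nexo, ntheta))
picked = np.take_along_axis(
    np.take_along_axis(EV, i_opt[..., None], 0), il_opt[..., None], 3).squeeze(axis=3)
print(picked.shape)                                  # (na, nexo, ntheta)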
예제 #31
0
            if current_frame_index < para.start_frame + para.window_length and not is_first_statuation:
                hsv[statistics_index] = frame_HSV
                v_count = counting(statistics_index, hsv, v_count)  # accumulate the initial value histogram
                statistics_index = statistics_index + 1

            if current_frame_index == para.start_frame + para.window_length and not is_first_statuation:
                v_count = counting(statistics_index, hsv, v_count)
                is_first_statuation = True

            if current_frame_index > para.start_frame + para.window_length or is_first_statuation:
                different_frame = np.zeros((frame_x, frame_y), np.uint8)  # allocate
                moving_obj_frame_temp = different_frame[:, :]  # allocate

                moving_obj_bool_frame = np.take_along_axis(v_count, (frame_HSV[:, :, statistics_channel] * 255).
                                                           reshape(frame_x, frame_y, 1).astype(int),
                                                           axis=2) < para.probability_throuhold
                moving_obj_frame_temp[moving_obj_bool_frame.reshape(frame_x, frame_y)] = 255
                different_frame[:, :] = moving_obj_frame_temp
                # AND the consecutive difference frames
                # img = different_frame_binary

                and_frame = and_frame_func(previous_different_frame, different_frame)
                previous_different_frame = different_frame
                img = and_frame
                # cv2.imshow('and', img)

                # erode then dilate
                img = cv2.erode(img, None, iterations=2)  # erode/dilate to remove noise
                img = cv2.dilate(img, None, iterations=2)
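
The per-pixel histogram lookup above in miniature (invented sizes and a hypothetical threshold): every pixel indexes its own bin array with its current quantised value, and low-probability values are flagged as moving.

import numpy as np

h, w, bins = 4, 5, 256
rng = np.random.default_rng(0)
v_count = rng.random((h, w, bins))                   # per-pixel value histogram (stand-in)
frame = rng.integers(0, bins, size=(h, w))           # current quantised channel values
probs = np.take_along_axis(v_count, frame.reshape(h, w, 1), axis=2)
moving = probs[..., 0] < 0.1                         # hypothetical probability threshold
print(int(moving.sum()))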
예제 #32
0
파일: lsh.py 프로젝트: VarIr/scikit-hubness
    def kneighbors(
            self,
            X=None,
            n_candidates=None,
            return_distance=True
    ) -> Union[Tuple[np.array, np.array], np.array]:
        """ Retrieve k nearest neighbors.

        Parameters
        ----------
        X: np.array or None, optional, default = None
            Query objects. If None, search among the indexed objects.
        n_candidates: int or None, optional, default = None
            Number of neighbors to retrieve.
            If None, use the value passed during construction.
        return_distance: bool, default = True
            If return_distance, will return distances and indices to neighbors.
            Else, only return the indices.
        """
        check_is_fitted(self, 'index_')
        index = self.index_

        if n_candidates is None:
            n_candidates = self.n_candidates
        n_candidates = check_n_candidates(n_candidates)

        # For compatibility reasons, as each sample is considered as its own
        # neighbor, one extra neighbor will be computed.
        if X is None:
            n_query = self.n_indexed_
            X = np.array([index.get(i) for i in range(n_query)])
            search_from_index = True
        else:
            X = check_array(X)
            n_query = X.shape[0]
            search_from_index = False

        dtype = X.dtype

        # If chosen metric is not among the natively supported ones, reorder the neighbors
        reorder = True if self.metric not in ('angular', 'cosine',
                                              'jaccard') else False

        # If fewer candidates than required are found for a query,
        # we save index=-1 and distance=NaN
        neigh_ind = -np.ones((n_query, n_candidates), dtype=np.int32)
        if return_distance or reorder:
            neigh_dist = np.empty_like(neigh_ind, dtype=dtype) * np.nan
        metric = 'cosine' if self.metric == 'angular' else self.metric

        disable_tqdm = False if self.verbose else True

        if search_from_index:  # search indexed against indexed
            for i in tqdm(
                    range(n_query),
                    desc='Querying',
                    disable=disable_tqdm,
            ):
                # Find the approximate nearest neighbors.
                # Each of the true `n_candidates` nearest neighbors
                # has at least `recall` chance of being found.
                ind = index.search_from_index(
                    i,
                    n_candidates,
                    self.recall,
                )

                neigh_ind[i, :len(ind)] = ind
                if return_distance or reorder:
                    X_neigh_denormalized = \
                        X[ind] * self.X_indexed_norm_[ind].reshape(len(ind), -1)
                    neigh_dist[i, :len(ind)] = pairwise_distances(
                        X[i:i + 1, :] * self.X_indexed_norm_[i],
                        X_neigh_denormalized,
                        metric=metric,
                    )
        else:  # search new query against indexed
            for i, x in tqdm(
                    enumerate(X),
                    desc='Querying',
                    disable=disable_tqdm,
            ):
                # Find the approximate nearest neighbors.
                # Each of the true `n_candidates` nearest neighbors
                # has at least `recall` chance of being found.
                ind = index.search(
                    x.tolist(),
                    n_candidates,
                    self.recall,
                )

                neigh_ind[i, :len(ind)] = ind
                if return_distance or reorder:
                    X_neigh_denormalized =\
                        np.array([index.get(i) for i in ind]) * self.X_indexed_norm_[ind].reshape(len(ind), -1)
                    neigh_dist[i, :len(ind)] = pairwise_distances(
                        x.reshape(1, -1),
                        X_neigh_denormalized,
                        metric=metric,
                    )

        if reorder:
            sort = np.argsort(neigh_dist, axis=1)
            neigh_dist = np.take_along_axis(neigh_dist, sort, axis=1)
            neigh_ind = np.take_along_axis(neigh_ind, sort, axis=1)

        if return_distance:
            return neigh_dist, neigh_ind
        else:
            return neigh_ind
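
The final reordering in isolation (toy data): one argsort of the distances drives a joint reorder of both the distance and index matrices, keeping them aligned row by row.

import numpy as np

rng = np.random.default_rng(0)
neigh_dist = rng.random((3, 5))
neigh_ind = rng.integers(0, 100, size=(3, 5))
sort = np.argsort(neigh_dist, axis=1)
neigh_dist = np.take_along_axis(neigh_dist, sort, axis=1)
neigh_ind = np.take_along_axis(neigh_ind, sort, axis=1)
print(neigh_ind)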
예제 #33
0
    def __getitem__(self, index):
        depthmap_orig = load_depthmap(self.names[index], self.img_width, self.img_height, self.max_depth)
        pose_orig = self.joints_world[index]

        depthmap, pose, cropped_cfg = crop_from_xyz_pose(depthmap_orig, pose_orig, self.cfg, out_w=128, out_h=128, pad=20.0, max_depth=self.max_depth)
        #show_Data(depthmap, pose, cropped_cfg, self.max_depth)

        """
        if self.training:
            com = center_of_mass(depthmap, cropped_cfg)
            inv_depthmap = -depthmap + self.max_depth
            aug_dms, pose = data_aug(inv_depthmap, pose, cropped_cfg, com)
            depthmap = -aug_dms + self.max_depth
        """

        #show_Data(depthmap, pose, cropped_cfg, self.max_depth)


        xyzlocal_pose = xyz2xyz_local(pose, cropped_cfg)



        """
        for i in range(3):
            if pose[:,i].min() < self.stored_min[i]:
                self.stored_min[i] = pose[:,i].min()
                print()
                print('MIN, MAX =', self.stored_min, self.stored_max)
    
            if pose[:,i].max() > self.stored_max[i]:
                self.stored_max[i] = pose[:,i].max()
                print()
                print('MIN, MAX =', self.stored_min, self.stored_max)
        """


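        # Shift by the dataset offset and divide by the global scale so the local
        # pose lands (roughly) in [-1, 1]; the check below prints a warning otherwise.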
        offset = np.tile(self.offset[None,:], (xyzlocal_pose.shape[0], 1))
        joints_world_normalized = xyzlocal_pose + offset

        #scale = np.tile(np.array([self.scale])[None,:], aug_poses.shape)
        joints_world_normalized = joints_world_normalized / self.scale
        
        if joints_world_normalized.min() < -1 or joints_world_normalized.max() > 1:
            print('Warning: normalized joints fall outside [-1, 1]:',
                  joints_world_normalized.min(), joints_world_normalized.max())

        
        wrist = joints_world_normalized[0,:][None,:]

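        # Prepend the wrist joint (row 0) to each finger chain ([1:5], [5:9], [9:13],
        # [13:17], [17:]) so each chain can be flattened into its own feature vector below.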
        joints_world_normalized = np.concatenate([wrist, joints_world_normalized[1:5,:],
                                                       wrist, joints_world_normalized[5:9,:],
                                                       wrist, joints_world_normalized[9:13,:],
                                                       wrist, joints_world_normalized[13:17,:],
                                                       wrist, joints_world_normalized[17:,:]
                                                       ], axis=0)

        # One row per finger chain: (5, joints_per_chain * 3)
        joints_world_normalized = joints_world_normalized.reshape(5, -1)

        if self.not_initialized:
            self.not_initialized = False
            """
            points = depthmap2points(depthmap, self.fx, self.fy)
            points = points.reshape((-1, 3))

            j = 0
            fig = plt.figure(figsize=(10, 10))
            ax = fig.gca(projection='3d')
            for i in range(points.shape[0]):
                xs, ys, zs = points[i,:]
                if zs != self.max_depth:
                    if (j % 10) == 0:
                        ax.scatter(xs, ys, zs, c='r', marker='o')
                    j += 1
            for i in range(self.joints_world[index].shape[0]):
                xs, ys, zs = self.joints_world[index][i,:]
                ax.scatter(xs, ys, zs, c='b', marker='o')
            #plt.savefig('fig_{}.png'.format(i), dpi=400, bbox_inches='tight')
            plt.show()
            print('hej')
            """

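        # Map depth into [0, 1] and invert it, so the empty background (max_depth)
        # becomes 0 and closer surfaces get larger values.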
        depthmap /= self.max_depth
        depthmap = 1 - depthmap


        if self.training:
            """ Add Gaussian Noise """
            #depthmap += np.random.randn(*depthmap.shape)*0.022
            #depthmap = np.clip(depthmap, 0., 1.)
            
            """ scramble """
            for _ in range(2):
                # Group the map into runs of 4 consecutive pixels and draw a noisy
                # permutation per group: argsort of (noise + [0, 1, 2, 3]) tends to keep
                # each pixel near its original slot, so the image is only jittered.
                depthmap_ = depthmap.reshape((-1, 4))
                scramble = np.argsort(1.5 * np.random.randn(*depthmap_.shape) + np.arange(4))
                depthmap = np.take_along_axis(depthmap_, scramble, 1).reshape(*depthmap.shape)
                # Transpose so the second pass jitters along the other image axis.
                depthmap = depthmap.transpose()

        #depthmap = np.concatenate([depthmap, np.zeros((self.img_width-self.img_height, self.img_width), dtype=np.float32)], axis=0)
        
        #depthmap = np.array(Image.fromarray(depthmap).resize((100, 100)))
        #depthmap = imresize(depthmap, (100,100), interp='bilinear', mode='F')
        
        
        return np.float32(depthmap.reshape((1, *depthmap.shape))), np.float32(joints_world_normalized)
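
The scramble augmentation above uses np.take_along_axis to apply a different permutation to every row of an array. A minimal standalone sketch of that idea (the toy array, seed, and noise scale are made up for illustration):

import numpy as np

rng = np.random.default_rng(0)

# A toy 4x4 "image"; each row gets its own permutation.
img = np.arange(16, dtype=np.float32).reshape(4, 4)

# argsort of (noise + column index) gives a per-row permutation that tends to
# keep values near their original positions, so each row is only jittered.
perm = np.argsort(0.5 * rng.standard_normal(img.shape) + np.arange(4), axis=1)
jittered = np.take_along_axis(img, perm, axis=1)

print(jittered)  # each row holds the same values as img, in slightly shuffled order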