Example #1
0
 def initiate_ts_stge(dm,
                      gene_df,
                      l_corr=200,
                      t_corr=5,
                      sigma_f=1000,
                      sigma_s=1000,
                      sigma_t=5000,
                      diag_val=0,
                      reg_method="zscore"):
     """Create an ``STGE`` instance wired to ``dm`` and loaded with ``gene_df``.

     Args:
         dm: Data manager handed to ``STGE.register_data_manager``.
         gene_df: Gene table forwarded to ``STGE.set_Ys_Yt_A``.
         l_corr, t_corr, sigma_f, sigma_s, sigma_t: Hyper-parameters passed
             positionally, in this order, to ``STGE.set_params``.
         diag_val: Value stored on the instance as ``diag_val`` before the
             hyper-parameters are set.
         reg_method: Forwarded to ``STGE.set_Ys_Yt_A`` as ``reg_method``.

     Returns:
         The configured ``STGE`` instance.
     """
     model = STGE()
     model.register_data_manager(dm)
     model.set_Ys_Yt_A(gene_df, reg_method=reg_method)
     # diag_val is assigned before set_params, as in the original call order.
     model.diag_val = diag_val
     hyper_params = (l_corr, t_corr, sigma_f, sigma_s, sigma_t)
     model.set_params(*hyper_params)
     return model
Example #2
0
 def initiate_YsYt_dict_stge(l_corr=200,
                             t_corr=5,
                             sigma_f=1000,
                             sigma_s=200,
                             sigma_t=5000,
                             diag_val=0,
                             ref_cell_num=100):
     """Create an ``STGE`` instance from the stored ``Ys``/``Yt`` dictionary.

     The dictionary is read with ``load_obj`` from
     ``selected_cluster_gene_Ys_Yt.obj``.  Its time-related entries are copied
     onto a fresh ``data_manupulation.ts_dm(ref_cell_num)`` data manager, and
     its ``Yt`` / ``Ys`` entries are copied onto the ``STGE`` instance.

     Args:
         l_corr, t_corr, sigma_f, sigma_s, sigma_t: Hyper-parameters passed
             positionally, in this order, to ``STGE.set_params``.
         diag_val: Value stored on the instance as ``diag_val``.
         ref_cell_num: Argument of the ``ts_dm`` constructor.

     Returns:
         The configured ``STGE`` instance.
     """
     dm = data_manupulation.ts_dm(ref_cell_num)
     store_dict = load_obj("selected_cluster_gene_Ys_Yt.obj")
     # Copy the stored time bookkeeping onto the data manager, then let it
     # rebuild whatever refresh_ref_t() derives from those fields.
     for field in ('t_vec', 'sc_t_vec', 'sc_t_nums', 'sc_t_breaks'):
         setattr(dm, field, store_dict[field])
     dm.refresh_ref_t()

     model = STGE()
     model.register_data_manager(dm)
     model.Yt = store_dict['Yt']
     model.Ys = store_dict['Ys']
     model.diag_val = diag_val
     # The assignment matrix comes from the manager held by the model.
     model.A = model.dm.get_ts_assignment_matrix()
     hyper_params = (l_corr, t_corr, sigma_f, sigma_s, sigma_t)
     model.set_params(*hyper_params)
     return model
Example #3
0
 def initiate_sim_stge(sim_dm,
                       l_corr=200,
                       t_corr=5,
                       sigma_f=1000,
                       sigma_s=1000,
                       sigma_t=5000,
                       diag_val=0):
     """Create an ``STGE`` instance backed by a simulation data manager.

     ``A`` and ``Yt`` are taken from the registered manager.  ``Ys`` is only
     assigned when the manager's ``sc_t_vec`` is non-empty; it is then the
     row-wise concatenation of the transposed ``sc_dict[t]`` entries, in
     ``sc_t_vec`` order.  Gene ids and gene names are both set to
     ``0 .. Yt.shape[1] - 1``.

     Args:
         sim_dm: Data manager handed to ``STGE.register_data_manager``.
         l_corr, t_corr, sigma_f, sigma_s, sigma_t: Hyper-parameters passed
             positionally, in this order, to ``STGE.set_params``.
         diag_val: Value stored on the instance as ``diag_val``.

     Returns:
         The configured ``STGE`` instance.
     """
     model = STGE()
     model.register_data_manager(sim_dm)
     model.diag_val = diag_val
     hyper_params = (l_corr, t_corr, sigma_f, sigma_s, sigma_t)
     model.set_params(*hyper_params)
     model.A = model.dm.get_ts_assignment_matrix()
     model.Yt = model.dm.Yt
     if len(model.dm.sc_t_vec) > 0:
         per_time_blocks = [
             np.transpose(model.dm.sc_dict[t]) for t in model.dm.sc_t_vec
         ]
         model.Ys = np.concatenate(per_time_blocks, axis=0)
     gene_num = model.Yt.shape[1]
     # Two separate arrays, so mutating one list does not affect the other.
     model.gene_id_list = np.arange(gene_num)
     model.gene_name_list = np.arange(gene_num)
     return model
Example #4
0
 def standard_recover(file_path):
     """Rebuild an ``STGE`` instance from a stored result file.

     A default ``data_manupulation.standard_dm(1000, default=True)`` manager
     is registered first.  ``Pi_list``, ``mDelta_list``, the five
     hyper-parameters and the gene id / name lists are then restored from the
     dictionary returned by ``load_obj(file_path)``.  Finally ``A``, ``Ys``
     and ``Yt`` are re-derived from the registered data manager for the
     restored gene ids.

     Args:
         file_path: Path handed to ``load_obj``.

     Returns:
         The restored ``STGE`` instance.
     """
     dm = data_manupulation.standard_dm(1000, default=True)
     model = STGE()
     model.register_data_manager(dm)

     store_dict = load_obj(file_path)
     model.Pi_list = list(map(np.array, store_dict['Pi_list']))
     model.mDelta_list = list(map(np.array, store_dict['mDelta_list']))
     # Key order matches the positional order expected by set_params.
     param_keys = ('l_corr', 't_corr', 'sigma_f', 'sigma_s', 'sigma_t')
     model.set_params(*[store_dict[key] for key in param_keys])
     model.gene_id_list = store_dict['gene_id_list']
     model.gene_name_list = store_dict['gene_name_list']

     model.A = model.dm.get_ts_assignment_matrix()
     model.Ys = model.dm.get_sc_exp_mat(model.gene_id_list)
     model.Yt = model.dm.get_ts_exp_mat(model.gene_id_list)
     return model
Example #5
0
def set_up_optimized_stge(dm, marker_gene_df, reconst_gene_df, vb_params,
                          reconst_params, vb_iter, iter_num):
    """Fit an ``STGE`` model on marker genes, then switch it to reconstruction.

    Steps, in order:
      1. ``dm.process()`` and ``dm.change_gene_df(marker_gene_df)``.
      2. Register ``dm``, apply ``vb_params`` and run one variational-Bayes
         pass of at most ``vb_iter`` iterations.
      3. ``iter_num`` times: re-estimate ``sigma_f`` and ``sigma_s``/``sigma_t``
         via the ``set_optimized_*`` methods, then run variational Bayes again.
      4. Swap the gene table to ``reconst_gene_df``, apply ``reconst_params``
         and call ``sc_mode()``.

    Args:
        dm: Data manager; it is processed and mutated in place.
        marker_gene_df: Gene table used while fitting.
        reconst_gene_df: Gene table installed after fitting.
        vb_params: Keyword arguments for ``STGE.set_params`` during fitting.
        reconst_params: Keyword arguments for ``STGE.set_params`` afterwards.
        vb_iter: ``max_iter`` for every ``variational_bayes`` call.
        iter_num: Number of optimise-then-refit rounds.

    Returns:
        The fitted ``STGE`` instance.
    """
    print("check")
    model = STGE()
    dm.process()
    dm.change_gene_df(marker_gene_df)
    model.register_data_manager(dm)
    model.set_params(**vb_params)
    model.init_VB_var()

    # Initial fit, followed by alternating hyper-parameter updates and refits.
    model.variational_bayes(max_iter=vb_iter)
    for _ in range(iter_num):
        model.set_optimized_sigma_f()
        model.set_optimized_sigma_s_t()
        model.variational_bayes(max_iter=vb_iter)

    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by change_gene_df, which is not visible here.
    model.dm.change_gene_df(reconst_gene_df, False)
    model.set_params(**reconst_params)
    model.sc_mode()
    return model
                        '-n',
                        default=2000,
                        type=int,
                        help='number of points sampled from cell density')
    # NOTE(review): this is the tail of a script whose beginning (parser
    # construction and earlier arguments) lies outside the visible source.
    parser.add_argument('--time', '-t', default=6.0, type=float, help='hpf')
    args = parser.parse_args()
    # cell coordinate preparation: sample args.sample_num points at args.time
    gpp = GP_data_processor()
    gpp.register_file(args.dens)
    point_mat = gpp.sample_point_time(args.time, size=args.sample_num)
    # values from -2 (inclusive) to 2 (exclusive) in steps of 0.055, handed to
    # register_axis below -- presumably slice boundaries; confirm
    divnum = np.arange(-2, 2, 0.055)
    # expression data preparation: one CSV per axis, named
    # "<args.expression>_<axis>.csv"
    ts_all = tomo_seq_all_axis(point_mat)
    for axis in ['av', 'vd', 'lr']:
        # despite its name, fname_av holds the file name of the current axis
        fname_av = args.expression + '_' + axis + '.csv'
        ts_all.register_axis(fname_av, axis, divnum)
    # adding data for STGE
    slice_all = ts_all.get_slice_list()
    gene_id = args.gene
    exp_all = ts_all.get_expression(gene_id)
    stge = STGE()
    stge.add_region_list(slice_all, exp_all)
    # tuning and saving hyper-parameters; the three starting values are drawn
    # uniformly from [0.01, 10)
    inits = np.random.uniform(0.01, 10, 3)
    res = stge.optimize_parameters(sigma_f=inits[0],
                                   l_corr=inits[1],
                                   sigma_obs=inits[2])
    # one row of four strings: the gene id followed by the optimiser result
    # (the reshape to [1, 4] requires res to hold exactly three values)
    gene_param = np.concatenate(([gene_id], res.astype(str))).reshape([1, 4])
    print(gene_param)
    np.savetxt(args.ofn, gene_param, delimiter=',', fmt="%s")
Example #7
0
    # NOTE(review): fragment of a larger routine; args, sigma_f, l_corr,
    # slice_gen, observe_expression and test_express_func are defined outside
    # the visible source.
    # load the cell coordinates from the input file (first three columns only)
    all_p_mat = np.load(args.ifn)[:, 0:3]
    # build the list of point matrices, one per sliced sample; the same slice
    # count is passed three times
    slice_num = args.slice
    pmat_list \
        = slice_gen\
        (all_p_mat,\
         slice_num,slice_num,slice_num)
    # observe the expression for each sliced sample
    obs_vec = observe_expression\
              (test_express_func, pmat_list, beta = args.beta)
    # sampling points for reconstruction: args.num row indices drawn by
    # np.random.randint, so the same cell may be picked more than once
    sample_index = np.random.randint\
                    (all_p_mat.shape[0],size=args.num)
    sample_pmat = all_p_mat[sample_index, :]
    # reconstruct the expression on the sampled points and time the call
    print("start")
    start = time.time()
    reconst_mat = STGE().reconstruct_expression\
                  (obs_vec, pmat_list, \
                   sigma_f, l_corr, \
                   args.sigma_obs, \
                   sample_pmat = sample_pmat)
    last = time.time()
    # elapsed wall-clock seconds of the reconstruction
    print(last - start)
    print(reconst_mat[0:10, :])
    print(reconst_mat.shape)
    print(args.ofn)
    # save the reconstructed matrix in NumPy binary format
    np.save(args.ofn, reconst_mat)
Example #8
0
                     type=str,
                     help='specifing optimized parameters')
 # NOTE(review): fragment of a larger script; the parser setup before this
 # point and the code after the final STGE() lie outside the visible source.
 parser.add_argument('--no_ss', action='store_true', help='no somite stage')
 args = parser.parse_args()
 # data preparation
 dm = data_manager(args.sample_opt)
 hpf = args.hpf
 # reference value per stage label, passed as the second argument when the
 # tomo-seq data sets are registered (presumably hours post fertilisation --
 # confirm)
 refhpf = {"shield": 7.5, "ss10": 16.8}
 dm.register_tomoseq('data/base_data/tomo_seq/zfshield', refhpf["shield"])
 # the somite-stage data set is skipped when --no_ss is given
 if not args.no_ss:
     dm.register_tomoseq_ss('data/base_data/tomo_seq/zf10ss',
                            refhpf["ss10"])
 dm.zero_num = 0
 dm.process(args.sample_opt)
 # optimize parameter
 stge = STGE()
 stge.register_data_manager(dm)
 # opt_flag_vec holds 1 where the matching letter ('l', 't', 's') occurs in
 # args.opt_flag and 0 otherwise
 opt_flag_vec = [int(var in args.opt_flag) for var in ['l', 't', 's']]
 if np.sum(opt_flag_vec) > 0:
     opt_params = stge.optimize_parameters(args.gene_id, args.l_corr,
                                           args.t_corr, args.sigma_obs,
                                           opt_flag_vec)
 else:
     # nothing to optimise: keep the command-line values, in the order
     # sigma_f, l_corr, t_corr, sigma_obs
     opt_params = np.array(
         [args.sigma_f, args.l_corr, args.t_corr, args.sigma_obs])
 np.savetxt(args.ofn_opt, opt_params, delimiter=",")
 # reconstruct: the data manager is re-processed with the reconstruction
 # sample size and a fresh STGE instance is created
 # NOTE(review): the zero_num override below was disabled by the author.
 # dm.zero_num = args.zero_num
 dm.process(args.sample_reconst)
 stge = STGE()
Example #9
0
    # NOTE(review): fragment of a larger routine; the parser is built outside
    # the visible source.
    args = parser.parse_args()
    # cell coordinate preparation: sample args.num points at args.hpf
    fname = args.dir + '/cell_density_mat.mat'
    gpp = GP_data_processor()
    gpp.register_file(fname)
    point_mat = gpp.sample_point_time(args.hpf,size=args.num)
    # values from -2 (inclusive) to 2 (exclusive) in steps of args.width,
    # handed to register_axis below -- presumably slice boundaries; confirm
    divnum = np.arange(-2,2,args.width)
    # expression data preparation: one CSV per axis (av, vd, lr)
    ts_all = tomo_seq_all_axis(point_mat)
    fname_av = args.dir + '/zfshild_av.csv'
    ts_all.register_axis(fname_av,"av",divnum)
    fname_vd = args.dir + '/zfshild_vd.csv'
    ts_all.register_axis(fname_vd,"vd",divnum)
    fname_lr = args.dir + '/zfshild_lr.csv'
    ts_all.register_axis(fname_lr,"lr",divnum)
    slice_all = ts_all.get_slice_list()
    gene_id = args.gene
    exp_all = ts_all.get_expression(gene_id)
    # parameters: the same args.sigma value is repeated three times
    sigma_list = np.array([args.sigma for _ in range(3)])
    beta = args.beta
    # sampling points for reconstruction: every sampled cell is used
    sample_pmat = point_mat
    # reconstruct
    reconst_mat = STGE().reconstruct_expression\
        (exp_all, slice_all,
         sigma_list, beta = beta, \
         sample_pmat = sample_pmat)
    # save the reconstructed matrix as comma-separated text
    np.savetxt(args.ofn,reconst_mat,delimiter=',')
Example #10
0
def test_2d(rmin,
            rmax,
            sigma_list,
            beta,
            ebeta,
            sample_num,
            xlist,
            ylist,
            outfile,
            test_func=one_mode,
            struct=lambda x: True):
    """Run a 2-D reconstruction experiment and write the result table.

    ``sample_num`` points are drawn uniformly from ``[rmin, rmax)`` in both
    coordinates and passed through ``get_point_cond(point_mat, struct)``.
    The plane is then cut into slabs, first along x (consecutive pairs of
    ``xlist``) and then along y (consecutive pairs of ``ylist``).  For every
    slab the points returned by ``get_point_region`` are evaluated with
    ``test_func``, the values are summed, Gaussian noise with variance
    ``1 / beta`` is added, and the pair (points, noisy sum) is registered on
    an ``STGE`` instance via ``add_region``.

    After ``refresh_mat(sigma_list, ebeta)``, ``mean_var`` is evaluated on
    every ``(x, y)`` of ``itertools.product(xlist, ylist)``.  One row per
    point, with columns ``x, y, true, estimate, var, lower, upper``, is
    written to ``outfile`` as a tab-separated table.  ``lower`` and ``upper``
    are the estimate minus / plus ``sqrt(|var|)``.

    Elapsed time and the sum of the module-level ``integrate_time_list`` are
    printed, and that list is emptied afterwards.

    Args:
        rmin, rmax: Bounds of the sampled square.
        sigma_list: First argument of ``STGE.refresh_mat``.
        beta: Precision of the simulated observation noise.
        ebeta: Second argument of ``STGE.refresh_mat``.
        sample_num: Number of points drawn before filtering.
        xlist, ylist: Sliceable sequences of slab edges; also the evaluation
            grid.
        outfile: Destination passed to ``DataFrame.to_csv``.
        test_func: Callable ``f(x, y)`` giving the true value at a point.
        struct: Predicate forwarded to ``get_point_cond``.

    Returns:
        None.
    """
    start = time.time()
    gram_mat = STGE()

    def eval_point(point):
        # Adapter: apply_along_axis hands over one (x, y) row at a time.
        return test_func(point[0], point[1])

    # sample points
    point_mat = np.random.uniform(low=rmin, high=rmax, size=(sample_num, 2))
    point_mat = get_point_cond(point_mat, struct)

    def register_slab(lower_bounds, upper_bounds):
        # Register one slab: its points and the noisy sum of their values.
        region = get_point_region(point_mat, lower_bounds, upper_bounds)
        func_val_mat = np.apply_along_axis(eval_point, axis=-1, arr=region)
        total = np.sum(func_val_mat)
        obs = np.random.normal(total, math.sqrt(1.0 / beta))
        gram_mat.add_region(region, obs)

    # Slabs along x first, then along y: the order fixes the sequence of
    # random draws, so it is kept as in the original implementation.
    for lx, ux in zip(xlist[:-1], xlist[1:]):
        register_slab([lx, rmin], [ux, rmax])
    for ly, uy in zip(ylist[:-1], ylist[1:]):
        register_slab([rmin, ly], [rmax, uy])

    # calculate gram matrix
    gram_mat.refresh_mat(sigma_list, ebeta)
    print(gram_mat.gram_mat.size)

    col_names = ["x", "y", "true", "estimate", "var", "lower", "upper"]
    # Rows are collected and the frame is built once: DataFrame.append was
    # removed in pandas 2.0 and copied the whole frame on every call.
    records = []
    for x, y in itertools.product(xlist, ylist):
        point = np.array([x, y])
        # true value at this grid point
        true = test_func(x, y)
        # estimated mean and variance
        mean_var = gram_mat.mean_var(point)
        estimate = mean_var[0]
        var = mean_var[1]
        # monitor whether the estimated variance becomes negative
        if var < 0:
            print("netgative", var)
        # one-standard-deviation band around the estimate
        half_width = math.sqrt(abs(var))
        lower = estimate - half_width
        upper = estimate + half_width
        records.append(
            pd.Series([x, y, true, estimate, var, lower, upper],
                      index=col_names))
    plot_df = pd.DataFrame(records, columns=col_names)
    plot_df.to_csv(outfile, sep='\t')

    lapse = time.time() - start
    print("time")
    print(lapse)
    print("integrate time")
    print(sum(integrate_time_list))
    # Reset the shared timing log for the next run.
    del integrate_time_list[:]