Ejemplo n.º 1
0
def main():
    """Preprocess the osmFISH loom file and save whole-slide and per-strip loaders.

    Steps, in order:

    1. Read the expression matrix, gene names and per-cell attributes
       (``X``, ``Y``, ``ClusterID``) from the loom (HDF5) file.
    2. Drop cells whose total expression is zero and normalise every
       remaining cell by its total expression.
    3. Split the cells into ``split_n`` equal-width vertical strips along the
       x axis; the strip index is used as the ``field`` of each cell.
    4. Save one loader for the whole slide (suffix ``0``) and one loader per
       strip (suffixes ``1`` .. ``split_n``), the latter also in smfish format.

    The function takes no arguments and returns nothing; all paths are
    hard-coded relative to the current working directory.
    """
    file1 = "datasets/osmFISH_SScortex_mouse_all_cells.loom"
    # Pull everything that is needed into memory inside the context manager
    # so the HDF5 handle is released even if a later step raises.
    with h5py.File(file1, mode='r') as f:
        gene_expression = np.asarray(f['matrix'])
        genes = np.asarray(f['row_attrs']['Gene'])
        meta = f['col_attrs']
        x_all = np.asarray(meta['X'])
        y_all = np.asarray(meta['Y'])
        cell_types_all = np.asarray(meta['ClusterID'])

    # After the transpose, axis 0 indexes cells (it is masked together with
    # the per-cell attributes below).
    gene_expression = np.transpose(gene_expression)
    gene_sum = np.sum(gene_expression, axis=1, keepdims=True)
    # True for cells with a non-zero total count; the others would produce a
    # division by zero in the normalisation step.
    nonzero_mask = np.reshape(gene_sum != 0, len(gene_sum))
    gene_expression = gene_expression[nonzero_mask, :]
    gene_sum = gene_sum[nonzero_mask, :]
    genes = [name.decode() for name in genes]
    x = x_all[nonzero_mask]
    y = y_all[nonzero_mask]
    coordinates = np.stack((x, y), axis=1)
    plt.scatter(x, y, s=1)
    cell_types = cell_types_all[nonzero_mask]
    n_c = len(set(cell_types))

    save_f = "Benchmark/osmFISH/"
    smfishHmrf_f = "Benchmark/osmFISH/data/"
    # makedirs creates the missing parents (and the data/ sub-folder the
    # per-strip loaders are written to) and is a no-op when they exist.
    os.makedirs(smfishHmrf_f, exist_ok=True)

    ### Data preprocessing
    gene_expression = gene_expression / gene_sum
    split_n = 4
    threshold_distance = 50
    # Exactly split_n + 1 equally spaced edges from min(x) to max(x).
    boarder = np.linspace(x.min(), x.max(), split_n + 1)
    # Assign each cell to a strip using the interior edges only, so every
    # cell (including those exactly on an edge or at max(x)) gets a strip in
    # 0 .. split_n - 1.  Stored as float to match the original field dtype.
    bregma = np.digitize(x, boarder[1:-1]).astype(float)
    for edge in boarder[1:-1]:
        plt.axvline(x=edge)

    real_df = RealDataLoader(gene_expression,
                             coordinates,
                             threshold_distance=threshold_distance,
                             cell_labels=cell_types,
                             num_class=n_c,
                             field=bregma,
                             for_eval=False)
    dop.save_loader(real_df, save_f + '0')
    for i in np.arange(split_n):
        mask = bregma == i
        loader = RealDataLoader(gene_expression[mask, :],
                                coordinates[mask, :],
                                threshold_distance=threshold_distance,
                                cell_labels=cell_types[mask],
                                num_class=n_c,
                                field=bregma[mask],
                                gene_list=genes,
                                for_eval=False)
        dop.save_loader(loader, smfishHmrf_f + str(i + 1))
        dop.save_smfish(loader, smfishHmrf_f + str(i + 1))
Ejemplo n.º 2
0
def main():
    """Preprocess the MERFISH all-cells table and save loaders per animal.

    The table is read from a hard-coded path, gene columns containing NaN
    are discarded, and for every ``Animal_ID``:

    * cells labelled ``'Ambiguous'`` are removed,
    * expression is normalised per cell by its total expression,
    * a loader with all remaining cells is saved with ``dop.save_loader``,
    * a loader restricted to one bregma value (looked up in
      ``bregmas_smfish`` by ``animal_id - 1``) is saved with
      ``dop.save_smfish``.

    The function takes no arguments and returns nothing.

    Raises:
        FileNotFoundError: If the MERFISH table is not present.
        ValueError: If the table path has an unsupported extension.
    """
    ### Hyper parameter setting
    print("Setting hyper parameter")
    n_c = 10  #Number of cell type
    threshold_distance = 100  # The threshold distance of neighbourhood.
    gene_col = np.arange(9, 164)
    coor_col = [5, 6]
    header = 0
    data_f = "datasets/Moffitt_and_Bambah-Mukku_et_al_merfish_all_cells.csv"
    if not os.path.exists(data_f):
        raise FileNotFoundError(
            "MERFISH full matrix file can't be found, please download it first."
        )
    save_f = "Benchmark/MERFISH/data/"
    os.makedirs(save_f, exist_ok=True)
    smfishHmrf_save = "Benchmark/MERFISH/data/"
    ### Data preprocessing
    print("Reading data from %s" % (data_f))
    if data_f.endswith('.xlsx'):
        data_all = pd.read_excel(data_f, header=header)
    elif data_f.endswith('.csv'):
        data_all = pd.read_csv(data_f, header=header)
    else:
        # Fail with a clear message instead of an unbound-variable error.
        raise ValueError("Unsupported data file format: %s" % (data_f))
    animal_idxs = np.unique(data_all['Animal_ID'])
    gene_expression_all = data_all.iloc[:, gene_col]
    # Positions (within gene_col) of the gene columns holding any NaN.
    nan_cols = np.unique(np.where(np.isnan(gene_expression_all))[1])
    # Delete all of them in ONE call: deleting one position at a time shifts
    # the remaining positions and ends up removing the wrong columns.
    gene_col = np.delete(gene_col, nan_cols)
    gene_name = data_all.columns[gene_col]
    # One bregma value per animal, indexed by animal_id - 1 (36 entries).
    bregmas_smfish = [0.01] * 3 + [-0.04
                                   ] + [0.01] * 3 + [-0.04] * 4 + [0.11] * 25
    for animal_id in animal_idxs:
        print("Extract the data for animal %d" % (animal_id))
        data = data_all[data_all['Animal_ID'] == animal_id]
        cell_types = data['Cell_class']
        data = data[cell_types != 'Ambiguous']
        cell_types = data['Cell_class']
        try:
            bregma = data['Bregma']
        except KeyError:
            # Fall back to the alternative column name when 'Bregma' is absent.
            bregma = data['Field of View']
        gene_expression = data.iloc[:, gene_col]
        coordinates = data.iloc[:, coor_col]
        coordinates = np.asarray(coordinates)
        gene_expression = np.asarray(gene_expression)
        # Normalise each cell (row) by its total expression.
        gene_expression = gene_expression / np.sum(
            gene_expression, axis=1, keepdims=True)
        real_df = RealDataLoader(gene_expression,
                                 coordinates,
                                 threshold_distance=threshold_distance,
                                 gene_list=gene_name,
                                 cell_labels=cell_types,
                                 num_class=n_c,
                                 field=bregma,
                                 for_eval=False)
        target_bregma = bregmas_smfish[animal_id - 1]
        # Plain boolean array so it indexes the NumPy matrices positionally.
        mask = np.asarray(bregma == target_bregma)
        n_selected = int(mask.sum())
        smfish_loader = RealDataLoader(gene_expression[mask, :],
                                       coordinates[mask, :],
                                       threshold_distance=threshold_distance,
                                       gene_list=gene_name,
                                       cell_labels=cell_types[mask],
                                       num_class=n_c,
                                       field=[target_bregma] * n_selected,
                                       for_eval=False)
        dop.save_smfish(smfish_loader, smfishHmrf_save + str(animal_id))
        dop.save_loader(real_df, save_f + str(animal_id))
Ejemplo n.º 3
0
def main(sim_data,base_f):
    """Fit gene-only, spatial-only and gene+spatial FICT models on simulated data.

    The simulated expression is saved to ``base_f``, an embedding model is
    trained on it with ``train_wrapper`` and loaded back, and a ``FICT_EM``
    model is then fitted in three stages (gene expression only, spatial
    model from the true neighbourhood, both together).  Accuracy and
    adjusted Rand index are printed along the way and several matplotlib
    figures are created (they are not saved by this function).

    NOTE(review): the body reads the module-level names ``sim``
    (``sim.g_mean``, ``sim.coor``) and ``run_i``; neither is a parameter, so
    both must exist in the enclosing module before this function is called.

    Args:
        sim_data: 3-tuple ``(sim_gene_expression, sim_cell_type,
            sim_cell_neighbour)``.  ``sim_cell_type`` is used to index a
            3-colour map, so it presumably holds integer labels 0..2 —
            TODO confirm against the simulator.
        base_f: Output folder; intermediate data, the embedding model and
            the pickled FICT models are written below it.

    Returns:
        ``((ari_gene, ari_spatio, ari_sg), (accur, perm_accur_spatio,
        accr_sg))`` — adjusted Rand indices and permutation accuracies of
        the gene-only, spatial-only and gene+spatial predictions.
    """
    sim_gene_expression,sim_cell_type,sim_cell_neighbour = sim_data
    # All cells are selected.  The builtin ``bool`` is used because the
    # ``np.bool`` alias was removed from NumPy (AttributeError on >= 1.24).
    mask = np.ones(len(sim_cell_type),dtype = bool)
    reduced_d = 10
    ### train a embedding model from the simulated gene expression
    print("Begin training the embedding model.")
    np.savez(os.path.join(base_f,'sim_gene.npz'),
             feature = sim_gene_expression,
             labels = sim_cell_type)
    # Minimal attribute container standing in for parsed command-line args.
    class Args:
        pass
    args = Args()
    args.train_data = os.path.join(base_f,'sim_gene.npz')
    args.eval_data = os.path.join(base_f,'sim_gene.npz')
    args.log_dir = base_f
    args.model_name = "simulate_embedding"
    args.embedding_size = reduced_d
    args.batch_size = 2000
    args.step_rate= 4e-3
    args.drop_out = 0.7
    args.epoches = 200
    args.retrain = False
    args.device = None
    train_wrapper(args)
    embedding_file = os.path.join(base_f,'simulate_embedding/')
    embedding = emb.load_embedding(embedding_file)
    ### Dimensional reduction of simulated gene expression using PCA or embedding
    class_n,gene_n = sim.g_mean.shape
    plot_freq(sim_cell_neighbour,sim_cell_type,[0,1,2])
    arti_posterior = one_hot_vector(sim_cell_type)[0]
    int_type,tags = tag2int(sim_cell_type)
    # Shuffle the one-hot labels so the initial neighbourhood carries no
    # information about the true cell types.
    np.random.shuffle(arti_posterior)
    data_loader = RealDataLoader(sim_gene_expression,
                                 sim.coor,
                                 threshold_distance = 1,
                                 num_class = class_n,
                                 cell_labels = sim_cell_type,
                                 gene_list = np.arange(sim_gene_expression.shape[1]))
    data_loader.dim_reduce(method = "Embedding",embedding = embedding)
    # NOTE(review): ``model`` is rebound to ``model_gene`` further down
    # before this value is ever used; the call is kept in case ``load_train``
    # has side effects on ``data_loader`` — confirm and drop if it has none.
    model = load_train(data_loader)
    data_folder = os.path.join(base_f,'data/')
    os.makedirs(data_folder,exist_ok = True)
    save_smfish(data_loader,data_folder+str(run_i),is_labeled = True)
    save_loader(data_loader,data_folder+str(run_i))
    fict_folder = os.path.join(base_f,'FICT_result/')
    os.makedirs(fict_folder,exist_ok = True)
    result_f = fict_folder+str(run_i)
    os.makedirs(result_f,exist_ok = True)
    plt.figure()
    plt.scatter(sim.coor[:,0],sim.coor[:,1],c = sim_cell_type)
    plt.title("Cell scatter plot.")
    
    ## Create a model and train using only gene expression.
    print("#####################################")
    print("Training with gene expression only.")
    model_gene = FICT_EM(reduced_d,class_n)
    em_epoches = 5
    Accrs_gene = []
    Accrs_gene_same = []
    thres_dist = 1
    data_loader.renew_neighbourhood(arti_posterior,
                                    threshold_distance=thres_dist)
    batch = data_loader.xs
    model_gene.gaussain_initialize(batch[0])
    for i in np.arange(em_epoches):
        posterior,ll,_ = model_gene.expectation(batch,
                                           spatio_factor=0,
                                           gene_factor=1,
                                           prior_factor = 0.0)
        model_gene.maximization(batch,
                                posterior,
                                decay = 0.5,
                                update_gene_model = True,
                                update_spatio_model = False,
                                stochastic_update=False)
        predict = np.argmax(posterior,axis=0)
        partial_predict = predict[mask]
        partial_cell_type = sim_cell_type[mask]
        Accuracy,perm = permute_accuracy(predict,sim_cell_type)
        rand_score = adjusted_rand_score(predict,sim_cell_type)
        Accuracy_same_gene = accuracy_with_perm(partial_predict,
                                                partial_cell_type,
                                                perm)
        rand_score_same_gene = adjusted_rand_score(partial_predict,partial_cell_type)
        Accrs_gene.append(Accuracy)
        Accrs_gene_same.append(Accuracy_same_gene)
        print("Permutation accuracy of mixd cells:%f"%(Accuracy_same_gene))
        print("Permutation accuracy of all cells:%f"%(Accuracy))
    # Keep the gene-only prediction for the comparison plots below.
    predict_gene = predict
    gene_p = np.copy(model_gene.p['g_mean'])
    print("#####################################")
    print("\n")
    with open(os.path.join(result_f,"gene_model.bn"),'wb+') as f:
        pickle.dump(model_gene,f)
    
    ## Train a spatio model with true neighbourhood
    print("#####################################")
    print("Train a spatial model based on true nieghbourhood.")
    # Same object as model_gene: the spatial stage continues from the
    # gene-only fit and only updates the spatial part.
    model = model_gene
    em_epoches = 10
    Accrs_spatial = []
    Accrs_spatial_same = []
    #### Update neighbourhood count with true label, to get the best possible
    #### accuracy of the spatio model
    arti_posterior = one_hot_vector(sim_cell_type)[0]
    data_loader.renew_neighbourhood(arti_posterior,threshold_distance=thres_dist)
    batch = data_loader.xs
    for i in np.arange(em_epoches):
        posterior,ll,_ = model.expectation(batch,
                                  spatio_factor=1.0,
                                  gene_factor=0.0,
                                  prior_factor = 0,
                                  equal_contrib = True)
        predict = np.argmax(posterior,axis=0)
        model.maximization(batch,
                           posterior,
                           decay = None,
                           update_gene_model = False,
                           update_spatio_model = True,
                           stochastic_update=False)
        partial_predict = predict[mask]
        partial_cell_type = sim_cell_type[mask]
        Accuracy,perm = permute_accuracy(predict,sim_cell_type)
        Accuracy_same = accuracy_with_perm(partial_predict,
                                           partial_cell_type,
                                           perm)
        Accrs_spatial.append(Accuracy)
        Accrs_spatial_same.append(Accuracy_same)
        print("Permute accuracy from true neighbourhood of mixd cell:%f"%(Accuracy_same))
        print("Permute accuracy for all:%f"%(Accuracy))
    print("#####################################")
    print("\n")
    
    ### Initialize using gene model
    ### Update with the prediction of gene model
    print("#####################################")
    print("Training with spatio and gene expression.")
    Accrs_both = []
    Accrs_both_same = []
    
    batch = data_loader.xs
    posterior,ll,_ = model.expectation(batch,
                                  spatio_factor=0,
                                  gene_factor=1,
                                  prior_factor = 0.0)
    data_loader.renew_neighbourhood(posterior.transpose(),
                                    threshold_distance=thres_dist)
    batch = data_loader.xs
    model.maximization(batch,
                       posterior,
                       update_gene_model = False,
                       update_spatio_model = True,
                       stochastic_update=False)
    icm_steps = 30
    both_rounds = 10
    for i in np.arange(both_rounds):
        batch,_ = data_loader.next_batch(sim_gene_expression.shape[0],shuffle= False)
        # Inner loop: repeat the expectation step and partially refresh the
        # neighbourhood from the new posterior before each maximization.
        for j in np.arange(icm_steps):
            posterior,ll,_ = model.expectation(batch,
                                      spatio_factor=1.2,
                                      gene_factor=1,
                                      prior_factor = 0,
                                      equal_contrib = True)
            data_loader.renew_neighbourhood(posterior.transpose(),
                                            partial_update = 0.1,
                                            threshold_distance=thres_dist)
        predict = np.argmax(posterior,axis=0)
        model.maximization(batch,
                           posterior,
                           decay = 0.5,
                           update_gene_model = False,
                           update_spatio_model = True,
                           stochastic_update=False)
        partial_predict = predict[mask]
        partial_cell_type = sim_cell_type[mask]
        Accuracy,perm = permute_accuracy(predict,sim_cell_type)
        Accuracy_same = accuracy_with_perm(partial_predict,
                                           partial_cell_type,
                                           perm)
        Accrs_both.append(Accuracy)
        Accrs_both_same.append(Accuracy_same)
        print("Permute accuracy of mixd cell:%f"%(Accuracy_same))
        print("Permute accuracy for all:%f"%(Accuracy))
        print("Likelihood %.2f"%(ll))
    print("#####################################")
    print("\n")
    with open(os.path.join(result_f,"sg_model.bn"),'wb+') as f:
        pickle.dump(model,f)
    ### Begin the plot
    plt.close('all')
    fig = plt.figure(figsize = (20,10))
    ax = fig.add_subplot(111, projection='3d')
    # 2-D t-SNE layout of the neighbourhood vectors, shared by every panel.
    nb_reduced = manifold.TSNE().fit_transform(sim_cell_neighbour)
    color_map = np.asarray(['r','g','b'])
    # Index 0 (miss) -> red, index 1 (hit) -> green.
    hit_map = np.asarray(['red','green'])
    
    ### True label plot
    ax.scatter(sim_cell_neighbour[:,0],
               sim_cell_neighbour[:,1],
               sim_cell_neighbour[:,2],
               c=color_map[sim_cell_type])
    # axs holds the "predict" panels, axs2 the "hit" panels.
    figs,axs = plt.subplots(nrows = 2,ncols =2,figsize = (20,10))
    figs2,axs2 = plt.subplots(nrows = 2,ncols = 2,figsize=(20,10))
    ax = axs[0][0]
    scatter = axs[0][0].scatter(nb_reduced[:,0],
                                nb_reduced[:,1],
                                c = color_map[sim_cell_type],s = 10)
    axs[0][0].set_title("True label")
    scatter = axs2[0][0].scatter(nb_reduced[:,0],nb_reduced[:,1],c = sim_cell_type,s=10)
    ax = axs2[0][0]
    axs2[0][0].set_title("True label")
        
    ### Spatio model plot
    for i in np.arange(10):
        posterior_spatio,ll,_ = model.expectation(batch,
                                      spatio_factor=1,
                                      gene_factor=1,
                                      prior_factor = 1.0,
                                      equal_contrib = True)
        data_loader.renew_neighbourhood(posterior_spatio.transpose(),
                                        threshold_distance=thres_dist)
    posterior_spatio,ll,_ = model.expectation(batch,
                                  spatio_factor=1,
                                  gene_factor=0,
                                  prior_factor = 0.0)
    predict_spatio = np.argmax(posterior_spatio,axis=0)
    ari_spatio = adjusted_rand_score(predict_spatio,sim_cell_type)
    print("Adjusted rand score of spatio model only %.3f"%(ari_spatio))
    perm_accur_spatio,perm_spatio = permute_accuracy(predict_spatio,sim_cell_type)
    print("Best accuracy of spatio model only %.3f"%(perm_accur_spatio))
    ax = axs[0][1]
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = color_map[predict_spatio],
                         s = 10)
    ax.set_title("Predict by spatio model")
    #Plot the hit
    ax = axs2[0][1]
    predict_spatio,_ = tag2int(predict_spatio)
    # A cell is a hit when its predicted label p is paired with its true
    # label i in the permutation returned by permute_accuracy.
    hit_spatio = np.zeros(len(predict_spatio))
    for i,p in enumerate(perm_spatio):
        hit_spatio = np.logical_or(hit_spatio,(predict_spatio==p)*(int_type==i))
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = hit_map[hit_spatio.astype(int)],
                         s = 10)
    ax.set_title("Hit by spatio model")
    
    ### Gene model plot
    accur,perm_gene = permute_accuracy(predict_gene,sim_cell_type)
    ari_gene = adjusted_rand_score(predict_gene,sim_cell_type)
    print("Adjusted rand score of gene model only %.3f"%(ari_gene))
    print("Best accuracy of gene model only %.3f"%(accur))
    ax = axs[1][0]
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = color_map[predict_gene],
                         s = 10)
    ax.set_title("Predict by gene model")
    ax = axs2[1][0]
    predict_gene,_ = tag2int(predict_gene)
    hit_gene = np.zeros(len(predict_gene))
    for i,p in enumerate(perm_gene):
        hit_gene = np.logical_or(hit_gene,(predict_gene==p)*(int_type==i))
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = hit_map[hit_gene.astype(int)],
                         s = 10)
    ax.set_title("Hit by gene model")
    
    
    ###Gene+spatio model plot
    posterior_sg,ll,expectations = model.expectation(batch,
                                  spatio_factor=1,
                                  gene_factor=1,
                                  prior_factor = 0.0,
                                  equal_contrib = True)
    predict_sg = np.argmax(posterior_sg,axis=0)
    ari_sg = adjusted_rand_score(predict_sg,sim_cell_type)
    print("Adjusted rand score of gene+spatio model %.3f"%(ari_sg))
    accr_sg,perm_sg = permute_accuracy(predict_sg,sim_cell_type)
    print("Best accuracy of gene+spatio model %.3f"%(accr_sg))
    ax = axs[1][1]
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = color_map[predict_sg],
                         s = 10)
    ax.set_title("Predict by gene+spatio model")
    ax = axs2[1][1]
    predict_sg,_ = tag2int(predict_sg)
    hit_sg = np.zeros(len(predict_sg))
    for i,p in enumerate(perm_sg):
        hit_sg = np.logical_or(hit_sg,(predict_sg==p)*(int_type==i))
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = hit_map[hit_sg.astype(int)],
                         s = 10)
    ax.set_title("Hit by gene+spatio model")
    
    ###Check different factor setting.
    # Sweep the spatial weight over [0, 1) in steps of 0.01 with the gene
    # weight fixed at 1 and record the permutation accuracy for each value.
    accurs = []
    spatio_factors = []
    lls = []
    for factor in np.arange(0,1,0.01):
        posterior_sg,ll,_ = model.expectation(batch,
                                  spatio_factor=factor,
                                  gene_factor=1,
                                  prior_factor = 0.0,
                                  equal_contrib = False)
        predict_sg = np.argmax(posterior_sg,axis=0)
        spatio_factors.append(factor)
        accurs.append(permute_accuracy(predict_sg,sim_cell_type)[0])
        lls.append(ll)
    idx = np.argmax(accurs)
    plt.figure()
    plt.plot(spatio_factors,accurs)
    plt.xlabel("The spatio factor(gene factor is 1)")
    plt.ylabel("The permute accuracy.")
    plt.title("The permute accuracy across different spatio factor.")
    print("Best accuracy of gene+spatio model %.3f, with spatio factor %.3f"%(accurs[idx],spatio_factors[idx]))
    return (ari_gene,ari_spatio,ari_sg),(accur,perm_accur_spatio,accr_sg)
Ejemplo n.º 4
0
    expression_f = os.path.join(data_f,prefix+".expression")
    gene_f = os.path.join(data_f,prefix+".genes")
    coordinate = read_smfish_data(coordinate_f,data_type = np.int)
    coordinate = coordinate[:,1:]
    expression = read_smfish_data(expression_f,data_type = np.float)
    gene_list = read_smfish_gene(gene_f)
    fields = split_field(coordinate,y_bin = [-2000])
    fig,ax = plt.subplots()
    ax.scatter(x = coordinate[:,0],y = coordinate[:,1],c = fields,cmap = 'tab20c')
    coordinate = np.concatenate((coordinate,np.zeros((len(coordinate),1))),axis=1)
    real_df = RealDataLoader(expression,
                             coordinate,
                             threshold_distance = threshold_distance,
                             cell_labels = np.zeros(len(expression)),
                             num_class = n_c,
                             field = fields,
                             for_eval = False)
    dop.save_loader(real_df,save_f+str(0))
    for i,f in enumerate(set(fields)):
        mask = fields==f
        smfish_loader = RealDataLoader(expression[mask,:],
									   coordinate[mask,:],
									   threshold_distance = threshold_distance,
	                                   gene_list = gene_list,
	                                   cell_labels = np.zeros(len(expression)),
	                                   num_class = n_c,
	                                   field = [f]*sum(mask),
	                                   for_eval = False)
        dop.save_smfish(smfish_loader,save_f+str(i+1))
        dop.save_loader(smfish_loader,save_f+str(i+1))
    
Ejemplo n.º 5
0
def main(sim_data,base_f,run_idx,n_cell_type,reduced_dimension):
    """Run one simulated FICT benchmark and save labels, model and figures.

    For run ``run_idx`` the function writes the training configuration,
    trains an embedding model on the simulated expression, fits a gene-only
    ``FICT_EM`` model and a gene+spatial model (via ``load_train``), prints
    their accuracy / adjusted Rand index and saves predictions, the pickled
    gene+spatial model and all collected figures under
    ``<base_f>/FICT_result/<run_idx>``.

    NOTE(review): the body reads the module-level names ``sim``
    (``sim.g_mean``, ``sim.coor``) and ``TRAIN_CONFIG``; both must exist in
    the enclosing module before this function is called.

    Args:
        sim_data: 6-tuple ``(sim_gene_expression, sim_cell_type,
            sim_cell_neighbour, mix_mean, mix_cov, mix_cells)``.
            ``sim_cell_type`` is used to index a 5-colour map, so it
            presumably holds integer labels 0..4 — TODO confirm.
        base_f: Root output folder.
        run_idx: Integer run index; used as folder / file suffix.
        n_cell_type: Number of classes passed to ``load_train``.
        reduced_dimension: Embedding size, also the feature dimension given
            to ``FICT_EM``.

    Returns:
        ``((ari_gene, ari_sg), (accur, accr_sg))`` — adjusted Rand indices
        and permutation accuracies of the gene-only and gene+spatial
        predictions.
    """
    fict_folder = os.path.join(base_f,'FICT_result/')
    os.makedirs(fict_folder,exist_ok = True)
    result_f = fict_folder+str(run_idx)
    os.makedirs(result_f,exist_ok = True)
    with open(os.path.join(result_f,'config.json'),'w+') as f:
        json.dump(TRAIN_CONFIG,f)
    sim_gene_expression,sim_cell_type,sim_cell_neighbour,mix_mean,mix_cov,mix_cells = sim_data
    # Marks the cells listed in mix_cells.  The builtin ``bool`` is used
    # because the ``np.bool`` alias was removed from NumPy
    # (AttributeError on >= 1.24).
    mask = np.zeros(len(sim_cell_type),dtype = bool)
    mask[mix_cells] = True
    reduced_d = reduced_dimension
    k_n = n_cell_type
    ### train a embedding model from the simulated gene expression
    print("Begin training the embedding model.")
    gene_train,gene_test,type_train,type_test = train_test_split(
            sim_gene_expression,sim_cell_type,test_size=0.2,random_state=42)
    np.savez(os.path.join(result_f,'sim_gene_all.npz'),
             feature = sim_gene_expression,
             labels = sim_cell_type)
    np.savez(os.path.join(result_f,'sim_gene_train.npz'),
             feature = gene_train,
             labels = type_train)
    np.savez(os.path.join(result_f,'sim_gene_test.npz'),
             feature = gene_test,
             labels = type_test)
    # Minimal attribute container standing in for parsed command-line args.
    class Args:
        pass
    args = Args()
    print("%d run"%(run_idx))
    # NOTE(review): training uses the full data set, which includes the
    # held-out test split used for evaluation — confirm this is intended.
    args.train_data = os.path.join(result_f,'sim_gene_all.npz')
    args.eval_data = os.path.join(result_f,'sim_gene_test.npz')
    args.log_dir = result_f
    args.model_name = "simulate_embedding"
    args.embedding_size = reduced_d
    args.batch_size = sim_gene_expression.shape[0]
    args.step_rate=4e-3
    args.drop_out = 0.9
    args.epoches = 300
    args.threads = 5
    args.retrain = False
    args.device = None
    # File name -> figure; everything in here is saved at the very end.
    fig_collection = {}
    train_wrapper(args)
    embedding_file = os.path.join(result_f,'simulate_embedding/')
    embedding = emb.load_embedding(embedding_file)
    
    ### Dimensional reduction of simulated gene expression using PCA or embedding
    class_n,gene_n = sim.g_mean.shape
    fig,axs = plt.subplots()
    plot_freq(sim_cell_neighbour,sim_cell_type,np.arange(class_n),axs)
    fig_collection['Frequency_plot.png'] = fig
    arti_posterior = one_hot_vector(sim_cell_type)[0]
    int_type,tags = tag2int(sim_cell_type)
    np.random.shuffle(arti_posterior)
    data_loader = RealDataLoader(sim_gene_expression,
                                 sim.coor,
                                 threshold_distance = 1,
                                 num_class = class_n,
                                 cell_labels = sim_cell_type,
                                 gene_list = np.arange(sim_gene_expression.shape[1]))
    data_loader.dim_reduce(method = "Embedding",embedding = embedding)
    
    data_folder = os.path.join(base_f,'data/')
    os.makedirs(data_folder,exist_ok = True)
    save_smfish(data_loader,data_folder+str(run_idx),is_labeled = True)
    save_loader(data_loader,data_folder+str(run_idx))
    # NOTE(review): this figure is closed by plt.close('all') just below
    # and is not part of fig_collection, so it is never saved.
    plt.figure()
    plt.scatter(sim.coor[:,0],sim.coor[:,1],c = sim_cell_type)
    plt.title("Cell scatter plot.")
    ###Train the model
    ### Begin the plot
    plt.close('all')
    fig = plt.figure(figsize = (20,10))
    fig_collection['3d_type_neighbourhood.png'] = fig
    ax = fig.add_subplot(111, projection='3d')
    # 2-D t-SNE layout of the neighbourhood vectors, shared by every panel.
    nb_reduced = manifold.TSNE().fit_transform(sim_cell_neighbour)
    color_map = np.asarray(['r','g','b','yellow','purple'])
    # Index 0 (miss) -> red, index 1 (hit) -> green.
    hit_map = np.asarray(['red','green'])
    
    ### True label plot
    ax.scatter(sim_cell_neighbour[:,0],
               sim_cell_neighbour[:,1],
               sim_cell_neighbour[:,2],
               c=color_map[sim_cell_type])
    figs,axs = plt.subplots(nrows = 2,ncols =2,figsize = (20,10))
    figs2,axs2 = plt.subplots(nrows = 2,ncols = 2,figsize=(20,10))
    # axs (figs) receives the "Predict by ..." panels and axs2 (figs2) the
    # "Hit by ..." panels, so the file names must follow that pairing.
    fig_collection['predictions.png'] = figs
    fig_collection['hits.png'] = figs2
    ax = axs[0][0]
    scatter = axs[0][0].scatter(nb_reduced[:,0],
                                nb_reduced[:,1],
                                c = color_map[sim_cell_type],s = 10)
    axs[0][0].set_title("True label")
    scatter = axs2[0][0].scatter(nb_reduced[:,0],nb_reduced[:,1],c = sim_cell_type,s=10)
    ax = axs2[0][0]
    axs2[0][0].set_title("True label")
    
    batch = data_loader.xs
    model_gene = FICT_EM(reduced_d,class_n)
    em_epoches = 5
    thres_dist = 1
    arti_posterior = one_hot_vector(sim_cell_type)[0]
    int_type,tags = tag2int(sim_cell_type)
    # Shuffle the one-hot labels so the initial neighbourhood carries no
    # information about the true cell types.
    np.random.shuffle(arti_posterior)
    data_loader.renew_neighbourhood(arti_posterior,
                                    threshold_distance=thres_dist)
    batch = data_loader.xs
    model_gene.gaussain_initialize(batch[0])
    ### Gene model plot
    for i in np.arange(em_epoches):
        posterior_gene,ll,_ = model_gene.expectation(batch,
                                           spatio_factor=0,
                                           gene_factor=1,
                                           prior_factor = 0.0)
        model_gene.maximization(batch,
                                posterior_gene,
                                decay = 0.5,
                                update_gene_model = True,
                                update_spatio_model = False,
                                stochastic_update=False)
    predict_gene = np.argmax(posterior_gene,axis=0)
    np.savetxt(os.path.join(result_f,'label_g.csv'),predict_gene.astype(int))
    accur,perm_gene = permute_accuracy(predict_gene,sim_cell_type)
    ari_gene = adjusted_rand_score(predict_gene,sim_cell_type)
    print("Adjusted rand score of gene model only %.3f"%(ari_gene))
    print("Best accuracy of gene model only %.3f"%(accur))
    ax = axs[1][0]
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = color_map[predict_gene],
                         s = 10)
    ax.set_title("Predict by gene model")
    ax = axs2[1][0]
    predict_gene,_ = tag2int(predict_gene)
    # A cell is a hit when its predicted label p is paired with its true
    # label i in the permutation returned by permute_accuracy.
    hit_gene = np.zeros(len(predict_gene))
    for i,p in enumerate(perm_gene):
        hit_gene = np.logical_or(hit_gene,(predict_gene==p)*(int_type==i))
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = hit_map[hit_gene.astype(int)],
                         s = 10)
    ax.set_title("Hit by gene model")
    
    model = load_train(data_loader,num_class = k_n)
    with open(os.path.join(result_f,"sg_model.bn"),'wb+') as f:
        pickle.dump(model,f)
    ###Gene+spatio model plot
    # Seed the neighbourhood from a gene-only posterior, then alternate the
    # expectation step with partial neighbourhood updates for 30 rounds.
    posterior_sg,_,_ = model.expectation(batch,
                                         gene_factor = 1,
                                         spatio_factor = 0,
                                         prior_factor = 0)
    data_loader.renew_neighbourhood(posterior_sg.T,
                                    nearest_k =None,
                                    threshold_distance = 1)
    batch = data_loader.xs
    for k in np.arange(30):
        posterior_sg,_,_ = model.expectation(batch,
                                             gene_factor = 1,
                                             spatio_factor = 1,
                                             prior_factor = 0,
                                             equal_contrib = False)
        data_loader.renew_neighbourhood(posterior_sg.T,
                                        nearest_k =None,
                                        threshold_distance = 1,
                                        partial_update = 0.1)
        batch = data_loader.xs
    posterior_sg,_,_ = model.expectation(batch,
                                         gene_factor = 1,
                                         spatio_factor = 1,
                                         prior_factor = 0,
                                         equal_contrib = False)
    predict_sg = np.argmax(posterior_sg,axis=0)
    np.savetxt(os.path.join(result_f,'label_sg.csv'),predict_sg.astype(int))
    ari_sg = adjusted_rand_score(predict_sg,sim_cell_type)
    print("Adjusted rand score of gene+spatio model %.3f"%(ari_sg))
    accr_sg,perm_sg = permute_accuracy(predict_sg,sim_cell_type)
    print("Best accuracy of gene+spatio model %.3f"%(accr_sg))
    ax = axs[1][1]
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = color_map[predict_sg],
                         s = 10)
    ax.set_title("Predict by gene+spatio model")
    ax = axs2[1][1]
    predict_sg,_ = tag2int(predict_sg)
    hit_sg = np.zeros(len(predict_sg))
    for i,p in enumerate(perm_sg):
        hit_sg = np.logical_or(hit_sg,(predict_sg==p)*(int_type==i))
    scatter = ax.scatter(nb_reduced[:,0],
                         nb_reduced[:,1],
                         c = hit_map[hit_sg.astype(int)],
                         s = 10)
    ax.set_title("Hit by gene+spatio model")
    
    ###Check different factor setting.
    # Sweep the spatial weight over [0, 1) in steps of 0.01 with the gene
    # weight fixed at 1 and record the permutation accuracy for each value.
    accurs = []
    spatio_factors = []
    lls = []
    for factor in np.arange(0,1,0.01):
        posterior_sg,ll,_ = model.expectation(batch,
                                  spatio_factor=factor,
                                  gene_factor=1,
                                  prior_factor = 0.0,
                                  equal_contrib = False)
        predict_sg = np.argmax(posterior_sg,axis=0)
        spatio_factors.append(factor)
        accurs.append(permute_accuracy(predict_sg,sim_cell_type)[0])
        lls.append(ll)
    idx = np.argmax(accurs)
    fig = plt.figure()
    fig_collection['Accuracy_spatio_factor.png'] = fig
    plt.plot(spatio_factors,accurs)
    plt.xlabel("The spatio factor(gene factor is 1)")
    plt.ylabel("The permute accuracy.")
    plt.title("The permute accuracy across different spatio factor.")
    print("Best accuracy of gene+spatio model %.3f, with spatio factor %.3f"%(accurs[idx],spatio_factors[idx]))
    for fig_n, fig in fig_collection.items():
        fig.savefig(os.path.join(result_f,fig_n))
    return (ari_gene,ari_sg),(accur,accr_sg)