예제 #1
0
def process_mer_fish(exp_file, annotation_file, d):
    """Preprocess a MERFISH expression table and write the result under ``d``.

    Parameters
    ----------
    exp_file: Path of the CSV expression table (first column is the index).
    annotation_file: Path of a CSV file containing a ``gene`` column.
    d: Output location; ``/expressions.txt`` and ``/positions.txt`` are
       appended to it verbatim.
    """
    raw_counts = pd.read_csv(exp_file, index_col=0)
    gene_annotations = pd.read_csv(annotation_file)['gene']

    # Combine the raw table with the annotations into positions + expression.
    aggregated = aggregate_data(raw_counts, gene_annotations)
    X = aggregated['X']
    exp = aggregated['exp']

    # Drop practically unobserved genes (summed expression below 3).
    gene_totals = exp.sum(0)
    exp = exp.T[gene_totals >= 3].T

    # Total counts per cell, named to match the regression formula below.
    tot = pd.DataFrame(exp.sum(1))
    tot.columns = ['total_count']

    # Convert data to log-scale, then regress out the depth.
    stabilized = NaiveDE.stabilize(exp.T).T
    res = NaiveDE.regress_out(tot, stabilized.T, 'np.log(total_count)').T

    # Keep the depth itself as a pseudogene for reference.
    res['log_total_count'] = np.log(tot['total_count'])

    expressions_path = d + '/expressions.txt'
    positions_path = d + '/positions.txt'
    res.to_csv(expressions_path, sep=' ', header=True, index=False)
    X.to_csv(positions_path, sep=',', header=False, index=False)
def main():
    """Run the SpatialDE test and model search on the rep6 'middle' data.

    Writes ``middle_sample_info.csv``, ``middle_final_results.csv`` and
    ``middle_MS_results.csv`` to the working directory and returns the
    SpatialDE results table.
    """
    counts = pd.read_csv('data/rep6/middle_exp_mat.csv', index_col=0)

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    # Sample annotation provides the coordinates and the total counts.
    sample_info = pd.read_csv('data/rep6/middle_sample_info.csv', index_col=0)
    counts = counts.loc[sample_info.index]
    X = sample_info[['abs_X', 'abs_Y']]

    # Convert data to log-scale, and account for depth.
    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_count)').T

    # Add total_count as pseudogene for reference.
    res['log_total_count'] = np.log(sample_info['total_count'])

    # Spatial DE test with default settings; q-values recomputed with pi_0 = 1.
    results = SpatialDE.run(X, res)
    results['qval'] = SpatialDE.util.qvalue(results['pval'], pi0=1.0)

    # Save results and annotation for interactive plotting and interpretation.
    sample_info.to_csv('middle_sample_info.csv')
    results.to_csv('middle_final_results.csv')

    # Model search is restricted to the significant genes.
    significant = results[results.qval < 0.05].copy()
    ms_results = SpatialDE.model_search(X, res, significant)
    ms_results.to_csv('middle_MS_results.csv')

    return results
예제 #3
0
def main():
    """Run the SpatialDE test and model search on ``10t.csv``.

    Writes ``10t_sample_info.csv``, ``10t_final_results.csv`` and
    ``10t_MS_results.csv`` and returns the SpatialDE results table.
    """
    counts = pd.read_csv('10t.csv', index_col=0)

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    # Coordinates are derived from the sample labels.
    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)

    # Remove empty features.
    sample_info = sample_info.query('total_counts > 10')
    counts = counts.loc[sample_info.index]
    X = sample_info[['x', 'y']]

    # Convert data to log-scale, and account for depth.
    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    # Add total_count as pseudogene for reference.
    res['log_total_count'] = np.log(sample_info['total_counts'])

    # Perform Spatial DE test with default settings.
    results = SpatialDE.run(X, res)

    # Save results and annotation for interactive plotting and interpretation.
    sample_info.to_csv('10t_sample_info.csv')
    results.to_csv('10t_final_results.csv')

    # Model search is restricted to the significant genes.
    significant = results[results.qval < 0.05].copy()
    ms_results = SpatialDE.model_search(X, res, significant)
    ms_results.to_csv('10t_MS_results.csv')

    return results
예제 #4
0
def process_image(tiff_dir, mask_file, d):
    """Aggregate an image into per-cell expression and write it under ``d``.

    Parameters
    ----------
    tiff_dir: Passed through to ``aggregate_image``.
    mask_file: Passed through to ``aggregate_image``.
    d: Output location; ``/expressions.txt`` and ``/positions.txt`` are
       appended to it verbatim.
    """
    # Read and aggregate data.
    data = aggregate_image(tiff_dir, mask_file)
    X, exp = data['X'], data['exp']
    exp = filter_out(exp)

    # Get total counts per cell.
    tot = pd.DataFrame(exp.sum(1))
    tot.columns = ['total_count']

    # Keep only cells whose total count is strictly greater than 3 (cells
    # with a total of exactly 3 are dropped as well).  The mask is computed
    # once as a 1-D array so the same rows are selected from all three
    # tables, instead of relying on how a 2-D (n, 1) array is interpreted
    # as a row indexer.
    keep = tot['total_count'].values > 3
    X = X[keep]
    exp = exp[keep]
    tot = tot[keep]

    # Convert data to log-scale, and account for depth.
    dfm = NaiveDE.stabilize(exp.T).T
    res = NaiveDE.regress_out(tot, dfm.T, 'np.log(total_count)').T

    # NOTE: the log_total_count pseudogene is not added to the output here.

    res.to_csv(d + '/expressions.txt', sep=' ', header=True, index=False)
    X.to_csv(d + '/positions.txt', sep=',', header=False, index=False)
예제 #5
0
def main():
    """Run the SpatialDE test and model search on the breast cancer layer 2 data.

    Writes ``BC_sample_info.csv``, ``BC_final_results.csv`` and
    ``BC_MS_results.csv`` and returns the SpatialDE results table.
    """
    counts = pd.read_table('data/Layer2_BC_count_matrix-1.tsv', index_col=0)

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)

    # Remove empty features.
    sample_info = sample_info.query('total_counts > 5')
    counts = counts.loc[sample_info.index]
    X = sample_info[['x', 'y']]

    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    # Add total_count as pseudogene for reference.
    res['log_total_count'] = np.log(sample_info['total_counts'])

    results = SpatialDE.run(X, res)

    sample_info.to_csv('BC_sample_info.csv')
    results.to_csv('BC_final_results.csv')

    # Model search is restricted to the significant genes.
    significant = results[results.qval < 0.05].copy()
    ms_results = SpatialDE.model_search(X, res, significant)
    ms_results.to_csv('BC_MS_results.csv')

    return results
예제 #6
0
def main():
    """Run the SpatialDE test over the ``hpf`` time axis of the frog data.

    Writes ``Frog_final_results.csv`` and ``Frog_MS_results.csv`` and returns
    the SpatialDE results table.
    """
    # Sample annotation carries the time point of each sample.
    sample_info = pd.read_csv('Frog_sample_info.csv', index_col=0)

    # Load expression and keep only the annotated samples (columns).
    expression = pd.read_csv(
        'data/GSE65785_clutchApolyA_relative_TPM.csv', index_col=0)
    expression = expression[sample_info.index]

    # Filter practically unobserved genes (row sum below 3).
    row_totals = expression.sum(1)
    expression = expression[row_totals >= 3]

    X = sample_info[['hpf']]

    # Convert expression data to log scale, then regress out technical
    # factors; the final transpose puts genes in columns.
    stabilized = NaiveDE.stabilize(expression)
    technical_formula = 'np.log(ERCC) + np.log(num_genes)'
    res = NaiveDE.regress_out(
        sample_info, stabilized, technical_formula, rcond=1e-4).T

    # Add technical factors as pseudogenes for reference.
    res['log_num_genes'] = np.log(sample_info['num_genes'])
    res['log_ERCC'] = np.log(sample_info['ERCC'])

    # Perform Spatial DE test with default settings.
    results = SpatialDE.run(X, res)

    # Save results for interactive plotting and interpretation.
    results.to_csv('Frog_final_results.csv')

    # Model search is restricted to the significant genes.
    significant = results[results.qval < 0.05].copy()
    ms_results = SpatialDE.model_search(X, res, significant)
    ms_results.to_csv('Frog_MS_results.csv')

    return results
예제 #7
0
def main():
    """Run the SpatialDE test and model search on ``exp_mat_43.csv``.

    Writes ``final_results_43.csv`` and ``MS_results_43.csv`` and returns the
    SpatialDE results table.
    """
    expression = pd.read_csv('exp_mat_43.csv', index_col=0)
    # Column labels are converted to int before selecting by the
    # sample_info index below.
    expression.columns = expression.columns.map(int)

    # Sample annotation provides the coordinates and the total counts.
    sample_info = pd.read_csv('sample_info_43.csv', index_col=0)
    expression = expression[sample_info.index]
    X = sample_info[['x', 'y']]

    # Convert data to log-scale, and account for depth.
    stabilized = NaiveDE.stabilize(expression.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized, 'np.log(total_count)').T

    # Add total_count as pseudogene for reference.
    res['log_total_count'] = np.log(sample_info['total_count'])

    # Perform Spatial DE test with default settings.
    results = SpatialDE.run(X, res)

    # Save results for interactive plotting and interpretation.
    results.to_csv('final_results_43.csv')

    # Model search is restricted to the significant genes.
    significant = results[results.qval < 0.05].copy()
    ms_results = SpatialDE.model_search(X, res, significant)
    ms_results.to_csv('MS_results_43.csv')

    return results
예제 #8
0
def main(expression_csv, coordinate_csv, results_csv, model_selection_csv):
    ''' Perform SpatialDE test on data in input files.

    <expression csv> : A CSV file with expression values. Columns are genes,
    and rows are samples.

    <coordinates csv> : A CSV file with sample coordinates. Each row is a sample,
    the columns with coordinates must be named 'x' and 'y'. For other formats
    (e.g. 1d or 3d queries), it is recommended to write a custom Python
    script to do the analysis.

    <output file> : P-values and other relevant values for each gene
    will be stored in this file, in CSV format.

    <model selection csv> : If given (truthy), model search is run on the
    genes with q-value below 0.05 and its results are stored in this file.

    Returns the results table alone when no model selection file is given,
    otherwise the tuple (results, model search results).
    '''
    counts = pd.read_csv(expression_csv, index_col=0)

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    sample_info = pd.read_csv(coordinate_csv, index_col=0)
    sample_info['total_counts'] = counts.sum(1)

    # Remove empty features.
    sample_info = sample_info.query('total_counts > 5')

    counts = counts.loc[sample_info.index]
    X = sample_info[['x', 'y']]

    # Convert data to log-scale, and account for depth.
    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    # Perform Spatial DE test with default settings.
    results = SpatialDE.run(X, res)

    # Save results for interactive plotting and interpretation.
    results.to_csv(results_csv)

    if model_selection_csv:
        significant = results[results.qval < 0.05].copy()
        ms_results = SpatialDE.model_search(X, res, significant)
        ms_results.to_csv(model_selection_csv)
        return results, ms_results

    return results
예제 #9
0
def spatialde_test(adata,
                   coord_columns=['x', 'y'],
                   regress_formula='np.log(total_counts)'):
    ''' Run the SpatialDE test on an AnnData object

    Parameters
    ----------

    adata: An AnnData object with counts in the .X field. The layers
           'stabilized' and 'residual' are added to it as a side effect.

    coord_columns: A list with the columns of adata.obs which represent spatial
                   coordinates. Default ['x', 'y'].

    regress_formula: A patsy formula for linearly regressing out fixed effects
                     from columns in adata.obs before fitting the SpatialDE models.
                     Default is 'np.log(total_counts)'.

    Returns
    -------

    results: A table of spatial statistics for each gene.
    '''
    logging.info('Performing VST for NB counts')
    adata.layers['stabilized'] = NaiveDE.stabilize(adata.X.T).T

    logging.info('Regressing out fixed effects')
    adata.layers['residual'] = NaiveDE.regress_out(
        adata.obs, adata.layers['stabilized'].T, regress_formula).T

    # list(...) keeps the column selection a list even if a caller passes
    # another sequence type; the shared default list is never mutated.
    X = adata.obs[list(coord_columns)].values
    expr_mat = pd.DataFrame.from_records(adata.layers['residual'],
                                         columns=adata.var.index,
                                         index=adata.obs.index)

    results = run(X, expr_mat)

    # Clip 0 pvalues to half of the smallest positive p-value.
    # Series.clip_lower was removed in pandas 1.0; clip(lower=...) is the
    # equivalent call and also works on older pandas versions.
    min_pval = results.query('pval > 0')['pval'].min() / 2
    results['pval'] = results['pval'].clip(lower=min_pval)

    # Correct for multiple testing
    results['qval'] = qvalue(results['pval'], pi0=1.)

    return results
예제 #10
0
def Spatial_DE_AEH(filterd_exprs,coordinates,results,pattern_num,l = 1.05, verbosity = 1):
    """Run SpatialDE automatic expression histology (AEH).

    ``results`` is modified in place: its ``pval`` and ``qval`` columns are
    clipped from below at half of their smallest positive value.

    Returns a dict with the keys ``"histology_results"`` and ``"patterns"``
    holding the two values returned by ``SpatialDE.spatial_patterns``.
    """
    # Work on a copy so the caller's coordinate table gets no extra column.
    design = coordinates.copy()
    design['total_counts'] = filterd_exprs.sum(1)

    stabilized = NaiveDE.stabilize(filterd_exprs.T).T
    res = NaiveDE.regress_out(design, stabilized.T, 'np.log(total_counts)').T

    # Replace zero p-/q-values by half of the smallest positive value.
    pval_floor = results.query('pval > 0')['pval'].min() / 2
    results['pval'] = results['pval'].clip(lower=pval_floor)
    qval_floor = results.query('qval > 0')['qval'].min() / 2
    results['qval'] = results['qval'].clip(lower=qval_floor)

    # Significant genes, excluding the total-count pseudogene.
    sres = results.query('qval < 0.05 & g != "log_total_count"').copy()

    X = coordinates.values
    n_patterns = int(pattern_num)
    histology_results, patterns = SpatialDE.spatial_patterns(
        X, res, sres, n_patterns, l=l, verbosity=verbosity)

    return {"histology_results": histology_results, "patterns": patterns}
예제 #11
0
def main():
    """Time GPclust MOHGP fits on random gene subsets of the MOB data.

    For each subset size in ``Ns`` the fit is repeated 5 times; every
    (N, elapsed seconds) pair is printed and finally written to
    ``AEH_times.csv``.
    """
    sample_info = pd.read_csv('MOB_sample_info.csv', index_col=0)

    counts = pd.read_csv('data/Rep11_MOB_0.csv', index_col=0)
    counts = counts.loc[sample_info.index]

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    X = sample_info[['x', 'y']].values

    times = pd.DataFrame(columns=['N', 'time'])
    Ns = [50, 100, 200, 300, 500, 750, 1000, 2000]

    for N in Ns:
        for _ in range(5):
            # Random subset of N columns of the residual table.
            Y = res.sample(N, axis=1).values.T

            t0 = time()

            m = GPclust.MOHGP(X=X,
                              Y=Y,
                              kernF=kern.RBF(2) + kern.Bias(2),
                              kernY=kern.RBF(1) + kern.White(1),
                              K=5,
                              prior_Z='DP')
            m.hyperparam_opt_args['messages'] = False
            m.optimize(step_length=0.1, verbose=False, maxiter=2000)

            elapsed = time() - t0

            # Rows are appended in order, so the next label equals the
            # number of rows recorded so far.
            row_label = len(times)
            times.loc[row_label] = [N, elapsed]
            print(times.loc[row_label])

    times.to_csv('AEH_times.csv')
예제 #12
0
def main(out_file):
    """Run SpatialDE on a random 80% subsample and save the results.

    The results table is written to ``out_file`` and returned.
    """
    counts = pd.read_table(
        '../../BreastCancer/data/Layer2_BC_count_matrix-1.tsv', index_col=0)

    # Filter practically unobserved genes (column sum below 3).
    gene_totals = counts.sum(0)
    counts = counts.T[gene_totals >= 3].T

    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)

    # Remove empty features.
    sample_info = sample_info.query('total_counts > 5')

    # Sample 80% of the remaining rows.
    sample_info = sample_info.sample(frac=0.8)
    counts = counts.loc[sample_info.index]

    X = sample_info[['x', 'y']]
    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    results = SpatialDE.run(X, res)
    results.to_csv(out_file)

    return results
예제 #13
0
def Spatial_DE(filterd_exprs, coordinates):
    """Run the SpatialDE test and, if anything is significant, model search.

    Exits the interpreter via ``sys.exit`` when the two inputs have a
    different number of rows.

    Returns a dict with the key ``"results"`` and, when at least one gene has
    a q-value below 0.05, additionally ``"ms_results"``.
    """
    n_expr_rows = filterd_exprs.shape[0]
    n_coord_rows = coordinates.shape[0]
    if n_expr_rows != n_coord_rows:
        sys.exit("The number of cells in expression file and location file don't match\n")

    # Work on a copy so the caller's coordinate table gets no extra column.
    design = coordinates.copy()
    design['total_counts'] = filterd_exprs.sum(1)

    stabilized = NaiveDE.stabilize(filterd_exprs.T).T
    res = NaiveDE.regress_out(design, stabilized.T, 'np.log(total_counts)').T
    res['log_total_count'] = np.log(design['total_counts'])

    results = SpatialDE.run(coordinates, res)

    significant = results[results.qval < 0.05].copy()
    if significant.shape[0] > 0:
        ms_results = SpatialDE.model_search(coordinates, res, significant)
        return {"results": results, "ms_results": ms_results}

    print("No spatially variable genes found! \n")
    return {"results": results}
예제 #14
0
import pandas as pd
import NaiveDE
import SpatialDE

# SpatialDE test and model search for the MERFISH Animal18 Bregma0.11 data.
counts = pd.read_csv(
    './processed_data/MERFISH_Animal18_Bregma0.11_countdata.csv',
    index_col=0,
)
# Drop columns whose total is below 3.
counts = counts.T[counts.sum(0) >= 3].T

sample_info = pd.read_csv(
    './processed_data/MERFISH_Animal18_Bregma0.11_info.csv',
    index_col=0,
)
sample_info['total_counts'] = counts.sum(1)

# Put the count rows in the same order as the sample annotation.
counts = counts.loc[sample_info.index]

norm_expr = NaiveDE.stabilize(counts.T).T
resid_expr = NaiveDE.regress_out(
    sample_info, norm_expr.T, 'np.log(total_counts)').T
sample_resid_expr = resid_expr

X = sample_info[['x', 'y']]
results = SpatialDE.run(X, sample_resid_expr)
results.to_csv(
    './output/MERFISH_Animal18_Bregma0.11_spe.csv',
    sep=' ', index=False, header=True,
)

# Model search on the genes with q-value below 0.05.
de_results = results[results.qval < 0.05].copy()
ms_results = SpatialDE.model_search(X, resid_expr, de_results)
ms_results.to_csv(
    './output/MERFISH_Animal18_Bregma0.11_ms_spe.csv',
    sep=' ', index=False, header=True,
)

예제 #15
0
def main():
    """Run automatic expression histology on the 10t data and plot patterns.

    Reads ``10t.csv`` and the previously saved ``10t_final_results.csv``;
    the number of patterns is taken from ``sys.argv[1]``.  Writes the
    histology table, the pattern table and one PDF per pattern, prints the
    member genes of every pattern and returns the histology table.
    """
    df = pd.read_csv('10t.csv', index_col=0)
    df = df.T[df.sum(0) >= 3].T  # Filter practically unobserved genes
    sample_info = get_coords(df.index)
    sample_info['total_counts'] = df.sum(1)
    sample_info = sample_info.query(
        'total_counts > 10')  # Remove empty features
    df = df.loc[sample_info.index]

    dfm = NaiveDE.stabilize(df.T).T
    res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T
    res['log_total_count'] = np.log(sample_info['total_counts'])
    results = pd.read_csv('10t_final_results.csv', index_col=0)

    # Replace zero p-/q-values by half of the smallest positive value.
    # Series.clip_lower was removed in pandas 1.0; clip(lower=...) is the
    # equivalent call and also works on older pandas versions.
    results['pval'] = results['pval'].clip(
        lower=results.query('pval > 0')['pval'].min() / 2)
    results['qval'] = results['qval'].clip(
        lower=results.query('qval > 0')['qval'].min() / 2)

    ymy = int(sys.argv[1])  # number of patterns requested on the command line

    # Significant genes, excluding the total-count pseudogene.
    sres = results.query('qval < 0.05 & g != "log_total_count"').copy()

    X = sample_info[['x', 'y']].values
    histology_results, patterns = SpatialDE.spatial_patterns(X,
                                                             res,
                                                             sres,
                                                             ymy,
                                                             11,
                                                             verbosity=1)
    histology_results.to_csv('10t_AEH_results.{}.csv'.format(ymy))
    patterns.add_prefix('pattern_').to_csv(
        '10t_pattern_results.{}.csv'.format(ymy))

    pattern_ids = histology_results.sort_values('pattern').pattern.unique()

    for i, Ci in enumerate(pattern_ids):
        fig = plt.figure(figsize=(5, 5))
        plt.scatter(sample_info['x'],
                    sample_info['y'],
                    c=patterns[Ci],
                    s=10,
                    cmap=plt.get_cmap("YlOrBr"),
                    edgecolor="none",
                    marker='s')
        plt.axis([0, 50, 0, 50])
        plt.xlim(0, 50)
        plt.ylim(0, 50)
        plt.xticks([0, 10, 20, 30, 40, 50])
        plt.yticks([0, 10, 20, 30, 40, 50])
        plt.axis('equal')
        plt.gca().invert_yaxis()
        # Label and count by the pattern id that is actually plotted (Ci).
        # The enumerate index only matches it when every pattern id
        # 0..K-1 has at least one gene assigned.
        n_genes = histology_results.query('pattern == @Ci').shape[0]
        plt.title('Pattern {} - {} genes'.format(Ci, n_genes), size=20)
        plt.tight_layout()
        plt.savefig("10t.{}.{}.pdf".format(ymy, i), bbox_inches='tight')
        # Release the figure; otherwise every pattern keeps one open.
        plt.close(fig)

    for i in pattern_ids:
        print('Pattern {}'.format(i))
        print(', '.join(
            histology_results.query('pattern == @i').sort_values('membership')
            ['g'].tolist()))
        print()

    return histology_results
예제 #16
0
# Restrict both protein lists to the entries selected by ``sub``.
receptors_proteins = receptors_proteins[sub]
ligand_proteins = ligand_proteins[sub]

# Map every protein id to the first gene name with a matching uniprot id.
eomes_ligand_genes = [np.array(cellPhoneDB_geneInput['gene_name'])[np.array(cellPhoneDB_geneInput['uniprot'] == ligand_proteins[i])][0] for i in range(len(ligand_proteins))]

eomes_receptor_genes = [np.array(cellPhoneDB_geneInput['gene_name'])[np.array(cellPhoneDB_geneInput['uniprot'] == receptors_proteins[i])][0] for i in range(len(ligand_proteins))]

# Both gene collections are plain lists, so ``list == 'CRH'`` is a single
# False and indexing with it always returned element 0.  Build array masks
# so the receptors paired with the named ligand are printed instead.
_ligand_gene_array = np.array(eomes_ligand_genes)
_receptor_gene_array = np.array(eomes_receptor_genes)
print(_receptor_gene_array[_ligand_gene_array == 'CRH'])
print(_receptor_gene_array[_ligand_gene_array == 'ERBB4'])


eomes_ligand_genes = np.unique(eomes_ligand_genes)

norm_expr = NaiveDE.stabilize(counts.T).T

resid_expr = NaiveDE.regress_out(sample_info, norm_expr, 'np.log(Q3_counts)').T

# Boolean column mask: residual columns that are ligand genes.
subset_eomesLigands = [gene in eomes_ligand_genes for gene in resid_expr.keys()]
resid_expr_subset = resid_expr.loc[subset_rois, subset_eomesLigands]

X = sample_info[['x', 'y']][subset_rois]
results = SpatialDE.run(X, resid_expr_subset)

results = results.sort_values('qval')
genes_ranked = np.array(results['g'])

# Benjamini-Hochberg correction, computed once and reused for both the
# significant-gene count and the FDR column.
_fdr = multi.multipletests(results['pval'], method = 'fdr_bh')[1]
genes_significant = sum(_fdr < 0.05)

results['FDR'] = _fdr

# Save results:

colours = np.repeat('blue', sum(subset_rois))
예제 #17
0
    # Restrict the counts to the columns in relevantSangerIDs and the
    # properties table to the rows in relevantIDs.
    counts_subset = counts.loc[:,relevantSangerIDs]
    properties_subset = properties.loc[relevantIDs,:]    
    # For every row of counts_subset: the fraction of the noSamples samples
    # whose count exceeds the value in column 19 of that sample's properties
    # row (presumably a per-sample detection threshold -- TODO confirm).
    geneCounts = [sum([counts_subset.iloc[i,j] > properties_subset.iloc[j,19] for j in range(noSamples)])/noSamples for i in range(np.shape(counts_subset)[0])]
    
    # Index labels of the rows whose fraction exceeds ``threshold``.
    goodGenes = counts_subset.index[np.array(geneCounts) > threshold]
    
    # Recalculate FDR based on those genes only:

    # NOTE(review): results_subset is a filtered selection of ``results``;
    # assigning a new column to it may trigger pandas'
    # SettingWithCopyWarning -- consider taking an explicit .copy().
    results_subset = results[[results['g'].iloc[i] in goodGenes for i in range(len(results['g']))]]
    results_subset['FDR'] = np.array(multi.multipletests(np.array(results_subset['pval']), method = 'fdr_bh')[1])
    # Store the first 100 gene names (as a NumPy array) and the number of
    # genes with FDR below 0.05.
    topGenes[AOI_type] = np.array(results_subset['g'])[0:100]
    numberOfTopsGenes.append(sum(results_subset['FDR'] < 0.05))
    
    norm_expr = NaiveDE.stabilize(counts.T).T

    # Residuals after regressing out np.log(Q3_counts), as a plain array.
    counts_Q3 = np.array(NaiveDE.regress_out(sample_info, norm_expr, 'np.log(Q3_counts)').T).T

    # NOTE(review): a NumPy array was assigned to topGenes[AOI_type] above.
    # If topGenes is a plain dict, ``genes`` is an ndarray and ``.iloc``
    # raises AttributeError; it only works if the container returns a
    # pandas object -- confirm the type of topGenes.
    genes = topGenes[AOI_type]
    genes = genes.iloc[0:30]

    print(AOI_type)
    # One subplot per gene, stacked vertically.
    figure(num=None, figsize=(5, len(genes)*5), dpi=80, facecolor='w', edgecolor='k')
    for i in range(len(genes)):
        plt.subplot(len(genes),1, i + 1)
        # Samples on one of the rSlides slides with the current AOI type.
        sub_subset = [metadata['slide'][i] in rSlides and metadata['AOI_type'][i] == AOI_type for i in range(len(metadata['slide']))]
        plt.scatter(sample_info['x'][sub_subset], counts_Q3[np.where(np.array(counts.index) == genes.iloc[i]),sub_subset], label = genes.iloc[i] + '  EOMESpos', c = 'blue')
        # Samples on the same slides whose AOI type is 'Ring'.
        sub_subset = [metadata['slide'][i] in rSlides and metadata['AOI_type'][i] == 'Ring' for i in range(len(metadata['slide']))]
        plt.scatter(sample_info['x'][sub_subset], counts_Q3[np.where(np.array(counts.index) == genes.iloc[i]),sub_subset], label = genes.iloc[i] + '  Ring', c = 'red')
        plt.title(genes.iloc[i])
        plt.xlabel('CorticalDepth')
        plt.ylabel('Normalized Count')
예제 #18
0
import time

import NaiveDE
import numpy as np
import pandas as pd  # previously missing although ``pd`` is used below
import SpatialDE

# Run SpatialDE on every simulated MOB count table (6 noise levels x 10
# replicates) and store one result file per table.
info = pd.read_csv("../processed_data/Rep11_MOB_info_scgco.csv", index_col=0)
exp_diff = 1

for noise in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
    for irep in range(10):
        ff = "../processed_data/sim_MOB_expdiff" + str(
            exp_diff) + "_noise" + str(noise) + "_counts" + str(irep) + ".csv"
        print(ff)

        df = pd.read_csv(ff, index_col=0)
        df = df.T[df.sum(0) >= 3].T  # drop columns whose total is below 3
        sample_info = info.copy()

        X = sample_info[['x', 'y']]

        # Start of the timed section; the elapsed time is not reported in
        # the visible part of the script.
        start_time = time.time()
        dfm = NaiveDE.stabilize(df.T).T
        res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T
        res['log_total_count'] = np.log(sample_info['total_counts'])
        results = SpatialDE.run(X, res)

        ff = "../spatialde_results/sim_MOB_expdiff" + str(
            exp_diff) + "_noise" + str(noise) + "_counts" + str(
                irep) + "_spe.csv"
        results.to_csv(ff)
예제 #19
0
import glob
# Run SpatialDE once per sample: every ``*_counts.csv`` file is paired with
# a ``*_meta.csv`` input and a ``*_spatialDE_results.csv`` output that share
# its prefix.
samp_counts = glob.glob(
    '/fastscratch/myscratch/shicks1/HumanPilot/sample_data/by_sample_id/*_counts.csv'
)
# Dropping the last 10 characters ('counts.csv') leaves the shared prefix.
samp_meta = np.ravel([[path[:-10] + 'meta.csv'] for path in samp_counts])
samp_output = np.ravel(
    [[path[:-10] + 'spatialDE_results.csv'] for path in samp_counts])
df = pd.DataFrame({
    'counts': samp_counts,
    'meta': samp_meta,
    'out': samp_output
})

for index, row in df.iterrows():
    # Load counts.
    counts = pd.read_csv(row['counts'], index_col=0)
    # Filter practically unobserved genes (column sum below 5).
    gene_totals = counts.sum(axis=0)
    counts = counts.T[gene_totals >= 5].T
    # Load meta data with spatial coordinates.
    sample_info = pd.read_csv(row['meta'], index_col=0)
    # Remove technical variation, then regress out np.log(sum).
    norm_expr = NaiveDE.stabilize(counts.T).T
    resid_expr = NaiveDE.regress_out(sample_info, norm_expr.T, 'np.log(sum)').T
    X = sample_info[['imagerow', 'imagecol']]
    # Print the wall-clock time before and after the SpatialDE run.
    now = datetime.now().strftime("%H:%M:%S")
    print(now)
    results = SpatialDE.run(X, resid_expr)
    now = datetime.now().strftime("%H:%M:%S")
    print(now)
    # Save spatial results.
    results.to_csv(row['out'])
    print("Finished =", row['counts'])
예제 #20
0
# Copy the resized images into an (n, 1, 48, 48) array and drop temporaries.
resize_X = np.asarray(resize_X)
new_X = resize_X.copy().reshape((n, 1, 48, 48))
del resize_X, a, b, i

##-----------------------------------------------------------------------------
# Flatten every image into one column: rows are the a * b grid positions,
# columns are labelled with gene.index.
n, _, a, b = new_X.shape
counts = pd.DataFrame(new_X.reshape(n, a * b)).T
counts.columns = gene.index

# Per-position totals plus the (x, y) grid index of every flattened position.
totals = np.sum(counts, axis=1)
bin1 = np.repeat(np.array(list(range(a))), b)
bin2 = np.tile(np.array(list(range(b))), a)
samples = pd.DataFrame({'x': bin1, 'y': bin2, 'total_counts': totals})

# Regress out np.log(total_counts+1) and restore the image layout.
resid_expr = NaiveDE.regress_out(samples, counts.T, 'np.log(total_counts+1)').T
new_X = resid_expr.T.values.reshape((n, 1, 48, 48))

##-----------------------------------------------------------------------------
##training
##this only will train one ConvNet. For ensemble learning, repeat this 5 times

#use_cuda = torch.cuda.is_available()
output_dim = 30  ##training with 30 clusters

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
net = ConvNet(out_dim=output_dim)
net.apply(weights_init)

num_epoch = 31
batch_size = 128
예제 #21
0
import pandas as pd
import numpy as np
import NaiveDE,SpatialDE
from somde import SomNode

# SOMDE run on one Slide-seq puck; all input paths share this prefix.
dataname = '../slideseq_data/Puck_180819_11_'
df = pd.read_csv(dataname + 'count.csv', sep=',', index_col=1)
corinfo = pd.read_csv(dataname + 'idx.csv', sep=',', index_col=0)
del df['ENSEMBL']
print(df.shape)

# Column sums of the count table, one per row of corinfo.
corinfo["total_count"] = df.sum(0)

# stabilize and regress_out work gene by cell, whereas run is cell by gene.
dfm = NaiveDE.stabilize(df)
res = NaiveDE.regress_out(corinfo, dfm, 'np.log(total_count)').T

# Build the SOM node object from the float32 coordinates, then run its steps.
X = corinfo[['x', 'y']].values.astype(np.float32)
som4 = SomNode(X, 20)
ndf, ninfo = som4.mtx(df)
r1, numberq = som4.run()
nres = som4.norm()
som4.view()