Exemple #1
0
def process_image(tiff_dir, mask_file, d):
    """Aggregate an image, normalize its expression table and write it to ``d``.

    ``aggregate_image(tiff_dir, mask_file)`` supplies the positions (``'X'``)
    and expression (``'exp'``) tables; the expression table is passed through
    ``filter_out``.  Rows whose total count is not strictly greater than 3 are
    dropped from positions, expression and totals alike.  The stabilized,
    depth-regressed expression is written to ``<d>/expressions.txt``
    (space-separated, with header) and the positions to ``<d>/positions.txt``
    (comma-separated, no header).  Nothing is returned.
    """
    aggregated = aggregate_image(tiff_dir, mask_file)
    X = aggregated['X']
    exp = filter_out(aggregated['exp'])

    # Row-wise totals, kept as a one-column frame because regress_out looks
    # the column up by name in its formula.
    tot = pd.DataFrame(exp.sum(1))
    tot.columns = ['total_count']

    # One mask, computed before any table is filtered, so the three tables
    # stay row-aligned.
    keep = tot.values > 3
    X = X[keep]
    exp = exp[keep]
    tot = tot[keep]

    # Stabilize, then regress out log total count.
    dfm = NaiveDE.stabilize(exp.T).T
    res = NaiveDE.regress_out(tot, dfm.T, 'np.log(total_count)').T

    # Unlike sibling pipelines, no 'log_total_count' pseudogene column is
    # appended to the output here.
    res.to_csv(d + '/expressions.txt', sep=' ', header=True, index=False)
    X.to_csv(d + '/positions.txt', sep=',', header=False, index=False)
def main():
    """Run SpatialDE and model search on the Layer2 BC count matrix.

    Reads ``data/Layer2_BC_count_matrix-1.tsv``, writes
    ``BC_sample_info.csv``, ``BC_final_results.csv`` and
    ``BC_MS_results.csv`` to the working directory, and returns the
    SpatialDE results table.
    """
    counts = pd.read_table('data/Layer2_BC_count_matrix-1.tsv', index_col=0)

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    # Coordinates are derived from the row labels; near-empty features
    # (total count of 5 or less) are removed.
    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)
    sample_info = sample_info.query('total_counts > 5')
    counts = counts.loc[sample_info.index]

    coords = sample_info[['x', 'y']]
    stabilized = NaiveDE.stabilize(counts.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    # Keep log total count as a pseudogene for reference.
    residuals['log_total_count'] = np.log(sample_info['total_counts'])

    results = SpatialDE.run(coords, residuals)

    sample_info.to_csv('BC_sample_info.csv')
    results.to_csv('BC_final_results.csv')

    # Model search is restricted to genes significant at q < 0.05.
    significant = results[results.qval < 0.05].copy()
    searched = SpatialDE.model_search(coords, residuals, significant)
    searched.to_csv('BC_MS_results.csv')

    return results
Exemple #3
0
def process_mer_fish(exp_file, annotation_file, d):
    """Normalize an aggregated expression table and write it under ``d``.

    ``exp_file`` is read as a CSV indexed by its first column, and the
    ``'gene'`` column of ``annotation_file`` is passed with it to
    ``aggregate_data``, which supplies the positions (``'X'``) and expression
    (``'exp'``) tables.  Output goes to ``<d>/expressions.txt``
    (space-separated, with header) and ``<d>/positions.txt``
    (comma-separated, no header).  Nothing is returned.
    """
    raw = pd.read_csv(exp_file, index_col=0)
    gene_names = pd.read_csv(annotation_file)['gene']

    aggregated = aggregate_data(raw, gene_names)
    X = aggregated['X']
    exp = aggregated['exp']

    # Drop practically unobserved genes (column sum below 3).
    observed = exp.sum(0) >= 3
    exp = exp.T[observed].T

    # Row-wise totals as a one-column frame for the regression formula.
    tot = pd.DataFrame(exp.sum(1))
    tot.columns = ['total_count']

    # Stabilize, then regress out log total count.
    dfm = NaiveDE.stabilize(exp.T).T
    res = NaiveDE.regress_out(tot, dfm.T, 'np.log(total_count)').T

    # Keep log total count as a pseudogene for reference.
    res['log_total_count'] = np.log(tot['total_count'])

    res.to_csv(d + '/expressions.txt', sep=' ', header=True, index=False)
    X.to_csv(d + '/positions.txt', sep=',', header=False, index=False)
Exemple #4
0
def main():
    """Run SpatialDE over the ``hpf`` axis of the frog expression data.

    Reads ``Frog_sample_info.csv`` and
    ``data/GSE65785_clutchApolyA_relative_TPM.csv``, writes
    ``Frog_final_results.csv`` and ``Frog_MS_results.csv``, and returns the
    SpatialDE results table.
    """
    sample_info = pd.read_csv('Frog_sample_info.csv', index_col=0)

    expression = pd.read_csv(
        'data/GSE65785_clutchApolyA_relative_TPM.csv', index_col=0)
    # Columns are selected by the sample-info index, then rows summing to
    # less than 3 (practically unobserved genes) are dropped.
    expression = expression[sample_info.index]
    expression = expression[expression.sum(1) >= 3]

    # The single coordinate used by the test is the 'hpf' column.
    coords = sample_info[['hpf']]

    # Here the matrix is passed untransposed; only the regression output is
    # transposed.
    stabilized = NaiveDE.stabilize(expression)
    residuals = NaiveDE.regress_out(
        sample_info,
        stabilized,
        'np.log(ERCC) + np.log(num_genes)',
        rcond=1e-4,
    ).T

    # Keep the technical factors as pseudogenes for reference.
    residuals['log_num_genes'] = np.log(sample_info['num_genes'])
    residuals['log_ERCC'] = np.log(sample_info['ERCC'])

    results = SpatialDE.run(coords, residuals)
    results.to_csv('Frog_final_results.csv')

    # Model search is restricted to genes significant at q < 0.05.
    significant = results[results.qval < 0.05].copy()
    searched = SpatialDE.model_search(coords, residuals, significant)
    searched.to_csv('Frog_MS_results.csv')

    return results
Exemple #5
0
def main():
    """Run SpatialDE and model search on ``exp_mat_43.csv``.

    Reads ``exp_mat_43.csv`` and ``sample_info_43.csv``, writes
    ``final_results_43.csv`` and ``MS_results_43.csv``, and returns the
    SpatialDE results table.
    """
    expression = pd.read_csv('exp_mat_43.csv', index_col=0)
    # Column labels are parsed as integers so they can be matched against
    # the sample-info index below.
    expression.columns = expression.columns.map(int)

    sample_info = pd.read_csv('sample_info_43.csv', index_col=0)
    expression = expression[sample_info.index]

    coords = sample_info[['x', 'y']]

    # NOTE(review): stabilize is applied to the transposed matrix while
    # regress_out receives the untransposed result -- confirm this
    # orientation is intended.
    stabilized = NaiveDE.stabilize(expression.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized, 'np.log(total_count)').T

    # Keep log total count as a pseudogene for reference.
    residuals['log_total_count'] = np.log(sample_info['total_count'])

    results = SpatialDE.run(coords, residuals)
    results.to_csv('final_results_43.csv')

    # Model search is restricted to genes significant at q < 0.05.
    significant = results[results.qval < 0.05].copy()
    searched = SpatialDE.model_search(coords, residuals, significant)
    searched.to_csv('MS_results_43.csv')

    return results
Exemple #6
0
def main():
    """Run SpatialDE and model search on ``10t.csv``.

    Writes ``10t_sample_info.csv``, ``10t_final_results.csv`` and
    ``10t_MS_results.csv`` to the working directory and returns the
    SpatialDE results table.
    """
    counts = pd.read_csv('10t.csv', index_col=0)

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    # Coordinates are derived from the row labels; features with a total
    # count of 10 or less are treated as empty and removed.
    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)
    sample_info = sample_info.query('total_counts > 10')
    counts = counts.loc[sample_info.index]

    coords = sample_info[['x', 'y']]

    # Stabilize, then regress out log total counts.
    stabilized = NaiveDE.stabilize(counts.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    # Keep log total count as a pseudogene for reference.
    residuals['log_total_count'] = np.log(sample_info['total_counts'])

    results = SpatialDE.run(coords, residuals)

    sample_info.to_csv('10t_sample_info.csv')
    results.to_csv('10t_final_results.csv')

    # Model search is restricted to genes significant at q < 0.05.
    significant = results[results.qval < 0.05].copy()
    searched = SpatialDE.model_search(coords, residuals, significant)
    searched.to_csv('10t_MS_results.csv')

    return results
def main():
    """Run SpatialDE and model search on the rep6 ``middle`` data set.

    Reads ``data/rep6/middle_exp_mat.csv`` and
    ``data/rep6/middle_sample_info.csv``, writes ``middle_sample_info.csv``,
    ``middle_final_results.csv`` and ``middle_MS_results.csv`` to the working
    directory, and returns the SpatialDE results table with q-values
    recomputed using ``pi0=1.0``.
    """
    counts = pd.read_csv('data/rep6/middle_exp_mat.csv', index_col=0)

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    # Rows are restricted to, and ordered by, the sample-info index.
    sample_info = pd.read_csv('data/rep6/middle_sample_info.csv', index_col=0)
    counts = counts.loc[sample_info.index]

    coords = sample_info[['abs_X', 'abs_Y']]

    # Stabilize, then regress out log total count.
    stabilized = NaiveDE.stabilize(counts.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_count)').T

    # Keep log total count as a pseudogene for reference.
    residuals['log_total_count'] = np.log(sample_info['total_count'])

    results = SpatialDE.run(coords, residuals)

    # Overwrite the q-values with ones computed under pi_0 = 1.
    results['qval'] = SpatialDE.util.qvalue(results['pval'], pi0=1.0)

    sample_info.to_csv('middle_sample_info.csv')
    results.to_csv('middle_final_results.csv')

    # Model search is restricted to genes significant at q < 0.05.
    significant = results[results.qval < 0.05].copy()
    searched = SpatialDE.model_search(coords, residuals, significant)
    searched.to_csv('middle_MS_results.csv')

    return results
Exemple #8
0
def main(expression_csv, coordinate_csv, results_csv, model_selection_csv):
    ''' Perform the SpatialDE test on data in input files.

    <expression csv> : A CSV file with expression values. Columns are genes,
    and rows are samples.

    <coordinates csv> : A CSV file with sample coordinates. Each row is a
    sample, and the columns with coordinates must be named 'x' and 'y'. For
    other formats (e.g. 1d or 3d queries), it is recommended to write a
    custom Python script to do the analysis.

    <output file> : P-values and other relevant values for each gene
    will be stored in this file, in CSV format.

    <model selection csv> : Optional. When given (truthy), model search is
    run on the genes with q-value below 0.05 and its results are stored in
    this file, in CSV format.

    Returns the results table alone when no model-selection file is given,
    otherwise the tuple (results, model search results).
    '''
    counts = pd.read_csv(expression_csv, index_col=0)

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    sample_info = pd.read_csv(coordinate_csv, index_col=0)

    # Samples with a total count of 5 or less are treated as empty.
    sample_info['total_counts'] = counts.sum(1)
    sample_info = sample_info.query('total_counts > 5')

    counts = counts.loc[sample_info.index]
    coords = sample_info[['x', 'y']]

    # Stabilize, then regress out log total counts.
    stabilized = NaiveDE.stabilize(counts.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    results = SpatialDE.run(coords, residuals)
    results.to_csv(results_csv)

    if model_selection_csv:
        significant = results[results.qval < 0.05].copy()
        searched = SpatialDE.model_search(coords, residuals, significant)
        searched.to_csv(model_selection_csv)
        return results, searched

    return results
Exemple #9
0
def spatialde_test(adata,
                   coord_columns=['x', 'y'],
                   regress_formula='np.log(total_counts)'):
    ''' Run the SpatialDE test on an AnnData object

    Parameters
    ----------

    adata: An AnnData object with counts in the .X field. The layers
           'stabilized' and 'residual' are added to it as a side effect.

    coord_columns: A list with the columns of adata.obs which represent spatial
                   coordinates. Default ['x', 'y']. The list is only read,
                   never modified.

    regress_formula: A patsy formula for linearly regressing out fixed effects
                     from columns in adata.obs before fitting the SpatialDE models.
                     Default is 'np.log(total_counts)'.

    Returns
    -------

    results: A table of spatial statistics for each gene. P-values equal to
             zero are replaced by half the smallest positive p-value, and
             'qval' is recomputed from the clipped p-values with pi0 = 1.
    '''
    logging.info('Performing VST for NB counts')
    adata.layers['stabilized'] = NaiveDE.stabilize(adata.X.T).T

    logging.info('Regressing out fixed effects')
    adata.layers['residual'] = NaiveDE.regress_out(
        adata.obs, adata.layers['stabilized'].T, regress_formula).T

    X = adata.obs[coord_columns].values
    expr_mat = pd.DataFrame.from_records(adata.layers['residual'],
                                         columns=adata.var.index,
                                         index=adata.obs.index)

    results = run(X, expr_mat)

    # Clip 0 pvalues to half of the smallest positive one.
    # Series.clip(lower=...) replaces Series.clip_lower, which no longer
    # exists in pandas >= 1.0.
    min_pval = results.query('pval > 0')['pval'].min() / 2
    results['pval'] = results['pval'].clip(lower=min_pval)

    # Correct for multiple testing
    results['qval'] = qvalue(results['pval'], pi0=1.)

    return results
Exemple #10
0
def Spatial_DE_AEH(filterd_exprs,coordinates,results,pattern_num,l = 1.05, verbosity = 1):
    """Run automatic expression histology (AEH) on significant genes.

    Parameters
    ----------
    filterd_exprs : expression table; its row sums are used as total counts.
    coordinates : table of sample coordinates. It is not modified; all of
        its columns are passed to SpatialDE as the coordinate matrix.
    results : SpatialDE results table with 'pval', 'qval' and 'g' columns.
        It is not modified: clipping is done on an internal copy.
    pattern_num : number of patterns; converted with ``int`` before use.
    l : forwarded to ``SpatialDE.spatial_patterns`` as ``l``.
    verbosity : forwarded to ``SpatialDE.spatial_patterns``.

    Returns
    -------
    dict with keys ``"histology_results"`` and ``"patterns"`` holding the two
    values returned by ``SpatialDE.spatial_patterns``.
    """
    coordinates_cp = coordinates.copy()
    coordinates_cp['total_counts'] = filterd_exprs.sum(1)

    # Stabilize, then regress out log total counts.
    dfm = NaiveDE.stabilize(filterd_exprs.T).T
    res = NaiveDE.regress_out(coordinates_cp, dfm.T, 'np.log(total_counts)').T

    # Clip on a private copy so the caller's results table keeps its
    # original p- and q-values (coordinates is protected the same way above).
    results = results.copy()

    # Zero p/q-values are raised to half of the smallest positive value.
    results['pval'] = results['pval'].clip(lower = results.query('pval > 0')['pval'].min() / 2)
    results['qval'] = results['qval'].clip(lower = results.query('qval > 0')['qval'].min() / 2)

    # Significant genes only, excluding the total-count pseudogene.
    sres = results.query('qval < 0.05 & g != "log_total_count"').copy()

    X = coordinates.values

    histology_results, patterns = SpatialDE.spatial_patterns(X, res, sres, int(pattern_num), l = l,verbosity=verbosity)

    pattern_dic = {"histology_results":histology_results,"patterns":patterns}

    return pattern_dic
Exemple #11
0
def main():
    """Time GPclust MOHGP fits on random gene subsets of increasing size.

    For every subset size in ``Ns`` the fit is repeated 5 times on a fresh
    random sample of residual-expression columns.  Each (N, elapsed seconds)
    pair is printed as it is recorded, and the full table is written to
    ``AEH_times.csv``.  Nothing is returned.
    """
    sample_info = pd.read_csv('MOB_sample_info.csv', index_col=0)

    counts = pd.read_csv('data/Rep11_MOB_0.csv', index_col=0)
    counts = counts.loc[sample_info.index]

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    stabilized = NaiveDE.stabilize(counts.T).T
    res = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    X = sample_info[['x', 'y']].values

    times = pd.DataFrame(columns=['N', 'time'])
    Ns = [50, 100, 200, 300, 500, 750, 1000, 2000]
    repeats = 5

    # Flatten "each size, `repeats` times" into one sequence so the running
    # row number comes straight from enumerate.
    schedule = (size for size in Ns for _ in range(repeats))
    for row, N in enumerate(schedule):
        Y = res.sample(N, axis=1).values.T

        started = time()

        model = GPclust.MOHGP(X=X,
                              Y=Y,
                              kernF=kern.RBF(2) + kern.Bias(2),
                              kernY=kern.RBF(1) + kern.White(1),
                              K=5,
                              prior_Z='DP')

        model.hyperparam_opt_args['messages'] = False
        model.optimize(step_length=0.1, verbose=False, maxiter=2000)

        times.loc[row] = [N, time() - started]
        print(times.loc[row])

    times.to_csv('AEH_times.csv')
def main(out_file):
    """Run SpatialDE on a random 80% subsample and write the results.

    Reads ``../../BreastCancer/data/Layer2_BC_count_matrix-1.tsv``, keeps a
    random 80% of the non-empty features (``DataFrame.sample(frac=0.8)``),
    writes the SpatialDE results table to ``out_file`` and returns it.
    """
    counts = pd.read_table(
        '../../BreastCancer/data/Layer2_BC_count_matrix-1.tsv', index_col=0)

    # Drop practically unobserved genes (column sum below 3).
    observed = counts.sum(0) >= 3
    counts = counts.T[observed].T

    # Coordinates are derived from the row labels; features with a total
    # count of 5 or less are treated as empty and removed.
    sample_info = get_coords(counts.index)
    sample_info['total_counts'] = counts.sum(1)
    sample_info = sample_info.query('total_counts > 5')

    # Random 80% of the remaining features; no seed is set here, so each
    # call draws a different subset unless the caller seeds NumPy.
    sample_info = sample_info.sample(frac=0.8)
    counts = counts.loc[sample_info.index]

    coords = sample_info[['x', 'y']]
    stabilized = NaiveDE.stabilize(counts.T).T
    residuals = NaiveDE.regress_out(
        sample_info, stabilized.T, 'np.log(total_counts)').T

    results = SpatialDE.run(coords, residuals)
    results.to_csv(out_file)

    return results
Exemple #13
0
def Spatial_DE(filterd_exprs, coordinates):
    """Run SpatialDE and, when anything is significant, model search.

    Exits the interpreter via ``sys.exit`` when the two inputs do not have
    the same number of rows.

    Returns a dict with key ``"results"`` (the SpatialDE results table) and,
    only when at least one gene has a q-value below 0.05, ``"ms_results"``
    (the model-search table).  ``coordinates`` itself is not modified.
    """
    if filterd_exprs.shape[0] != coordinates.shape[0]:
        sys.exit("The number of cells in expression file and location file don't match\n")

    # Total counts are attached to a copy so the caller's table is untouched.
    coordinates_cp = coordinates.copy()
    coordinates_cp['total_counts'] = filterd_exprs.sum(1)

    # Stabilize, regress out log total counts, and keep that covariate as a
    # pseudogene column.
    stabilized = NaiveDE.stabilize(filterd_exprs.T).T
    residuals = NaiveDE.regress_out(
        coordinates_cp, stabilized.T, 'np.log(total_counts)').T
    residuals['log_total_count'] = np.log(coordinates_cp['total_counts'])

    results = SpatialDE.run(coordinates, residuals)

    significant = results[results.qval < 0.05].copy()
    if significant.shape[0] > 0:
        ms_results = SpatialDE.model_search(
            coordinates, residuals, significant)
        return {"results": results, "ms_results": ms_results}

    print("No spatially variable genes found! \n")
    return {"results": results}
import glob
# One '<prefix>_counts.csv' file per sample.
samp_counts = glob.glob(
    '/fastscratch/myscratch/shicks1/HumanPilot/sample_data/by_sample_id/*_counts.csv'
)

# 'counts.csv' is 10 characters long, so path[:-10] keeps the sample prefix
# including its trailing underscore; the sibling file names are built on it.
samp_meta = np.array([path[:-10] + 'meta.csv' for path in samp_counts])
samp_output = np.array(
    [path[:-10] + 'spatialDE_results.csv' for path in samp_counts])

df = pd.DataFrame({
    'counts': samp_counts,
    'meta': samp_meta,
    'out': samp_output
})

for index, row in df.iterrows():
    # Load counts and drop practically unobserved genes (column sum below 5).
    counts = pd.read_csv(row['counts'], index_col=0)
    observed = counts.sum(axis=0) >= 5
    counts = counts.T[observed].T

    # Metadata holds the spatial coordinates and the 'sum' covariate.
    sample_info = pd.read_csv(row['meta'], index_col=0)

    # Stabilize, then regress out the log of the 'sum' column.
    norm_expr = NaiveDE.stabilize(counts.T).T
    resid_expr = NaiveDE.regress_out(sample_info, norm_expr.T, 'np.log(sum)').T

    X = sample_info[['imagerow', 'imagecol']]

    # Wall-clock time is printed before and after the SpatialDE run.
    now = datetime.now().strftime("%H:%M:%S")
    print(now)
    results = SpatialDE.run(X, resid_expr)
    now = datetime.now().strftime("%H:%M:%S")
    print(now)

    results.to_csv(row['out'])
    print("Finished =", row['counts'])
Exemple #15
0
import pandas as pd
import NaiveDE
import SpatialDE

# Load the count matrix and drop practically unobserved genes
# (column sum below 3).
counts = pd.read_csv(
    './processed_data/MERFISH_Animal18_Bregma0.11_countdata.csv',
    index_col=0,
)
observed = counts.sum(0) >= 3
counts = counts.T[observed].T

# Sample metadata; total counts are attached for the depth regression and
# the count rows are aligned to the metadata index.
sample_info = pd.read_csv(
    './processed_data/MERFISH_Animal18_Bregma0.11_info.csv',
    index_col=0,
)
sample_info['total_counts'] = counts.sum(1)
counts = counts.loc[sample_info.index]

# Stabilize, then regress out log total counts.
norm_expr = NaiveDE.stabilize(counts.T).T
resid_expr = NaiveDE.regress_out(
    sample_info, norm_expr.T, 'np.log(total_counts)').T
sample_resid_expr = resid_expr  # alias of the same object, not a copy

X = sample_info[['x', 'y']]
results = SpatialDE.run(X, sample_resid_expr)
results.to_csv(
    './output/MERFISH_Animal18_Bregma0.11_spe.csv',
    sep=' ',
    index=False,
    header=True,
)

# Model search on the genes significant at q < 0.05.
de_results = results[results.qval < 0.05].copy()
ms_results = SpatialDE.model_search(X, resid_expr, de_results)
ms_results.to_csv(
    './output/MERFISH_Animal18_Bregma0.11_ms_spe.csv',
    sep=' ',
    index=False,
    header=True,
)

import numpy as np
import NaiveDE
import SpatialDE
import time

# Shared sample metadata (coordinates and total counts) for every simulation.
info = pd.read_csv("../processed_data/Rep11_MOB_info_scgco.csv", index_col=0)
exp_diff = 1

for noise in [0.1, 0.2, 0.3, 0.4, 0.5, 0.6]:
    for irep in range(10):
        # Input and output file names share the same noise/replicate tag.
        tag = "_noise" + str(noise) + "_counts" + str(irep)

        ff = "../processed_data/sim_MOB_expdiff" + str(exp_diff) + tag + ".csv"
        print(ff)

        # Load simulated counts and drop practically unobserved genes
        # (column sum below 3).
        df = pd.read_csv(ff, index_col=0)
        observed = df.sum(0) >= 3
        df = df.T[observed].T

        sample_info = info.copy()
        X = sample_info[['x', 'y']]

        # The start time is recorded but not reported anywhere in this script.
        start_time = time.time()

        # Stabilize, regress out log total counts, keep it as a pseudogene.
        dfm = NaiveDE.stabilize(df.T).T
        res = NaiveDE.regress_out(
            sample_info, dfm.T, 'np.log(total_counts)').T
        res['log_total_count'] = np.log(sample_info['total_counts'])

        results = SpatialDE.run(X, res)

        ff = ("../spatialde_results/sim_MOB_expdiff" + str(exp_diff) + tag
              + "_spe.csv")
        results.to_csv(ff)
Exemple #17
0
def main():
    """Run automatic expression histology on ``10t.csv`` and plot each pattern.

    The number of patterns is read from ``sys.argv[1]``.  Previously computed
    SpatialDE results are loaded from ``10t_final_results.csv``.  Outputs:

    * ``10t_AEH_results.<n>.csv`` and ``10t_pattern_results.<n>.csv``
    * one ``10t.<n>.<k>.pdf`` per pattern, where ``k`` is the pattern's
      position among the sorted pattern labels
    * the gene list of every pattern, printed to stdout

    Returns the histology results table.
    """
    df = pd.read_csv('10t.csv', index_col=0)
    df = df.T[df.sum(0) >= 3].T  # Filter practically unobserved genes
    sample_info = get_coords(df.index)
    sample_info['total_counts'] = df.sum(1)
    sample_info = sample_info.query(
        'total_counts > 10')  # Remove empty features
    df = df.loc[sample_info.index]

    # Stabilize, regress out log total counts, keep it as a pseudogene.
    dfm = NaiveDE.stabilize(df.T).T
    res = NaiveDE.regress_out(sample_info, dfm.T, 'np.log(total_counts)').T
    res['log_total_count'] = np.log(sample_info['total_counts'])
    results = pd.read_csv('10t_final_results.csv', index_col=0)

    # Raise zero p/q-values to half of the smallest positive value.
    # Series.clip(lower=...) replaces Series.clip_lower, which no longer
    # exists in pandas >= 1.0.
    results['pval'] = results['pval'].clip(
        lower=results.query('pval > 0')['pval'].min() / 2)
    results['qval'] = results['qval'].clip(
        lower=results.query('qval > 0')['qval'].min() / 2)
    ymy = int(sys.argv[1])

    # Significant genes only, excluding the total-count pseudogene.
    sres = results.query('qval < 0.05 & g != "log_total_count"').copy()
    X = sample_info[['x', 'y']].values
    histology_results, patterns = SpatialDE.spatial_patterns(X,
                                                             res,
                                                             sres,
                                                             ymy,
                                                             11,
                                                             verbosity=1)
    histology_results.to_csv('10t_AEH_results.{}.csv'.format(ymy))
    patterns.add_prefix('pattern_').to_csv(
        '10t_pattern_results.{}.csv'.format(ymy))

    pattern_ids = histology_results.sort_values('pattern').pattern.unique()
    for i, Ci in enumerate(pattern_ids):
        fig = plt.figure(figsize=(5, 5))
        plt.scatter(sample_info['x'],
                    sample_info['y'],
                    c=patterns[Ci],
                    s=10,
                    cmap=plt.get_cmap("YlOrBr"),
                    edgecolor="none",
                    marker='s')
        plt.axis([0, 50, 0, 50])
        plt.xlim(0, 50)
        plt.ylim(0, 50)
        plt.xticks([0, 10, 20, 30, 40, 50])
        plt.yticks([0, 10, 20, 30, 40, 50])
        plt.axis('equal')
        plt.gca().invert_yaxis()
        # Title and gene count use the pattern label Ci -- the same pattern
        # that is drawn -- not the loop position i, which differs from the
        # label whenever some pattern has no genes assigned.
        n_genes = histology_results.query('pattern == @Ci').shape[0]
        plt.title('Pattern {} - {} genes'.format(Ci, n_genes), size=20)
        plt.tight_layout()
        # The file name keeps the loop position so output names stay stable.
        plt.savefig("10t.{}.{}.pdf".format(ymy, i), bbox_inches='tight')
        # Release the figure; otherwise every pattern's figure stays open.
        plt.close(fig)

    for i in pattern_ids:
        print('Pattern {}'.format(i))
        print(', '.join(
            histology_results.query('pattern == @i').sort_values('membership')
            ['g'].tolist()))
        print()

    return histology_results
Exemple #18
0
import pandas as pd
import numpy as np
import NaiveDE,SpatialDE
from somde import SomNode

dataname = '../slideseq_data/Puck_180819_11_'

# Count table indexed by its second column; the 'ENSEMBL' column is dropped
# so that only count columns remain.
df = pd.read_csv(dataname + 'count.csv', sep=',', index_col=1)
del df['ENSEMBL']

corinfo = pd.read_csv(dataname + 'idx.csv', sep=',', index_col=0)
print(df.shape)

# Column sums of the count table become the per-row total count in corinfo.
corinfo["total_count"] = df.sum(0)

# stabilize and regress_out take a gene-by-cell matrix, whereas run expects
# cell-by-gene, hence the transpose on the regression output only.
dfm = NaiveDE.stabilize(df)
res = NaiveDE.regress_out(corinfo, dfm, 'np.log(total_count)').T

# SOM-based pipeline on float32 coordinates.
X = corinfo[['x', 'y']].values.astype(np.float32)
som4 = SomNode(X, 20)
ndf, ninfo = som4.mtx(df)
r1, numberq = som4.run()
nres = som4.norm()
som4.view()