def test_embark_supplemental(self, study, tmpdir):
    """Check that supplemental data survives a save/embark round trip.

    Stores the correlation matrix of ``study.expression.data`` on
    ``study.supplemental``, saves the study under ``tmpdir``, embarks on the
    saved copy and asserts that the reloaded frame equals the stored one.
    """
    import flotilla

    saved_name = 'test_save_supplemental'

    # Attach something to the supplemental data before saving
    study.supplemental.expression_corr = study.expression.data.corr()
    study.save(saved_name, flotilla_dir=tmpdir)

    reloaded = flotilla.embark(saved_name, flotilla_dir=tmpdir)

    actual = reloaded.supplemental.expression_corr
    expected = study.supplemental.expression_corr
    pdt.assert_frame_equal(actual, expected)
def begin(
        flotilla_project="http://sauron.ucsd.edu/flotilla_projects/"
                         "neural_diff_chr22"):
    """Embark on a flotilla project and open its MongoDB database.

    Parameters
    ----------
    flotilla_project : str
        Location handed to ``flotilla.embark``. The text after its last
        ``/`` is used as the project id given to ``get_mongo_db``.

    Returns
    -------
    tuple
        ``(psi, rbpRpkms, mongodb)``: the study's splicing data, the
        expression columns named by the ``'rbp'`` feature set with missing
        values filled with 0, and the second item returned by
        ``get_mongo_db``.
    """
    import flotilla

    study = flotilla.embark(flotilla_project)

    expression_values = study.expression.data
    splicing_values = study.splicing.data
    rbp_columns = study.expression.feature_sets['rbp']
    rbp_expression = expression_values[rbp_columns].fillna(0)

    # Everything after the final "/" (the whole string when there is none)
    project_id = flotilla_project.rpartition("/")[2]
    # Only the database handle is returned; the connection is not used here
    _connection, database = get_mongo_db(project_id)

    sys.stderr.write("finished loading study_data\n")
    return (splicing_values, rbp_expression, database)
import pandas as pd
import flotilla
import os
import sys

# NOTE(review): ``sns``, ``np`` and ``anchor`` are used below but are not
# imported in the visible part of this script -- confirm they are imported
# before this point.
sns.set(style='ticks', context='paper',
        rc={'font.sans-serif': 'Arial', 'pdf.fonttype': 42})

# Ensure the iteration is always at least 1
iteration = max(int(sys.argv[1]), 1)
iteration_str = str(iteration).zfill(4)

base_folder = '/home/obotvinnik/Dropbox/figures2/singlecell_pnm/figure2_modalities/bayesian/permutations'

# Derive a reproducible seed that differs per iteration. The divisor is the
# iteration number (this used to reference an undefined name ``i``), which is
# why ``iteration`` is clamped to >= 1 above. ``//`` keeps the seed an integer
# under both Python 2 and Python 3.
seed = (sum(ord(c) for c in 'randomly_permute_modalities') // iteration) % 5437
np.random.seed(seed)

study = flotilla.embark(
    'singlecell_pnm_figure1_supplementary_post_splicing_filtering')

# Index labels of ``singles`` that do not appear in ``outliers``
not_outliers = study.splicing.singles.index.difference(
    study.splicing.outliers.index)

# This selection was missing: the name below was printed and grouped without
# ever being bound, and ``not_outliers`` was otherwise unused.
splicing_singles_no_outliers = study.splicing.singles.loc[not_outliers]
print(splicing_singles_no_outliers.shape)

# Per phenotype group, drop columns that have fewer than 20 non-NA values
splicing_singles_no_outliers = splicing_singles_no_outliers.groupby(
    study.sample_id_to_phenotype).apply(
        lambda x: x.dropna(thresh=20, axis=1))
print(splicing_singles_no_outliers.shape)

# Per phenotype group, shuffle the values along the first axis and put them
# back under the original index and columns
permuted_psi = splicing_singles_no_outliers.groupby(
    study.sample_id_to_phenotype).apply(
        lambda x: pd.DataFrame(np.random.permutation(x),
                               index=x.index, columns=x.columns))

bayesian = anchor.BayesianModalities()
modality_assignments = permuted_psi.groupby(
    study.sample_id_to_phenotype).apply(bayesian.fit_predict)
"""
Plot the modality log-likelihoods and barplots during estimation
================================================================

Embarks on the ``shalek2013`` study and calls
``plot_event_modality_estimation`` with a single event id.

See also
--------
:py:func:`Study.plot_event_modality_estimation`

"""
import flotilla

# Id handed to the plotting call, split across two literals for readability
event_id = ('chr8:97356415:97356600:-@chr8:97355689:97355825:-'
            '@chr8:97353054:97353130:-@chr8:97352177:97352339:-')

study = flotilla.embark('shalek2013')
study.plot_event_modality_estimation(event_id)
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pandas as pd

# Plot styling is configured before flotilla is imported; keep this order
sns.set(style='ticks', context='paper',
        rc={'font.sans-serif': 'Arial', 'pdf.fonttype': 42})

import flotilla

flotilla_dir = '/projects/ps-yeolab/obotvinnik/flotilla_projects'
study = flotilla.embark('singlecell_pnm_figure2_modalities_bayesian_kmers',
                        flotilla_dir=flotilla_dir)

# Pairwise correlation between the rows of ``kmer_zscores`` (hence the
# transpose), with missing values replaced by 0 first
kmer_zscores = study.supplemental.kmer_zscores.fillna(0)
corr = kmer_zscores.T.corr()
print(corr.shape)

# Remove columns, then rows, that are entirely NA
corr = corr.dropna(how='all', axis=1)
corr = corr.dropna(how='all', axis=0)
print(corr.shape)

folder = '/home/obotvinnik/Dropbox/figures2/singlecell_pnm/figure2_modalities/bayesian'
figure_folder = '{}/kmer_counting'.format(folder)
pdf_path = (
    '{}/modality_kmer_scores_pearson_correlated_clustermap_featurewise.pdf'
    .format(figure_folder))

g = sns.clustermap(corr)
g.savefig(pdf_path)
def study(example_datapackage_path):
    """Embark on the datapackage at ``example_datapackage_path``.

    Returns whatever ``flotilla.embark`` produces for that path.
    """
    import flotilla

    embarked = flotilla.embark(example_datapackage_path)
    return embarked
"""
Plot lavalamps of splicing events in each modality
==================================================

Embarks on the study at ``flotilla._shalek2013`` and calls
``plot_modalities_lavalamps`` on it.

See also
--------
:py:func:`Study.plot_modalities_lavalamps`

"""
import flotilla

# NOTE(review): the previous header described bar graphs and pointed to
# ``Study.plot_modalities_bars`` while the code called the lavalamp plot.
# The header now matches the call; if the bar plot was the intended example,
# switch the call below instead.
study = flotilla.embark(flotilla._shalek2013)
study.plot_modalities_lavalamps()
"""
Compare gene expression in two features
=======================================

Embarks on the study at ``flotilla._brainspan`` and calls
``plot_two_features`` for ``FOXP1`` and ``FOXJ1``.
"""
import flotilla

# The two feature names, in the order they are passed to the plotting call
feature_pair = ('FOXP1', 'FOXJ1')

study = flotilla.embark(flotilla._brainspan)
study.plot_two_features(*feature_pair)
def test_save(self, example_datapackage_path, tmpdir, monkeypatch):
    """Save a study and compare the written datapackage with its source.

    The study is embarked from ``example_datapackage_path`` and saved under
    ``tmpdir``. The saved ``datapackage.json`` must then equal the source
    datapackage once (a) attributes taken from the study object have been
    added to the source resources, (b) storage-specific resource keys have
    been removed from both, and (c) the name, version and resource list have
    been checked separately.

    Parameters
    ----------
    example_datapackage_path : str
        Path of the datapackage JSON file to embark on and compare against.
    tmpdir
        Directory object used as ``flotilla_dir``; the test relies on its
        ``listdir()`` method and on ``purebasename`` of its entries.
    monkeypatch
        Used for every dict mutation via ``setitem``/``delitem``.
    """
    import flotilla
    from flotilla.external import get_resource_from_name

    study = flotilla.embark(example_datapackage_path, load_species_data=False)
    study_name = 'test_save'
    study.save(study_name, flotilla_dir=tmpdir)

    # Saving must create exactly one entry, named after the study
    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)
    with open(example_datapackage_path) as f:
        true_datapackage = json.load(f)

    assert study_name == save_dir.purebasename

    resource_keys_to_ignore = ('compression', 'format', 'path')
    keys_from_study = {
        'splicing': ['feature_rename_col'],
        'expression': ['feature_rename_col', 'log_base'],
        'metadata': ['phenotype_order', 'phenotype_to_color',
                     'phenotype_col'],
        'mapping_stats': [u'number_mapped_col'],
    }
    resource_names = ('metadata', 'expression', 'splicing', 'mapping_stats',
                      'spikein')

    # Add auto-generated attributes into the true datapackage.
    # ``getattr`` replaces ``eval`` on a formatted string, and ``items()``
    # works on both Python 2 and Python 3.
    for name, keys in keys_from_study.items():
        resource = get_resource_from_name(true_datapackage, name)
        for key in keys:
            value = getattr(getattr(study, name), key)
            monkeypatch.setitem(resource, key, value)

    # The saved version is the study's version with the patch number bumped
    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']

    datapackage_keys_to_ignore = ['name', 'datapackage_version', 'resources']
    datapackages = (true_datapackage, test_datapackage)

    for name in resource_names:
        for datapackage in datapackages:
            resource = get_resource_from_name(datapackage, name)
            for key in resource_keys_to_ignore:
                monkeypatch.delitem(resource, key, raising=False)

    # Have to check for resources separately because they could be in any
    # order, it just matters that the contents are equal. Dicts cannot be
    # ordered on Python 3, so sort on each resource's name instead.
    def by_name(resource):
        return resource['name']

    assert sorted(true_datapackage['resources'], key=by_name) == sorted(
        test_datapackage['resources'], key=by_name)

    for key in datapackage_keys_to_ignore:
        for datapackage in datapackages:
            monkeypatch.delitem(datapackage, key)

    pdt.assert_dict_equal(test_datapackage, true_datapackage)
def test_embark(example_datapackage_path):
    """Smoke test: embarking on the example datapackage must not raise.

    There are no assertions; the returned study is not inspected.
    """
    flotilla.embark(example_datapackage_path)
def test_real_init(self, example_datapackage_path):
    """Embark on the example datapackage with species data loading off.

    There are no assertions; the test passes when ``flotilla.embark``
    returns without raising.
    """
    import flotilla

    load_species_data = False
    flotilla.embark(example_datapackage_path,
                    load_species_data=load_species_data)
def scrambled_study():
    """Return the study embarked from ``flotilla._test_data``."""
    import flotilla

    source = flotilla._test_data
    return flotilla.embark(source)
def shalek2013():
    """Return the study embarked from ``flotilla._shalek2013``."""
    import flotilla

    source = flotilla._shalek2013
    return flotilla.embark(source)
def test_embark(shalek2013_datapackage_path):
    """Smoke test: embarking on the shalek2013 datapackage must not raise.

    There are no assertions; the returned study is not inspected.
    """
    flotilla.embark(shalek2013_datapackage_path)
def test_real_init(self, shalek2013_datapackage_path):
    """Embark on the shalek2013 datapackage with species data loading off.

    There are no assertions; the test passes when ``flotilla.embark``
    returns without raising.
    """
    import flotilla

    load_species_data = False
    flotilla.embark(shalek2013_datapackage_path,
                    load_species_data=load_species_data)
"""
Plot the modality log-likelihoods and barplots during estimation
================================================================

Embarks on the ``shalek2013`` study and calls
``plot_event_modality_estimation`` with a single event id.

See also
--------
:py:func:`Study.plot_event_modality_estimation`

"""
import flotilla

study = flotilla.embark('shalek2013')

# The id is built from four ``@``-joined pieces; the joined text is exactly
# the string handed to the plotting call
event_id = '@'.join([
    'chr8:97356415:97356600:-',
    'chr8:97355689:97355825:-',
    'chr8:97353054:97353130:-',
    'chr8:97352177:97352339:-',
])
study.plot_event_modality_estimation(event_id)
"""
Perform classification on categorical traits
============================================

Embarks on the study at ``flotilla._shalek2013`` and calls
``plot_classifier`` for one trait.

See also
--------
:py:func:`Study.interactive_classifier`

"""
import flotilla

# Trait handed to the classifier plot
trait = 'phenotype: Immature BDMC'

study = flotilla.embark(flotilla._shalek2013)
study.plot_classifier(trait)
def test_save(self, example_datapackage_path, example_datapackage, tmpdir,
              monkeypatch):
    """Save a study and compare the written datapackage with its source.

    The study is embarked from ``example_datapackage_path`` and saved under
    ``tmpdir``. The saved ``datapackage.json`` must then equal a copy of
    ``example_datapackage`` once (a) attributes taken from the study object
    have been added to the source resources, (b) storage-specific resource
    keys have been removed from both, and (c) the name, version and resource
    list have been checked separately.

    Parameters
    ----------
    example_datapackage_path : str
        Location of the datapackage to embark on.
    example_datapackage : dict
        The source datapackage; a shallow ``copy()`` of it is compared.
    tmpdir
        Directory object used as ``flotilla_dir``; the test relies on its
        ``listdir()`` method and on ``purebasename`` of its entries.
    monkeypatch
        Used for every dict mutation via ``setitem``/``delitem``.
    """
    import flotilla
    from flotilla.datapackage import get_resource_from_name

    study = flotilla.embark(example_datapackage_path, load_species_data=False)
    study_name = 'test_save'
    study.save(study_name, flotilla_dir=tmpdir)

    # Saving must create exactly one entry, named after the study
    assert len(tmpdir.listdir()) == 1
    save_dir = tmpdir.listdir()[0]

    with open('{}/datapackage.json'.format(save_dir)) as f:
        test_datapackage = json.load(f)
    true_datapackage = example_datapackage.copy()

    assert study_name == save_dir.purebasename

    resource_keys_to_ignore = ('compression', 'format', 'path', 'url')
    keys_from_study = {
        'splicing': [],
        'expression': ['thresh', 'log_base'],
        'metadata': ['phenotype_order', 'phenotype_to_color',
                     'phenotype_col', 'phenotype_to_marker',
                     'pooled_col', 'minimum_samples'],
        'mapping_stats': ['number_mapped_col'],
        'expression_feature': ['rename_col', 'ignore_subset_cols'],
        'splicing_feature': ['rename_col', 'ignore_subset_cols'],
    }
    resource_names = keys_from_study.keys()

    feature_suffix = '_feature'

    # Add auto-generated attributes into the true datapackage
    for name, keys in keys_from_study.items():
        resource = get_resource_from_name(true_datapackage, name)
        for key in keys:
            if name.endswith(feature_suffix):
                # e.g. ('expression_feature', 'rename_col') is read from
                # study.expression.feature_rename_col. Slice the suffix off:
                # ``rstrip('_feature')`` strips a *set of characters* and
                # only happened to work for the current resource names.
                data_name = name[:-len(feature_suffix)]
                attribute = 'feature_{}'.format(key)
            else:
                data_name, attribute = name, key
            # ``getattr`` replaces ``eval`` on a formatted string
            value = getattr(getattr(study, data_name), attribute)
            monkeypatch.setitem(resource, key, value)

    # The saved version is the study's version with the patch number bumped
    version = semantic_version.Version(study.version)
    version.patch += 1
    assert str(version) == test_datapackage['datapackage_version']
    assert study_name == test_datapackage['name']

    datapackage_keys_to_ignore = ['name', 'datapackage_version', 'resources']
    datapackages = (true_datapackage, test_datapackage)

    for name in resource_names:
        for datapackage in datapackages:
            resource = get_resource_from_name(datapackage, name)
            for key in resource_keys_to_ignore:
                monkeypatch.delitem(resource, key, raising=False)

    # Have to check for resources separately because they could be in any
    # order, it just matters that the contents are equal. Dicts cannot be
    # ordered on Python 3, so sort on each resource's name instead.
    def by_name(resource):
        return resource['name']

    assert sorted(true_datapackage['resources'], key=by_name) == sorted(
        test_datapackage['resources'], key=by_name)

    for key in datapackage_keys_to_ignore:
        for datapackage in datapackages:
            monkeypatch.delitem(datapackage, key)

    pdt.assert_dict_equal(test_datapackage, true_datapackage)