Example #1
0
# In[3]:

def _raw_expression_results(results_df):
    """Select the raw expression results and tag them as uncompressed.

    Parameters
    ----------
    results_df : pandas.DataFrame
        Prediction results with a ``training_data`` column.

    Returns
    -------
    pandas.DataFrame
        A new frame holding only the rows whose ``training_data`` is
        'expression', with an ``n_dims`` column set to the string 'raw' so
        the rows can be stacked with compressed results (which are filtered
        on ``n_dims`` below). ``results_df`` itself is not modified.
    """
    is_expression = results_df.training_data.isin(['expression'])
    return results_df[is_expression].assign(n_dims='raw')


def _compressed_methylation_results(compressed_df, n_dims=5000):
    """Select the compressed methylation results for a single ``n_dims``.

    Parameters
    ----------
    compressed_df : pandas.DataFrame
        Compressed prediction results with ``training_data`` and ``n_dims``
        columns.
    n_dims : int, default 5000
        Value of the ``n_dims`` column to keep.

    Returns
    -------
    pandas.DataFrame
        An independent copy of the rows whose ``training_data`` is 'me_27k'
        or 'me_450k' and whose ``n_dims`` equals ``n_dims``.
    """
    keep = (compressed_df.training_data.isin(['me_27k', 'me_450k'])
            & (compressed_df.n_dims == n_dims))
    return compressed_df[keep].copy()


# load raw data; only the expression results are taken from the raw runs
old_results_df = _raw_expression_results(
    au.load_stratified_prediction_results(old_results_dir, 'gene'))
new_results_df = _raw_expression_results(
    au.load_stratified_prediction_results(new_results_dir, 'gene'))

# here we want to use compressed data for methylation datasets (27k and 450k)
# the results in 02_classify_compressed/compressed_vs_raw_results.ipynb show that
# performance is equal or slightly better for PCA compressed methylation data,
# and it's much easier/faster to fit models on
old_compressed_results_df = _compressed_methylation_results(
    au.load_compressed_prediction_results(
        old_results_dir, 'gene', old_filenames=True))
new_compressed_results_df = _compressed_methylation_results(
    au.load_compressed_prediction_results(new_results_dir, 'gene'))

# stack raw expression results on top of compressed methylation results,
# separately for the old and the new runs
old_results_df = pd.concat((old_results_df, old_compressed_results_df))
new_results_df = pd.concat((new_results_df, new_compressed_results_df))
Example #2
0
# one legend patch per data type, colored by its position in the current
# seaborn palette (looked up once rather than on every iteration)
palette = sns.color_palette()
for ix, data in enumerate(data_order):
    handles.append(mpatches.Patch(color=palette[ix], label=data))

# attach the collected patches as the legend of the subplot at row 1, column 2
axarr[1, 2].legend(handles=handles, loc='lower right')

# ### Compare single-omics and multi-omics results

# In[5]:

# directory holding the unimodal (individual data type) prediction results
# that the multimodal results will be compared with
unimodal_results_dir = (
    Path(cfg.results_dirs['mutation']) / 'methylation_results' / 'gene'
)

# load the compressed unimodal results (expected: expression and me_27k; the
# prints below show what was actually found) and keep only the 5000-dimensional
# ones; n_dims is constant after that filter, so the column is dropped
u_results_df = au.load_compressed_prediction_results(unimodal_results_dir,
                                                     'gene')
u_results_df = (
    u_results_df[u_results_df.n_dims == 5000]
    .drop(columns='n_dims')
)

# make sure data loaded matches our expectations
print(u_results_df.training_data.unique())
print(u_results_df.seed.unique())

# In[6]:

# stack the multimodal results and the unimodal results into a single frame
all_results_df = pd.concat([results_df, u_results_df])

# quick check of the combined frame: its shape, then the data types present
print(
    all_results_df.shape,
    all_results_df.training_data.unique(),
    sep='\n',
)
all_results_df.head()
Example #3
0

# prediction results for models fit on the raw data
raw_results_df = au.load_stratified_prediction_results(results_dir, 'gene')

# summarize what was loaded: shape, then the seeds and data types present
print(
    raw_results_df.shape,
    raw_results_df.seed.unique(),
    raw_results_df.training_data.unique(),
    sep='\n',
)
raw_results_df.head()


# In[4]:


# prediction results for models fit on the compressed data
compressed_results_df = au.load_compressed_prediction_results(results_dir, 'gene')

# summarize what was loaded: shape, then the seeds, compression dimensions,
# and data types present
print(
    compressed_results_df.shape,
    compressed_results_df.seed.unique(),
    compressed_results_df.n_dims.unique(),
    compressed_results_df.training_data.unique(),
    sep='\n',
)
compressed_results_df.head()


# In[5]:


def label_points(x, y, gene, sig, ax):
    text_labels = []
    a = pd.DataFrame({'x': x, 'y': y, 'gene': gene, 'sig': sig})
    for i, point in a.iterrows():