Esempio n. 1
0
def create_embedding_datamatrix(dm, n_components):
    """Build a zero-filled embedding datamatrix aligned to the rows of ``dm``.

    The row name is taken from ``dm`` and the row labels / row metadata are
    obtained through their ``.copy()`` methods. The columns are
    ``n_components`` latent components labelled ``LC0`` .. ``LC<n-1>``.

    Args:
        dm: source datamatrix; its ``rowname``, ``rowlabels``, ``rowmeta``,
            ``matrixname`` and ``shape`` attributes are read.
        n_components: number of latent-component columns to create.

    Returns:
        A new ``dataclasses.datamatrix`` whose float32 matrix of shape
        ``(dm.shape[0], n_components)`` contains only zeros.
    """
    n_rows = dm.shape[0]
    component_labels = np.array(
        ['LC' + str(index) for index in range(n_components)], dtype='object')
    # Placeholder values; nothing in this function fills in the embedding.
    placeholder = np.zeros((n_rows, n_components), dtype='float32')
    return dataclasses.datamatrix(
        rowname=dm.rowname,
        rowlabels=dm.rowlabels.copy(),
        rowmeta=dm.rowmeta.copy(),
        columnname='latent_component',
        columnlabels=component_labels,
        columnmeta={},
        matrixname='sdae_embedding_of_' + dm.matrixname,
        matrix=placeholder)
Esempio n. 2
0
# Load the pickled gene_atb and gene_cst datamatrices for the current
# analysis version and dataset.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# use it on trusted inputs.
gene_atb_path = 'aligned_matrices_{0}/gene_atb_{1}.pickle'.format(
    analysis_version, datasetabbrev)
with open(gene_atb_path, mode='rb') as fr:
    gene_atb = pickle.load(fr)

gene_cst_path = 'aligned_matrices_{0}/gene_cst_{1}.pickle'.format(
    analysis_version, datasetabbrev)
with open(gene_cst_path, mode='rb') as fr:
    gene_cst = pickle.load(fr)

# initialize outputs: rows follow the columns of gene_atb, columns follow the
# columns of gene_cst; the p-value container starts as a deep copy of the
# zero-filled test-statistic container
test_statistic_values = dc.datamatrix(
    rowname=gene_atb.columnname,
    rowlabels=gene_atb.columnlabels.copy(),
    rowmeta=copy.deepcopy(gene_atb.columnmeta),
    columnname=gene_cst.columnname,
    columnlabels=gene_cst.columnlabels.copy(),
    columnmeta=copy.deepcopy(gene_cst.columnmeta),
    matrixname='atb_cluster_correlation',
    matrix=np.zeros((gene_atb.shape[1], gene_cst.shape[1]), dtype='float64'))
atb_cst = {
    'test_statistic_values': test_statistic_values,
    'pvalues': copy.deepcopy(test_statistic_values),
}

# computation
starttime = time.time()
print('starting {0!s} permutations...'.format(numperm))
stat_matrix, pvalue_matrix = feature_selection_test_function(
    X=gene_cst.matrix, Y=gene_atb.matrix, numperm=numperm)
atb_cst['test_statistic_values'].matrix = stat_matrix
# NOTE(review): only the p-values are transposed before being stored; the
# test statistics are stored exactly as returned. Confirm this matches the
# output orientation of feature_selection_test_function.
atb_cst['pvalues'].matrix = pvalue_matrix.T
# NOTE(review): this appears to be the tail of a separate snippet whose
# beginning is not visible here; `batch`, `custompath` and `custompaths` must
# be defined earlier -- confirm against the original script.
if batch == 0:
        # add the custom directory to the module search path
        sys.path.append(custompath)
# remove the path helper names from the namespace
del custompath, custompaths

import numpy as np
import machinelearning.dataclasses as dc
import pickle

# load clusters: the pickle holds three parallel objects that are unpacked
# into gene symbols, gene ids and per-gene cluster ids
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# use it on trusted inputs.
with open('clusters.pickle', 'rb') as fr:
    cluster_payload = pickle.load(fr)
gene_syms, gene_ids, cluster_ids = cluster_payload

# column labels are the distinct cluster ids rendered as strings
unique_cluster_ids = np.array(list(map(str, np.unique(cluster_ids))),
                              dtype='object')

# create matrix: one row per gene symbol, one boolean column per cluster
assignment_shape = (gene_syms.size, unique_cluster_ids.size)
gene_clust = dc.datamatrix(
    rowname='gene_sym',
    rowlabels=gene_syms,
    rowmeta={'gene_id': gene_ids},
    columnname='cluster_id',
    columnlabels=unique_cluster_ids,
    columnmeta={},
    matrixname=(
        'gene_cluster_assignments_from_denoising_autoencoder_applied_to_GTEX'
    ),
    matrix=np.zeros(assignment_shape, dtype='bool'))

# mark, column by column, which genes carry that column's cluster id; the
# string label is converted back to int for the comparison
for column, label in enumerate(gene_clust.columnlabels):
    is_member = cluster_ids == int(label)
    gene_clust.matrix[:, column] = is_member

# write matrix
with open('gene_cluster_matrix.pickle', 'wb') as fw:
    pickle.dump(gene_clust, fw)
            ) / np.pi  # divide by pi if similarity scores can be negative, otherwise divide by pi/2
        else:
            D = np.arccos(D) / (np.pi / 2)
        return D
    else:
        raise ValueError('invalid distance metric')


# Stack the train, valid and test partitions (in that order) along axis 0
# into a single datamatrix; every row-metadata field present in train.rowmeta
# is concatenated the same way, and the column labels come from train.
partitions = (train, valid, test)
gene_atb = dc.datamatrix(
    rowname='GeneSym',
    rowlabels=np.concatenate([part.rowlabels for part in partitions]),
    rowmeta={
        field: np.concatenate([part.rowmeta[field] for part in partitions])
        for field in train.rowmeta
    },
    columnname='Tissue',
    columnlabels=train.columnlabels.copy(),
    columnmeta={},
    matrixname='zscored_tissue_expression',
    matrix=np.concatenate([part.matrix for part in partitions], axis=0))

# Start from a deep copy so the row information is carried over, then swap in
# the projection's column description and values.
gene_proj = copy.deepcopy(gene_atb)
gene_proj.matrixname = '2d_dnn_projection_of_zscored_tissue_expression'
gene_proj.columnname = 'Neuron'
gene_proj.columnlabels = np.array(['X', 'Y'], dtype='object')
# Evaluate h[-1] with the stacked matrix fed in for x.
# NOTE(review): presumably `sess` is a TensorFlow session and h[-1] yields a
# two-column output matching the ['X', 'Y'] labels -- confirm.
gene_proj.matrix = sess.run(h[-1], feed_dict={x: gene_atb.matrix})
# refresh the cached size/shape attributes after replacing the matrix
gene_proj.updatesizeattribute()
gene_proj.updateshapeattribute()