Exemple #1
0
         analysis_version, datasetabbrev)):
     continue
 # Write a marker file named after this dataset under the
 # datasets_in_progress_<analysis_version> directory, then echo the same
 # status line to stdout.
 progress_path = 'datasets_in_progress_{0}/{1}.txt'.format(
     analysis_version, datasetabbrev)
 progress_message = 'working on {}...'.format(datasetabbrev)
 with open(progress_path, mode='wt', encoding="utf-8",
           errors="surrogateescape") as fw:
     fw.write(progress_message)
 print(progress_message)
 # Load the gene-by-attribute association matrix for this dataset from the
 # tab-delimited file at datasetinfo['path'].
 # NOTE(review): skiprows/skipcolumns=3 presumably skip label/metadata
 # rows and columns -- confirm against loaddatamatrix.
 gene_atb = datasetselection.loaddatamatrix(
     datasetpath=datasetinfo['path'],
     rowname='gene', columnname='atb',
     matrixname='gene_atb_associations',
     skiprows=3, skipcolumns=3,
     delimiter='\t', dtype='float64',
     getmetadata=True,  # TODO: the getmetadata=False case still needs fixing
     getmatrix=True)

 # The matrix is always reduced to zero / non-zero below; the warning only
 # reports that its distinct values were not exactly {0, 1}. Because this
 # is a set-equality test, a matrix holding only zeros or only ones also
 # triggers the warning.
 distinct_values = set(np.unique(gene_atb.matrix))
 already_binary = distinct_values == {0, 1}
 if not already_binary:
     print('warning: converting matrix to binary values')
 gene_atb.matrix = (gene_atb.matrix != 0)
 # presumably refreshes the object's recorded dtype after the matrix was
 # replaced -- confirm against the datamatrix class
 gene_atb.updatedtypeattribute()

 # Similarity between attributes using the configured metric.
 # NOTE(review): axis=1 presumably selects the attribute (column) axis, as
 # the name atb_atb suggests -- confirm against tosimilarity.
 atb_atb = gene_atb.tosimilarity(metric=similarity_metric, axis=1)
 # align with clusters
 # Select the row labels of this dataset that also occur among the cluster
 # row labels; boolean-mask indexing keeps them in this dataset's order.
 # np.isin (NumPy >= 1.13) replaces np.in1d, which is deprecated as of
 # NumPy 2.0. The two return the same mask for 1-D input; row labels are
 # assumed to be 1-D arrays here -- confirm.
 commongenes = gene_atb.rowlabels[np.isin(gene_atb.rowlabels,
                                          gene_cluster.rowlabels)]
Exemple #2
0
    if custompath not in sys.path:
        sys.path.append(custompath)
del custompath, custompaths

import numpy as np
import pickle
import machinelearning.datasetselection as ds
import os

# Read the cleaned gene-by-attribute matrix from the tab-delimited
# .txt.gz file.
# NOTE(review): skiprows/skipcolumns=3 presumably skip label/metadata rows
# and columns -- confirm against loaddatamatrix.
gene_atb = ds.loaddatamatrix(
    'data/original_data/gene_attribute_matrix_cleaned.txt.gz',
    rowname='gene', columnname='atb',
    matrixname='gene_atb_associations',
    skiprows=3, skipcolumns=3,
    delimiter='\t', dtype='float32',
    getmetadata=True,  # TODO: the getmetadata=False case still needs fixing
    getmatrix=True)

# Randomly permute the matrix along axis 0 and then along axis 1. No seed
# is set in the visible code, so the order varies between runs unless one
# was set earlier.
for shuffle_axis in (0, 1):
    shuffled_order = np.random.permutation(gene_atb.shape[shuffle_axis])
    gene_atb.reorder(shuffled_order, shuffle_axis)


def _zscore(values):
    """Center values on their mean and scale by their standard deviation."""
    return (values - values.mean()) / values.std()


# Reduce the matrix along axis 1 to a mean and a standard deviation per
# row, then z-score each of those two summary vectors across rows.
row_mean = gene_atb.matrix.mean(1)
row_stdv = gene_atb.matrix.std(1)
standardized_row_mean = _zscore(row_mean)
standardized_row_stdv = _zscore(row_stdv)