Example #1
0
def test_readtxt():
    """Check that two loads of the same text-format cells agree.

    The three cells are loaded once with the settings below and once more
    with ``store_full_map=True`` and ``parallelize=True`` added; the summary
    attributes of both loaders must match.
    """
    files = ['data/cell_03', 'data/cell_01', 'data/cell_02']

    def load(**extra):
        # Argument values are rebuilt on every call so the two loaders never
        # share a mutable ``operations`` list.
        return scHiCs(
            files,
            reference_genome='mm9',
            resolution=100000,
            max_distance=4000000,
            format='shortest_score',
            adjust_resolution=False,
            chromosomes='except Y',
            operations=['convolution', 'random_walk', 'network_enhancing'],
            kernel_shape=3,
            keep_n_strata=10,
            **extra
        )

    x = load()
    y = load(store_full_map=True, parallelize=True)

    # One cell per input file in both loaders.
    for loaded in (x, y):
        assert loaded.num_of_cells == len(files)

    # Plain attributes are compared with ``==``.
    for attr in ('resolution', 'chromosomes', 'chromosome_lengths'):
        assert getattr(x, attr) == getattr(y, attr)

    # Per-cell statistics are compared element-wise.
    for attr in ('contacts', 'short_range', 'mitotic'):
        assert np.array_equal(getattr(x, attr), getattr(y, attr))
Example #2
0
def test_readmcool():
    """Load a single ``cool``-format file and check the reported cell count."""
    mcool_paths = ['data/test.mcool']
    loaded = scHiCs(
        mcool_paths,
        reference_genome='hg19',
        resolution=2500000,
        max_distance=4000000,
        format='cool',
        adjust_resolution=True,
        chromosomes='all',
        operations=['convolution'],
        kernel_shape=3,
        keep_n_strata=10,
        store_full_map=True,
    )
    # One cell is expected per input file.
    assert loaded.num_of_cells == len(mcool_paths)
Example #3
0
def test_select_cells():
    """Filter cells by contact statistics and check the survivors obey the bounds."""
    min_contacts = 240000
    max_short_range = 0.583
    max_mitotic = .9

    x = scHiCs(
        ['data/cell_03', 'data/cell_01', 'data/cell_02'],
        reference_genome='mm9',
        resolution=50000,
        max_distance=4000000,
        format='shortest_score',
        adjust_resolution=True,
        chromosomes='except Y',
        operations=['convolution'],
        kernel_shape=3,
        keep_n_strata=10,
        store_full_map=False,
    )
    x.plot_contacts()
    x.select_cells(
        n_contacts=[min_contacts, float('inf')],
        short_range=[0, max_short_range],
        mitotic=[0, max_mitotic],
    )

    # The remaining cells must satisfy each bound passed to select_cells;
    # short-range and mitotic counts are compared as fractions of contacts.
    assert (x.contacts > min_contacts).all()
    assert (x.short_range / x.contacts <= max_short_range).all()
    assert (x.mitotic / x.contacts <= max_mitotic).all()
Example #4
0
def test_readtxt():
    """Load three text-format cells with full maps stored and check the cell count."""
    cell_paths = ['data/cell_03', 'data/cell_01', 'data/cell_02']
    loader_options = dict(
        reference_genome='mm9',
        resolution=100000,
        max_distance=4000000,
        format='shortest_score',
        adjust_resolution=False,
        chromosomes='except Y',
        operations=['convolution', 'random_walk', 'network_enhancing'],
        kernel_shape=3,
        keep_n_strata=10,
        store_full_map=True,
    )
    x = scHiCs(cell_paths, **loader_options)
    # One cell is expected per input file.
    assert x.num_of_cells == len(cell_paths)
Example #5
0
def test():
    """Exercise embedding, clustering and plotting on three example cells.

    Runs ``learn_embedding`` with several similarity/embedding/aggregation
    combinations, runs two clustering methods plus ``scHiCluster``, checks
    the result shapes and label counts, and finally draws scatter plots.
    """
    y = scHiCs(
        ['data/cell_03', 'data/cell_01', 'data/cell_02'],
        reference_genome='mm9',
        resolution=100000,
        max_distance=4000000,
        format='shortest_score',
        adjust_resolution=False,
        chromosomes='except Y',
        operations=['convolution'],
        kernel_shape=3,
        keep_n_strata=10,
        store_full_map=True,
    )

    y.plot_contacts()

    def embed(similarity, embedding, aggregation='median', **extra):
        # Every embedding call asks for the distance matrix as well.
        return y.learn_embedding(
            similarity_method=similarity,
            return_distance=True,
            embedding_method=embedding,
            aggregation=aggregation,
            **extra
        )

    def cluster(method):
        # Both clustering runs differ only in the clustering method.
        return y.clustering(
            n_clusters=2,
            clustering_method=method,
            similarity_method='innerproduct',
            aggregation='median',
            n_strata=None,
        )

    emb1 = embed('innerproduct', 'mds')
    emb2 = embed('HiCRep', 'mds')
    emb3 = embed('Selfish', 'mds')
    emb4 = embed('innerproduct', 'mds', aggregation='mean')
    emb5 = embed('innerproduct', 'tSNE')
    # The UMAP variant (what would be emb6) is not exercised by this test.
    emb7 = embed('innerproduct', 'phate', k=2)
    emb8 = embed('innerproduct', 'spectral_embedding', print_time=False)

    label1 = cluster('kmeans')
    label2 = cluster('spectral_clustering')

    hicluster = y.scHiCluster(dim=2, cutoff=0.8, n_PCs=10, n_clusters=2)

    # Each clustering must produce exactly two distinct labels.
    for labels in (label1, label2):
        assert len(set(labels)) == 2
    assert len(set(hicluster[1])) == 2

    # Element 0 is checked as a (3, 2) array and element 1 as a (3, 3) array.
    for result in (emb1, emb2, emb3, emb4, emb5, emb7, emb8):
        assert result[0].shape == (3, 2)
        assert result[1].shape == (3, 3)

    plt.figure()
    plt.subplot(1, 2, 1)
    scatter(emb1[0] * 100, label=label1)
    plt.subplot(1, 2, 2)
    scatter(emb2[0])

    plt.figure()
    scatter(hicluster[0], label=hicluster[1])
Example #6
0
from time import time
import sys

# Put the parent directory first on the module search path before importing
# the package.
sys.path.insert(0, '../')
print(sys.path)
from scHiCTools import scHiCs


# Learn the embeddings of cell_01, cell_02 and cell_03, first on the data as
# loaded and then again after additional processing.
# NOTE(review): t0 is recorded below but never read in this script, so no
# timing is actually reported here.

t0 = time()
cell_names = ['cell_01', 'cell_02', 'cell_03']
# NOTE(review): confirm that `resolution_adjust` is the keyword name accepted
# by the installed scHiCTools version.
x = scHiCs(
    cell_names,
    reference_genome='mm9',
    resolution=500000,
    keep_n_strata=10,
    format='shortest_score',
    resolution_adjust=False,
    chromosomes=['chr1'],
    store_full_map=True,
)

# Embedding of the data as loaded.
embedding_before = x.learn_embedding('inner_product', 'MDS', dim=2, n_strata=10)
print(embedding_before)

# Apply two more operations, then embed again with the same settings.
x.processing(['random_walk', 'network_enhancing'])
embedding_after = x.learn_embedding('inner_product', 'MDS', dim=2, n_strata=10)
print(embedding_after)
import numpy as np
from scHiCTools import scHiCs
from scHiCTools import scatter

# NOTE(review): `PCA` is used below but is not imported in the visible part of
# this script; presumably sklearn.decomposition.PCA - confirm and import it.

labels = np.load("../data/Ramani/cell_label.npy")

# Read the cell names with a context manager so the file handle is closed
# as soon as the list has been read.
with open('../data/Ramani/all_cells.txt') as cell_list_file:
    all_cells = cell_list_file.readlines()
cell_files = [
    '../data/Ramani/cell_matrix_data_schictools/' + cellname.strip()
    for cellname in all_cells
]

y = scHiCs(cell_files,
           reference_genome='hg19',
           resolution=1000000,
           max_distance=None,
           format='shortest_score',
           adjust_resolution=False,
           chromosomes='except Y',
           operations=['log2', 'convolution', 'random_walk'],
           kernel_shape=3,
           keep_n_strata=None,
           store_full_map=True)

# Build one block of 20 PCA components per chromosome and join the blocks
# column-wise into a single feature matrix.
matrix = []
for ch in y.chromosomes:
    full_map = y.full_maps[ch]
    # Flatten the last two axes into one row per entry of the first axis.
    # reshape replaces the in-place `A.shape = ...` assignment; the stored
    # map is never modified, so no defensive copy is needed.
    A = full_map.reshape(full_map.shape[0],
                         full_map.shape[1] * full_map.shape[2])
    # Binarize each row at its own 80th percentile.
    thres = np.percentile(A, 80, axis=1)
    A = (A > thres[:, None])
    pca = PCA(n_components=20)
    R_reduce = pca.fit_transform(A)
    matrix.append(R_reduce)
matrix = np.concatenate(matrix, axis=1)
Example #8
0
from time import time
import sys

# Put the parent directory first on the module search path before importing
# the package.
sys.path.insert(0, '../')
print(sys.path)
from scHiCTools import scHiCs


# Learn the embeddings of cell_01, cell_02 and cell_03.
# Only "Time 0" (measured around the scHiCs constructor call) is printed here.

t0 = time()
x = scHiCs(
    ['cell_01', 'cell_02', 'cell_03'],
    reference_genome='mm9',
    resolution=500000,
    max_distance=4000000,
    format='shortest_score',
    resolution_adjust=False,
    chromosomes='except Y',
    operations=['convolution'],
    kernel_shape=3,
    keep_n_strata=10,
    store_full_map=False,
)
elapsed_loading = time() - t0
print('Time 0:', elapsed_loading)

# With return_distance=True the call is unpacked into two values, both printed.
emb, distances = x.learn_embedding(
    similarity_method='selfish',
    embedding_method='MDS',
    aggregation='median',
    print_time=False,
    return_distance=True,
)
print(emb)
print(distances)



Example #9
0
def test1():
    """Load 100 cells from the Nagano data directory and run ``test_fast`` on them."""
    # Cell identifiers in the order they are loaded; each one maps to
    # "../../Nagano/1CDX_cells/<id>/new_adj".
    cell_ids = [
        "1CDX1.1", "1CDX1.185", "1CDX1.281", "1CDX1.38", "1CDX1.46",
        "1CDX2.117", "1CDX2.202", "1CDX2.294", "1CDX2.377", "1CDX2.465",
        "1CDX3.108", "1CDX3.182", "1CDX3.263", "1CDX3.352", "1CDX3.68",
        "1CDX4.154", "1CDX4.237", "1CDX4.312", "1CDX4.392", "1CDX4.468",
        "1CDX1.101", "1CDX1.186", "1CDX1.283", "1CDX1.381", "1CDX1.464",
        "1CDX2.12", "1CDX2.203", "1CDX2.295", "1CDX2.382", "1CDX2.466",
        "1CDX3.11", "1CDX3.183", "1CDX3.264", "1CDX3.353", "1CDX3.72",
        "1CDX4.155", "1CDX4.24", "1CDX4.313", "1CDX4.393", "1CDX4.47",
        "1CDX1.102", "1CDX1.187", "1CDX1.284", "1CDX1.383", "1CDX1.466",
        "1CDX2.121", "1CDX2.204", "1CDX2.296", "1CDX2.383", "1CDX2.467",
        "1CDX3.111", "1CDX3.185", "1CDX3.265", "1CDX3.354", "1CDX3.73",
        "1CDX4.156", "1CDX4.241", "1CDX4.314", "1CDX4.394", "1CDX4.472",
        "1CDX1.103", "1CDX1.191", "1CDX1.285", "1CDX1.384", "1CDX1.468",
        "1CDX2.122", "1CDX2.205", "1CDX2.297", "1CDX2.384", "1CDX2.468",
        "1CDX3.112", "1CDX3.186", "1CDX3.266", "1CDX3.355", "1CDX3.74",
        "1CDX4.157", "1CDX4.242", "1CDX4.315", "1CDX4.396", "1CDX4.473",
        "1CDX1.104", "1CDX1.192", "1CDX1.286", "1CDX1.385", "1CDX1.47",
        "1CDX2.123", "1CDX2.206", "1CDX2.3", "1CDX2.385", "1CDX2.47",
        "1CDX3.113", "1CDX3.187", "1CDX3.267", "1CDX3.356", "1CDX3.75",
        "1CDX4.158", "1CDX4.243", "1CDX4.316", "1CDX4.397", "1CDX4.474",
    ]
    fileLst100 = [
        "../../Nagano/1CDX_cells/" + cell_id + "/new_adj"
        for cell_id in cell_ids
    ]

    loader_options = dict(
        reference_genome='mm9',
        resolution=500000,
        max_distance=4000000,
        format='shortest_score',
        adjust_resolution=True,
        chromosomes='except Y',
        operations=['convolution'],
        kernel_shape=3,
        keep_n_strata=10,
        store_full_map=False,
    )
    y = scHiCs(fileLst100, **loader_options)
    y.test_fast()