예제 #1
0
 def test_metrics_features_threshold_error(self):
     """metrics() with filtering enabled must raise when Features_thresh is too high.

     The expected ValueError message states that all samples would be removed.
     """
     thresholds = {
         "UMI_thresh": 1500,
         "Features_thresh": 50,
         "log10FeaturesPerUMI_thresh": 0.002,
         "FeaturesPerUMI_thresh": 0.0001,
         "mtRatio_thresh": 0.5,
     }
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     expected_message = r"Feature threshold too high, all samples would be removed."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.metrics(filter_count_matrix=True, remove_cell_cycle=True, **thresholds)
예제 #2
0
 def test_get_cell_cycle_genes_human(self):
     """Check the human cell-cycle gene list: type, length, and first/last gene IDs."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     human_cc = quality_control.get_cell_cycle_genes('human')

     assert isinstance(human_cc, list)
     assert np.shape(human_cc) == (125,)
     # Spot-check both ends of the list.
     assert human_cc[0] == 'ENSG00000097007'
     assert human_cc[-1] == 'ENSG00000116809'
예제 #3
0
 def test_filter_without_QC_Obj(self):
     """Filter the count matrix without a precomputed QC metadata object.

     Calls filter_count_matrix with QC_metaobj=None and checks the type,
     shape, and total count of the filtered matrix.
     """
     qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     fdata, fgenes, fbc = qc.filter_count_matrix(
         QC_metaobj=None,
         remove_cell_cycle=False,
         UMI_thresh=1500,
         Features_thresh=39,
         log10FeaturesPerUMI_thresh=0.002,
         FeaturesPerUMI_thresh=0.0001,
         mtRatio_thresh=0.5,
     )
     # Use the public class name; the `csc` submodule path is deprecated
     # (assumes `spsp` is scipy.sparse).
     assert isinstance(fdata, spsp.csc_matrix)
     assert np.shape(fdata) == (32, 40)
     assert fdata.sum() == 63358
예제 #4
0
 def test_get_cell_cycle_genes_mouse(self):
     """Check the mouse cell-cycle gene list: type, length, and first/last gene IDs."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     mouse_cc = quality_control.get_cell_cycle_genes('mouse')

     assert isinstance(mouse_cc, list)
     assert np.shape(mouse_cc) == (125,)
     # Spot-check both ends of the list.
     assert mouse_cc[0] == 'ENSMUSG00000026842'
     assert mouse_cc[-1] == 'ENSMUSG00000006215'
예제 #5
0
 def test_filter_umi_threshold_error_2(self):
     """filter_count_matrix must raise when UMI_thresh is too high.

     The expected ValueError message states that all samples would be removed.
     """
     thresholds = {
         "UMI_thresh": 3000,
         "Features_thresh": 39,
         "log10FeaturesPerUMI_thresh": 0.002,
         "FeaturesPerUMI_thresh": 0.0001,
         "mtRatio_thresh": 0.5,
     }
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     expected_message = r"UMI threshold too high, all samples would be removed."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.filter_count_matrix(remove_cell_cycle=True, **thresholds)
예제 #6
0
    def test_metrics_with_filter_DATAFRAME(self):
        """Run metrics() with filtering on the DataFrame input and verify every output.

        Checks the QC table (shape, column names, no missing values) and the
        filtered matrix, barcodes and genes (types, sizes, total count, and
        that every returned label belongs to the expected set).
        """
        expected_columns = ['nUMI', 'nFeatures', 'FeaturesPerUMI', 'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio']
        expected_barcodes = {
            'AAACATTGAGCTAC-1','AAACATACAACCAC-1','AAACATTGATCAGC-1','AAACCGTGTATGCG-1','AAACGCACTGGTAC-1',
            'AAACGCTGACCAGT-1','AAACTTGATCCAGA-1', 'AAAGAGACGCGAGA-1', 'AAAGAGACGGCATT-1','AAAGCAGAAGCCAT-1', 'AAAGCAGATATCGG-1','AAAGCCTGTATGCG-1',
            'AAAGTTTGATCACG-1', 'AAAGTTTGGGGTGA-1', 'AAAGTTTGTAGAGA-1','AAAGTTTGTAGCGT-1', 'AAATCAACCCTATT-1', 'AAATCAACGGAAGC-1','AAATCAACTCGCAA-1',
            'AAATCCCTCCACAA-1', 'AAATCCCTGCTATG-1','AAATGTTGAACGAA-1', 'AAATGTTGCCACAA-1','AAATGTTGTGGCAT-1','AAATTCGAAGGTTC-1','AAATTCGAGCTGAT-1','AAACATTGATCTAC-1','AAACATTGATCCGC-1',
            'ATACGCACTGGTAC-1', 'AAACGCTGATCAGT-1','AAACGCGGGTTCTT-1','AAAAGCTGTAGCCA-1',
        }
        expected_genes = {
            'ENSG00000243485',
            'ENSG00000237613','ENSG00000186092','ENSG00000238009','ENSG00000239945','ENSG00000237683', 'ENSG00000239906',
            'ENSG00000241599','ENSG00000228463','ENSG00000237094','ENSG00000235249', 'ENSG00000236601','ENSG00000236743', 'ENSG00000231709',
            'ENSG00000239664', 'ENSG00000230021', 'ENSG00000223659','ENSG00000185097','ENSG00000235373', 'ENSG00000240618','ENSG00000229905',
            'ENSG00000010292','ENSG00000011426','ENSG00000129055', 'ENSG00000177757', 'ENSG00000225880', 'ENSG00000230368','ENSG00000269308',
            'ENSG00000272438','ENSG00000230699','ENSG00000210049','ENSG00000211459','ENSG00000097007', 'ENSG00000210082',
            'ENSG00000241180', 'ENSG00000223764', 'ENSG00000187634','ENSG00000268179',
            'ENSG00000188976','ENSG00000187961',
        }

        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
        fdata, fgenes, fbc, QC_metaobj_50x40 = qc.metrics(
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
            filter_count_matrix=True,
            remove_cell_cycle=False,
        )

        # QC table: one row per input row, six metric columns, no missing values.
        assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
        assert np.all(QC_metaobj_50x40.columns == expected_columns)
        assert not QC_metaobj_50x40.isna().values.any()

        # Use the public class name; the `csc` submodule path is deprecated
        # (assumes `spsp` is scipy.sparse).
        assert isinstance(fdata, spsp.csc_matrix)
        assert isinstance(fbc, list)
        assert isinstance(fgenes, list)
        assert len(fbc) == 32
        assert len(fgenes) == 40
        assert fdata.shape == (32, 40)
        assert fdata.sum() == 63358

        # Every returned label must belong to the expected set.
        assert set(fbc) <= expected_barcodes
        assert set(fgenes) <= expected_genes
예제 #7
0
 def test_get_mito_genes_human(self):
     """Check the human mitochondrial gene list: type, length, and first/last gene IDs."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     human_mt = quality_control.get_mito_genes('human')

     assert isinstance(human_mt, list)
     assert np.shape(human_mt) == (37,)
     # Spot-check both ends of the list.
     assert human_mt[0] == 'ENSG00000210049'
     assert human_mt[-1] == 'ENSG00000210196'
예제 #8
0
 def test_get_mito_genes_mouse(self):
     """Check the mouse mitochondrial gene list: type, length, and first/last gene IDs."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     mouse_mt = quality_control.get_mito_genes('mouse')

     assert isinstance(mouse_mt, list)
     assert np.shape(mouse_mt) == (37,)
     # Spot-check both ends of the list.
     assert mouse_mt[0] == 'ENSMUSG00000064336'
     assert mouse_mt[-1] == 'ENSMUSG00000064372'
예제 #9
0
    def test_metrics_no_filter_CSC(self):
        """metrics() on the CSC input with filtering disabled: check the QC table.

        Verifies the table's shape, column names, absence of missing values,
        and the sum over all of its entries.
        """
        expected_columns = ['nUMI', 'nFeatures', 'FeaturesPerUMI','log10FeaturesPerUMI', 'mtUMI','mitoRatio']
        quality_control = QualityControl(self.csc_50x40, self.genes, self.barcodes)
        # No thresholds are passed; only filter_count_matrix=False is given.
        qc_table = quality_control.metrics(filter_count_matrix=False)

        row_count = self.csc_50x40.shape[0]
        assert qc_table.shape == (row_count, 6)
        assert np.all(qc_table.columns == expected_columns)
        assert not np.any(qc_table.isna())

        grand_total = sum(sum(qc_table.values))
        np.testing.assert_approx_equal(grand_total, 107641.13463, significant=4, err_msg='metrics sum incorrect')
예제 #10
0
    def test_metrics_no_filter_DATAFRAME(self):
        """metrics() on the DataFrame input with filtering disabled: check the QC table.

        Verifies the table's shape, column names, absence of missing values,
        and the sum over all of its entries.
        """
        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
        QC_metaobj_50x40 = qc.metrics(
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
            filter_count_matrix=False,
        )

        assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
        assert np.all(QC_metaobj_50x40.columns == ['nUMI', 'nFeatures', 'FeaturesPerUMI', 'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio'])
        assert not QC_metaobj_50x40.isna().values.any()

        # Compare the floating-point total with a tight relative tolerance
        # rather than exact equality: the last bits of a float sum depend on
        # summation order and platform.
        total = sum(sum(QC_metaobj_50x40.values))
        np.testing.assert_allclose(total, 107641.1346368048, rtol=1e-9, err_msg='metrics sum incorrect')
예제 #11
0
 def test_null_inputs(self):
     """Constructor must reject an empty matrix, gene list or barcode list, and a plain list as matrix."""
     empty_input_cases = (
         ((self.mtx_df_empty, self.genes, self.barcodes), r"Empty matrix found!"),
         ((self.mtx_df_50x40, [], self.barcodes), r"Empty gene list found!"),
         ((self.mtx_df_50x40, self.genes, []), r"Empty barcode list found!"),
     )
     for constructor_args, expected_message in empty_input_cases:
         with pytest.raises(ValueError, match=expected_message):
             QualityControl(*constructor_args)

     # A plain empty list in place of the matrix is expected to be a TypeError.
     with pytest.raises(TypeError):
         QualityControl([], self.genes, self.barcodes)
예제 #12
0
 def test_metrics_with_filter_df_verbose(self):
     """Smoke test: metrics() with filtering and verbose=True returns four values.

     No assertions are made on the returned values.
     """
     call_options = {
         "UMI_thresh": 1500,
         "Features_thresh": 39,
         "log10FeaturesPerUMI_thresh": 0.002,
         "FeaturesPerUMI_thresh": 0.0001,
         "mtRatio_thresh": 0.5,
         "filter_count_matrix": True,
         "remove_cell_cycle": False,
         "verbose": True,
     }
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     filtered, kept_genes, kept_barcodes, qc_table = quality_control.metrics(**call_options)
예제 #13
0
 def test_mat_to_csc(self):
     """mat_to_csc must return a 50x40 CSC matrix for DataFrame, CSR and COO inputs."""
     qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

     # Use the public class names; the `csc` / `coo` submodule paths are
     # deprecated (assumes `spsp` is scipy.sparse).
     inputs = (
         self.mtx_df_50x40,
         self.csr_50x40,
         spsp.coo_matrix(self.mtx_df_50x40),
     )
     for source in inputs:
         mat = qc.mat_to_csc(source)
         assert isinstance(mat, spsp.csc_matrix)
         assert mat.shape == (50, 40)
예제 #14
0
    def test_filter_with_QC_Obj_removeCC(self):
        """Filter using a precomputed QC table, with cell-cycle removal enabled.

        Calls metrics() with filtering disabled to obtain the QC table, then
        passes it to filter_count_matrix with remove_cell_cycle=True and
        checks the types, shape and total count of the result.
        """
        thresholds = {
            "UMI_thresh": 1500,
            "Features_thresh": 39,
            "log10FeaturesPerUMI_thresh": 0.002,
            "FeaturesPerUMI_thresh": 0.0001,
            "mtRatio_thresh": 0.5,
        }
        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

        QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False, **thresholds)
        fdata, fgenes, fbc = qc.filter_count_matrix(QC_metaobj_50x40, remove_cell_cycle=True, **thresholds)

        # Use the public class name; the `csc` submodule path is deprecated
        # (assumes `spsp` is scipy.sparse).
        assert isinstance(fdata, spsp.csc_matrix)
        assert isinstance(fbc, list)
        assert isinstance(fgenes, list)
        assert np.shape(fdata) == (32, 38)
        assert fdata.sum() == 60257
예제 #15
0
 def test_filter_both(self):
     """Filtering with a precomputed QC table and with QC_metaobj=None must agree.

     Compares the filtered matrix shapes, the barcodes and the genes.
     """
     shared_options = {
         "remove_cell_cycle": False,
         "UMI_thresh": 1500,
         "Features_thresh": 39,
         "log10FeaturesPerUMI_thresh": 0.002,
         "FeaturesPerUMI_thresh": 0.0001,
         "mtRatio_thresh": 0.5,
     }

     # Path 1: compute the QC table first, then hand it to the filter.
     with_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     qc_table = with_table.metrics(filter_count_matrix=False)
     matrix_a, genes_a, barcodes_a = with_table.filter_count_matrix(qc_table, **shared_options)

     # Path 2: fresh instance, no QC table supplied.
     without_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     matrix_b, genes_b, barcodes_b = without_table.filter_count_matrix(QC_metaobj=None, **shared_options)

     assert matrix_a.shape == matrix_b.shape
     assert np.all(barcodes_a == barcodes_b)
     assert np.all(genes_a == genes_b)
예제 #16
0
 def test_log10FeaturesPerUMI_thresh(self):
     """filter_count_matrix and metrics(filter_count_matrix=True) must give the same shape for one log10 threshold."""
     threshold = .48

     direct_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     direct_matrix, direct_genes, direct_barcodes = direct_qc.filter_count_matrix(
         log10FeaturesPerUMI_thresh=threshold)

     metrics_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     via_matrix, via_genes, via_barcodes, qc_table = metrics_qc.metrics(
         filter_count_matrix=True, log10FeaturesPerUMI_thresh=threshold)

     assert direct_matrix.shape == via_matrix.shape
예제 #17
0
 def test_filter_umi_threshold_error(self):
     """Smoke test: filter_count_matrix with UMI_thresh=900, cell-cycle removal and verbose output.

     Only checks that the call returns three values; despite the test's
     name, no exception is expected here.
     """
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     filtered, kept_genes, kept_barcodes = quality_control.filter_count_matrix(
         UMI_thresh=900, remove_cell_cycle=True, verbose=True)
예제 #18
0
 def test_filter_allFilters(self):
     """Applying all five thresholds in one call must leave a 14x40 matrix."""
     all_thresholds = {
         "UMI_thresh": 1700,
         "Features_thresh": 39,
         "FeaturesPerUMI_thresh": .02,
         "log10FeaturesPerUMI_thresh": .48,
         "mtRatio_thresh": .1,
     }
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     filtered, kept_genes, kept_barcodes = quality_control.filter_count_matrix(**all_thresholds)
     assert filtered.shape == (14, 40)
예제 #19
0
    def test_filter_allFilters_compare(self):
        """Applying the thresholds one at a time must match applying them all at once.

        Each sequential step feeds its output into a new QualityControl
        instance. The row count after every step is checked, and the final
        sequential result is compared with the single combined call.
        """
        sequential_steps = (
            {"UMI_thresh": 1700},
            {"Features_thresh": 39},
            {"FeaturesPerUMI_thresh": .02},
            {"log10FeaturesPerUMI_thresh": .48},
            {"mtRatio_thresh": .1},
        )

        matrix, genes, barcodes = self.mtx_df_50x40, self.genes, self.barcodes
        rows_after_step = []
        for step_options in sequential_steps:
            stage_qc = QualityControl(matrix, genes, barcodes)
            matrix, genes, barcodes = stage_qc.filter_count_matrix(**step_options)
            rows_after_step.append(matrix.shape[0])
        # Also construct an instance from the final sequential output (result unused).
        QualityControl(matrix, genes, barcodes)

        combined_options = {}
        for step_options in sequential_steps:
            combined_options.update(step_options)
        combined_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
        all_matrix, all_genes, all_barcodes = combined_qc.filter_count_matrix(**combined_options)

        observed_rows = tuple(rows_after_step) + (all_matrix.shape[0],)
        assert observed_rows == (45, 30, 17, 17, 14, 14)
        assert np.all(genes == all_genes)
        assert np.all(barcodes == all_barcodes)
        assert np.sum(matrix) == np.sum(all_matrix)
예제 #20
0
 def test_filter_mtRatio(self):
     """Filtering with only mtRatio_thresh=.1 must leave a 43x40 matrix."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     filtered, kept_genes, kept_barcodes = quality_control.filter_count_matrix(mtRatio_thresh=.1)
     expected_shape = (43, 40)
     assert filtered.shape == expected_shape
예제 #21
0
 def test_filter_log10featuresPerUMI(self):
     """Filtering with only log10FeaturesPerUMI_thresh=.48 must leave a 43x40 matrix."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     filtered, kept_genes, kept_barcodes = quality_control.filter_count_matrix(
         log10FeaturesPerUMI_thresh=.48)
     expected_shape = (43, 40)
     assert filtered.shape == expected_shape
예제 #22
0
 def test_filter_log10featuresPerUMI_threshold_wrong_type(self):
     """A list passed as log10FeaturesPerUMI_thresh must raise the type-related ValueError."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     bad_threshold = [1, 2, 3]
     expected_message = r"log10featurePerUMI threshold must be an integer or float."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.filter_count_matrix(log10FeaturesPerUMI_thresh=bad_threshold)
예제 #23
0
 def test_metrics_filter_no_args(self):
     """metrics(filter_count_matrix=True) with no thresholds must raise ValueError."""
     qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     with pytest.raises(ValueError):
         # Do not unpack the result here: a tuple-unpacking failure is also a
         # ValueError and would let the test pass without metrics() raising.
         qc.metrics(filter_count_matrix=True)
예제 #24
0
 def test_filter_mtRatio_threshold_wrong_type(self):
     """mtRatio_thresh=-1 must raise the 'float between 0 and 1' ValueError."""
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     expected_message = r"mtRatio threshold must be a float between 0 and 1."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.filter_count_matrix(mtRatio_thresh=-1)
예제 #25
0
 def test_wrong_matrix_type(self):
     """Passing the matrix as a NumPy array must raise TypeError in the constructor."""
     as_ndarray = np.asarray(self.mtx_df_50x40)
     with pytest.raises(TypeError):
         QualityControl(as_ndarray, self.genes, self.barcodes)
예제 #26
0
 def test_filter_mtRatio_threshold_error(self):
     """filter_count_matrix must raise when mtRatio_thresh is too low.

     The expected ValueError message states that all samples would be removed.
     """
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     expected_message = r"MT ratio threshold too low, all samples would be removed."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.filter_count_matrix(mtRatio_thresh=.000001)
예제 #27
0
 def test_wrong_gene_length(self):
     """Constructor must raise ValueError when the gene or barcode list is five entries short."""
     short_genes = self.genes[:-5]
     with pytest.raises(ValueError):
         QualityControl(self.mtx_df_50x40, short_genes, self.barcodes)

     short_barcodes = self.barcodes[:-5]
     with pytest.raises(ValueError):
         QualityControl(self.mtx_df_50x40, self.genes, short_barcodes)
예제 #28
0
 def test_get_mt_idx(self):
     """get_mt_idx is called with the gene IDs as a list and as a NumPy array.

     The list-input result is type-checked; the array-input result is
     compared with the expected indices.
     """
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     from_list = quality_control.get_mt_idx(self.genes)
     from_array = quality_control.get_mt_idx(np.array(self.genes))

     assert isinstance(from_list, list)
     expected_indices = [30, 31, 33]
     assert from_array == expected_indices
예제 #29
0
 def test_filter_log10featuresPerUMI_threshold_error(self):
     """filter_count_matrix must raise when log10FeaturesPerUMI_thresh is too high.

     The expected ValueError message states that all samples would be removed.
     """
     quality_control = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     expected_message = r"log10 Feature per UMI threshold too high, all samples would be removed."
     with pytest.raises(ValueError, match=expected_message):
         quality_control.filter_count_matrix(log10FeaturesPerUMI_thresh=.9)
예제 #30
0
 def test_get_cc_idx(self):
     """get_cc_idx is called with the gene IDs as a list and as a NumPy array.

     Both results are type-checked; the array-input result is compared
     with the expected indices.
     """
     qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     cc_idx = qc.get_cc_idx(self.genes)
     cc_idx_assarray = qc.get_cc_idx(np.array(self.genes))

     # Check the list-input result as well, mirroring test_get_mt_idx, so
     # that call is not left unverified.
     assert isinstance(cc_idx, list)
     assert isinstance(cc_idx_assarray, list)
     assert cc_idx_assarray == [32, 23]