Ejemplo n.º 1
0
 def test_metrics_features_threshold_error(self):
     """Expect ``metrics`` to raise the "Feature threshold too high" ValueError.

     The call enables filtering and cell-cycle removal with
     ``Features_thresh=50``; the error message is matched as a regex.
     """
     expected_message = r"Feature threshold too high, all samples would be removed."
     threshold_kwargs = dict(
         UMI_thresh=1500,
         Features_thresh=50,
         log10FeaturesPerUMI_thresh=0.002,
         FeaturesPerUMI_thresh=0.0001,
         mtRatio_thresh=0.5,
     )
     quality = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

     with pytest.raises(ValueError, match=expected_message):
         # The four-way unpack is kept so the statement under test is unchanged.
         filtered, kept_genes, kept_barcodes, qc_table = quality.metrics(
             filter_count_matrix=True,
             remove_cell_cycle=True,
             **threshold_kwargs,
         )
Ejemplo n.º 2
0
    def test_metrics_with_filter_DATAFRAME(self):
        """Check ``metrics`` with filtering enabled on the ``mtx_df_50x40`` fixture.

        Verifies the QC metadata table (shape, column names, no missing
        values) and the filtered outputs: container types, sizes, total
        count, and that every returned barcode / gene id belongs to the
        expected set.
        """
        expected_columns = ['nUMI', 'nFeatures', 'FeaturesPerUMI','log10FeaturesPerUMI', 'mtUMI','mitoRatio']
        expected_barcodes = set(['AAACATTGAGCTAC-1','AAACATACAACCAC-1','AAACATTGATCAGC-1','AAACCGTGTATGCG-1','AAACGCACTGGTAC-1',
             'AAACGCTGACCAGT-1','AAACTTGATCCAGA-1', 'AAAGAGACGCGAGA-1', 'AAAGAGACGGCATT-1','AAAGCAGAAGCCAT-1', 'AAAGCAGATATCGG-1','AAAGCCTGTATGCG-1',
             'AAAGTTTGATCACG-1', 'AAAGTTTGGGGTGA-1', 'AAAGTTTGTAGAGA-1','AAAGTTTGTAGCGT-1', 'AAATCAACCCTATT-1', 'AAATCAACGGAAGC-1','AAATCAACTCGCAA-1',
             'AAATCCCTCCACAA-1', 'AAATCCCTGCTATG-1','AAATGTTGAACGAA-1', 'AAATGTTGCCACAA-1','AAATGTTGTGGCAT-1','AAATTCGAAGGTTC-1','AAATTCGAGCTGAT-1','AAACATTGATCTAC-1','AAACATTGATCCGC-1',
             'ATACGCACTGGTAC-1', 'AAACGCTGATCAGT-1','AAACGCGGGTTCTT-1','AAAAGCTGTAGCCA-1'])
        expected_genes = set(['ENSG00000243485',
                    'ENSG00000237613','ENSG00000186092','ENSG00000238009','ENSG00000239945','ENSG00000237683', 'ENSG00000239906',
                     'ENSG00000241599','ENSG00000228463','ENSG00000237094','ENSG00000235249', 'ENSG00000236601','ENSG00000236743', 'ENSG00000231709',
                     'ENSG00000239664', 'ENSG00000230021', 'ENSG00000223659','ENSG00000185097','ENSG00000235373', 'ENSG00000240618','ENSG00000229905',
                     'ENSG00000010292','ENSG00000011426','ENSG00000129055', 'ENSG00000177757', 'ENSG00000225880', 'ENSG00000230368','ENSG00000269308',
                     'ENSG00000272438','ENSG00000230699','ENSG00000210049','ENSG00000211459','ENSG00000097007', 'ENSG00000210082',
                     'ENSG00000241180', 'ENSG00000223764', 'ENSG00000187634','ENSG00000268179',
                                               'ENSG00000188976','ENSG00000187961'])

        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
        fdata, fgenes, fbc, QC_metaobj_50x40 = qc.metrics(
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
            filter_count_matrix=True,
            remove_cell_cycle=False,
        )

        # QC metadata: one row per input row, six metric columns, no NaN.
        assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
        assert np.all(QC_metaobj_50x40.columns == expected_columns)
        assert not np.any(QC_metaobj_50x40.isna())

        # Use the public class name: the ``spsp.csc`` submodule path is a
        # deprecated private namespace (assuming ``spsp`` is ``scipy.sparse``).
        assert isinstance(fdata, spsp.csc_matrix)
        assert isinstance(fbc, list)
        assert isinstance(fgenes, list)

        assert len(fbc) == 32
        assert len(fgenes) == 40
        assert fdata.shape == (32, 40)
        assert fdata.sum() == 63358

        # Every returned id must be one of the expected ids (subset check,
        # same strength as the original empty-difference assertion).
        assert set(fbc) <= expected_barcodes
        assert set(fgenes) <= expected_genes
Ejemplo n.º 3
0
 def test_log10FeaturesPerUMI_thresh(self):
     """Both filtering entry points give the same matrix shape.

     Applies ``log10FeaturesPerUMI_thresh=.48`` once through
     ``filter_count_matrix`` and once through
     ``metrics(filter_count_matrix=True)`` on fresh instances, then
     compares the shapes of the filtered matrices.
     """
     direct_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     direct_data, _direct_genes, _direct_bc = direct_qc.filter_count_matrix(
         log10FeaturesPerUMI_thresh=.48
     )

     via_metrics_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     metrics_data, _metrics_genes, _metrics_bc, _qc_table = via_metrics_qc.metrics(
         filter_count_matrix=True,
         log10FeaturesPerUMI_thresh=.48,
     )

     assert direct_data.shape == metrics_data.shape
Ejemplo n.º 4
0
    def test_metrics_no_filter_CSC(self):
        """Check the QC table returned by ``metrics`` for the ``csc_50x40`` fixture.

        With ``filter_count_matrix=False`` only the metadata table is
        returned; its shape, column names, absence of missing values and
        grand total (to 4 significant digits) are verified.
        """
        metric_names = ['nUMI', 'nFeatures', 'FeaturesPerUMI','log10FeaturesPerUMI', 'mtUMI','mitoRatio']

        quality = QualityControl(self.csc_50x40, self.genes, self.barcodes)
        qc_table = quality.metrics(filter_count_matrix=False)

        row_count = self.csc_50x40.shape[0]
        assert qc_table.shape == (row_count, 6)
        assert np.all(qc_table.columns == metric_names)
        assert not np.any(qc_table.isna())

        # Sum each column, then sum those column totals.
        grand_total = sum(sum(qc_table.values))
        np.testing.assert_approx_equal(
            grand_total,
            107641.13463,
            significant=4,
            err_msg='metrics sum incorrect',
        )
Ejemplo n.º 5
0
    def test_metrics_no_filter_DATAFRAME(self):
        """Check the QC table returned by ``metrics`` for the ``mtx_df_50x40`` fixture.

        With ``filter_count_matrix=False`` only the metadata table is
        returned; its shape, column names, absence of missing values and
        grand total are verified.
        """
        expected_columns = ['nUMI', 'nFeatures', 'FeaturesPerUMI','log10FeaturesPerUMI', 'mtUMI','mitoRatio']

        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
        QC_metaobj_50x40 = qc.metrics(
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
            filter_count_matrix=False,
        )

        assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
        assert np.all(QC_metaobj_50x40.columns == expected_columns)
        assert not np.any(QC_metaobj_50x40.isna())

        # Compare the float total with a tight relative tolerance instead of
        # ``==``: exact float equality breaks on harmless changes in
        # summation order or library version. This single check also covers
        # the looser 4-significant-digit comparison it replaces.
        total = sum(sum(QC_metaobj_50x40.values))
        np.testing.assert_allclose(
            total,
            107641.1346368048,
            rtol=1e-9,
            err_msg='metrics sum incorrect',
        )
Ejemplo n.º 6
0
 def test_filter_both(self):
     """``filter_count_matrix`` agrees with and without a precomputed QC table.

     One instance is given the table returned by
     ``metrics(filter_count_matrix=False)``; a second, fresh instance is
     called with ``QC_metaobj=None``. Matrix shapes, barcodes and genes
     must match.
     """
     shared_kwargs = dict(
         remove_cell_cycle=False,
         UMI_thresh=1500,
         Features_thresh=39,
         log10FeaturesPerUMI_thresh=0.002,
         FeaturesPerUMI_thresh=0.0001,
         mtRatio_thresh=0.5,
     )

     with_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     qc_table = with_table.metrics(filter_count_matrix=False)
     data_a, genes_a, barcodes_a = with_table.filter_count_matrix(qc_table, **shared_kwargs)

     without_table = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
     data_b, genes_b, barcodes_b = without_table.filter_count_matrix(QC_metaobj=None, **shared_kwargs)

     assert data_a.shape == data_b.shape
     assert np.all(barcodes_a == barcodes_b)
     assert np.all(genes_a == genes_b)
Ejemplo n.º 7
0
 def test_metrics_with_filter_df_verbose(self):
     """Smoke test: ``metrics`` with filtering and ``verbose=True``.

     No values are asserted; the test passes when the call completes and
     its result unpacks into four items.
     """
     call_kwargs = dict(
         UMI_thresh=1500,
         Features_thresh=39,
         log10FeaturesPerUMI_thresh=0.002,
         FeaturesPerUMI_thresh=0.0001,
         mtRatio_thresh=0.5,
         filter_count_matrix=True,
         remove_cell_cycle=False,
         verbose=True,
     )
     quality = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

     filtered, kept_genes, kept_barcodes, qc_table = quality.metrics(**call_kwargs)
Ejemplo n.º 8
0
    def test_filter_with_QC_Obj(self):
        """Filter using a QC table obtained beforehand from ``metrics``.

        Calls ``metrics(filter_count_matrix=False)`` to get the table, passes
        it to ``filter_count_matrix`` (cell-cycle removal off), then checks
        the output container types, the matrix shape and the total count.
        """
        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

        QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False)
        fdata, fgenes, fbc = qc.filter_count_matrix(
            QC_metaobj_50x40,
            remove_cell_cycle=False,
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
        )

        # Use the public class name: the ``spsp.csc`` submodule path is a
        # deprecated private namespace (assuming ``spsp`` is ``scipy.sparse``).
        assert isinstance(fdata, spsp.csc_matrix)
        assert isinstance(fbc, list)
        assert isinstance(fgenes, list)
        assert np.shape(fdata) == (32, 40)
        assert fdata.sum() == 63358
Ejemplo n.º 9
0
    def test_filter_with_QC_Obj_removeCC(self):
        """Filter with a precomputed QC table and ``remove_cell_cycle=True``.

        Calls ``metrics`` (filtering off) to get the QC table, passes it to
        ``filter_count_matrix`` with cell-cycle removal enabled, then checks
        the output container types, the matrix shape and the total count.
        """
        thresholds = dict(
            UMI_thresh=1500,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
        )
        qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

        QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False, **thresholds)

        fdata, fgenes, fbc = qc.filter_count_matrix(
            QC_metaobj_50x40,
            remove_cell_cycle=True,
            **thresholds,
        )

        # Use the public class name: the ``spsp.csc`` submodule path is a
        # deprecated private namespace (assuming ``spsp`` is ``scipy.sparse``).
        assert isinstance(fdata, spsp.csc_matrix)
        assert isinstance(fbc, list)
        assert isinstance(fgenes, list)
        # 38 columns remain here, versus 40 when cell-cycle removal is off.
        assert np.shape(fdata) == (32, 38)
        assert fdata.sum() == 60257
Ejemplo n.º 10
0
 def test_metrics_filter_no_args(self):
     """Expect ValueError from ``metrics(filter_count_matrix=True)`` with no thresholds given."""
     quality = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

     with pytest.raises(ValueError):
         # The four-way unpack is part of the statement under test; keep it.
         filtered, kept_genes, kept_barcodes, qc_table = quality.metrics(
             filter_count_matrix=True
         )