def test_metrics_features_threshold_error(self):
    """metrics() with filtering enabled is expected to raise for Features_thresh=50.

    The test passes only if a ValueError carrying the "Feature threshold
    too high" message is raised by the call.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"Feature threshold too high, all samples would be removed."
    with pytest.raises(ValueError, match=expected_msg):
        qc.metrics(
            filter_count_matrix=True,
            remove_cell_cycle=True,
            UMI_thresh=1500,
            Features_thresh=50,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
        )
def test_get_cell_cycle_genes_human(self):
    """get_cell_cycle_genes('human') yields a flat list of 125 IDs with known end points."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    human_cc = qc.get_cell_cycle_genes('human')

    assert isinstance(human_cc, list)
    assert np.shape(human_cc) == (125,)
    # Spot-check the first and last identifiers of the returned list.
    assert human_cc[0] == 'ENSG00000097007'
    assert human_cc[-1] == 'ENSG00000116809'
def test_filter_without_QC_Obj(self):
    """filter_count_matrix() called with QC_metaobj=None returns the expected matrix.

    Checks the type, shape and total count of the filtered matrix.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata, fgenes, fbc = qc.filter_count_matrix(
        QC_metaobj=None,
        remove_cell_cycle=False,
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )
    # Use the public class path; the `spsp.csc` sub-namespace is deprecated in
    # SciPy (assumes `spsp` is the `scipy.sparse` alias -- confirm in imports).
    assert isinstance(fdata, spsp.csc_matrix)
    assert np.shape(fdata) == (32, 40)
    assert fdata.sum() == 63358
def test_get_cell_cycle_genes_mouse(self):
    """get_cell_cycle_genes('mouse') yields a flat list of 125 IDs with known end points."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    mouse_cc = qc.get_cell_cycle_genes('mouse')

    assert isinstance(mouse_cc, list)
    assert np.shape(mouse_cc) == (125,)
    # Spot-check the first and last identifiers of the returned list.
    assert mouse_cc[0] == 'ENSMUSG00000026842'
    assert mouse_cc[-1] == 'ENSMUSG00000006215'
def test_filter_umi_threshold_error_2(self):
    """filter_count_matrix() is expected to raise for UMI_thresh=3000.

    The test passes only if a ValueError carrying the "UMI threshold too
    high" message is raised by the call.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"UMI threshold too high, all samples would be removed."
    with pytest.raises(ValueError, match=expected_msg):
        qc.filter_count_matrix(
            remove_cell_cycle=True,
            UMI_thresh=3000,
            Features_thresh=39,
            log10FeaturesPerUMI_thresh=0.002,
            FeaturesPerUMI_thresh=0.0001,
            mtRatio_thresh=0.5,
        )
def test_metrics_with_filter_DATAFRAME(self):
    """metrics() with filtering on a DataFrame input returns data, genes, barcodes and metadata.

    Verifies the QC metadata table (shape, column names, no missing values)
    and the filtered outputs (types, sizes, total count, and that every
    returned barcode / gene belongs to the expected set).
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata, fgenes, fbc, QC_metaobj_50x40 = qc.metrics(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=True,
        remove_cell_cycle=False,
    )

    # --- QC metadata table -------------------------------------------------
    expected_columns = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
    assert np.all(QC_metaobj_50x40.columns == expected_columns)
    assert not np.any(QC_metaobj_50x40.isna())

    # --- filtered outputs --------------------------------------------------
    # Use the public class path; the `spsp.csc` sub-namespace is deprecated in
    # SciPy (assumes `spsp` is the `scipy.sparse` alias -- confirm in imports).
    assert isinstance(fdata, spsp.csc_matrix)
    assert isinstance(fbc, list)
    assert isinstance(fgenes, list)
    assert len(fbc) == 32
    assert len(fgenes) == 40
    assert fdata.shape == (32, 40)
    assert fdata.sum() == 63358

    expected_barcodes = {
        'AAACATTGAGCTAC-1', 'AAACATACAACCAC-1', 'AAACATTGATCAGC-1',
        'AAACCGTGTATGCG-1', 'AAACGCACTGGTAC-1', 'AAACGCTGACCAGT-1',
        'AAACTTGATCCAGA-1', 'AAAGAGACGCGAGA-1', 'AAAGAGACGGCATT-1',
        'AAAGCAGAAGCCAT-1', 'AAAGCAGATATCGG-1', 'AAAGCCTGTATGCG-1',
        'AAAGTTTGATCACG-1', 'AAAGTTTGGGGTGA-1', 'AAAGTTTGTAGAGA-1',
        'AAAGTTTGTAGCGT-1', 'AAATCAACCCTATT-1', 'AAATCAACGGAAGC-1',
        'AAATCAACTCGCAA-1', 'AAATCCCTCCACAA-1', 'AAATCCCTGCTATG-1',
        'AAATGTTGAACGAA-1', 'AAATGTTGCCACAA-1', 'AAATGTTGTGGCAT-1',
        'AAATTCGAAGGTTC-1', 'AAATTCGAGCTGAT-1', 'AAACATTGATCTAC-1',
        'AAACATTGATCCGC-1', 'ATACGCACTGGTAC-1', 'AAACGCTGATCAGT-1',
        'AAACGCGGGTTCTT-1', 'AAAAGCTGTAGCCA-1',
    }
    expected_genes = {
        'ENSG00000243485', 'ENSG00000237613', 'ENSG00000186092',
        'ENSG00000238009', 'ENSG00000239945', 'ENSG00000237683',
        'ENSG00000239906', 'ENSG00000241599', 'ENSG00000228463',
        'ENSG00000237094', 'ENSG00000235249', 'ENSG00000236601',
        'ENSG00000236743', 'ENSG00000231709', 'ENSG00000239664',
        'ENSG00000230021', 'ENSG00000223659', 'ENSG00000185097',
        'ENSG00000235373', 'ENSG00000240618', 'ENSG00000229905',
        'ENSG00000010292', 'ENSG00000011426', 'ENSG00000129055',
        'ENSG00000177757', 'ENSG00000225880', 'ENSG00000230368',
        'ENSG00000269308', 'ENSG00000272438', 'ENSG00000230699',
        'ENSG00000210049', 'ENSG00000211459', 'ENSG00000097007',
        'ENSG00000210082', 'ENSG00000241180', 'ENSG00000223764',
        'ENSG00000187634', 'ENSG00000268179', 'ENSG00000188976',
        'ENSG00000187961',
    }
    # Subset checks: nothing outside the expected sets may be returned.
    assert set(fbc) <= expected_barcodes
    assert set(fgenes) <= expected_genes
def test_get_mito_genes_human(self):
    """get_mito_genes('human') yields a flat list of 37 IDs with known end points."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    human_mt = qc.get_mito_genes('human')

    assert isinstance(human_mt, list)
    assert np.shape(human_mt) == (37,)
    # Spot-check the first and last identifiers of the returned list.
    assert human_mt[0] == 'ENSG00000210049'
    assert human_mt[-1] == 'ENSG00000210196'
def test_get_mito_genes_mouse(self):
    """get_mito_genes('mouse') yields a flat list of 37 IDs with known end points."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    mouse_mt = qc.get_mito_genes('mouse')

    assert isinstance(mouse_mt, list)
    assert np.shape(mouse_mt) == (37,)
    # Spot-check the first and last identifiers of the returned list.
    assert mouse_mt[0] == 'ENSMUSG00000064336'
    assert mouse_mt[-1] == 'ENSMUSG00000064372'
def test_metrics_no_filter_CSC(self):
    """metrics() without filtering on the CSC fixture returns only the QC metadata table.

    Checks the table's shape, column names, absence of missing values and
    the grand total of all metric values (to 4 significant digits).
    """
    qc = QualityControl(self.csc_50x40, self.genes, self.barcodes)
    # No threshold arguments are passed here; filtering is switched off.
    meta = qc.metrics(filter_count_matrix=False)

    expected_columns = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    assert meta.shape == (self.csc_50x40.shape[0], 6)
    assert np.all(meta.columns == expected_columns)
    assert not np.any(meta.isna())

    grand_total = sum(sum(meta.values))
    np.testing.assert_approx_equal(
        grand_total, 107641.13463, significant=4, err_msg='metrics sum incorrect'
    )
def test_metrics_no_filter_DATAFRAME(self):
    """metrics() without filtering on a DataFrame input returns only the QC metadata table.

    Checks the table's shape, column names, absence of missing values and
    the grand total of all metric values.

    The grand total is compared with a tight relative tolerance rather than
    with ``==``: exact equality on an accumulated float is sensitive to
    summation order and library versions.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    QC_metaobj_50x40 = qc.metrics(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=False,
    )

    expected_columns = [
        'nUMI', 'nFeatures', 'FeaturesPerUMI',
        'log10FeaturesPerUMI', 'mtUMI', 'mitoRatio',
    ]
    assert QC_metaobj_50x40.shape == (len(self.mtx_df_50x40), 6)
    assert np.all(QC_metaobj_50x40.columns == expected_columns)
    assert not np.any(QC_metaobj_50x40.isna())

    grand_total = sum(sum(QC_metaobj_50x40.values))
    np.testing.assert_allclose(
        grand_total, 107641.1346368048, rtol=1e-9, err_msg='metrics sum incorrect'
    )
def test_null_inputs(self):
    """Constructing QualityControl with empty inputs is expected to fail.

    An empty matrix fixture, an empty gene list and an empty barcode list
    must each produce a ValueError with a specific message; a plain empty
    list passed as the matrix must produce a TypeError.
    """
    value_error_cases = (
        # NOTE (kept from the original): change input to []
        ((self.mtx_df_empty, self.genes, self.barcodes), r"Empty matrix found!"),
        ((self.mtx_df_50x40, [], self.barcodes), r"Empty gene list found!"),
        ((self.mtx_df_50x40, self.genes, []), r"Empty barcode list found!"),
    )
    for ctor_args, expected_msg in value_error_cases:
        with pytest.raises(ValueError, match=expected_msg):
            QualityControl(*ctor_args)

    with pytest.raises(TypeError):
        QualityControl([], self.genes, self.barcodes)
def test_metrics_with_filter_df_verbose(self):
    """Smoke test: metrics() with filtering and verbose=True runs and returns four values.

    No assertions are made on the returned values; the test only fails if
    the call raises or the result cannot be unpacked into four items.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    call_kwargs = dict(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
        filter_count_matrix=True,
        remove_cell_cycle=False,
        verbose=True,
    )
    fdata, fgenes, fbc, QC_metaobj_50x40 = qc.metrics(**call_kwargs)
def test_mat_to_csc(self):
    """mat_to_csc() converts DataFrame, CSR and COO inputs to a 50x40 CSC matrix.

    The sparse classes are referenced through their public paths
    (``spsp.csc_matrix`` / ``spsp.coo_matrix``); the ``spsp.csc`` and
    ``spsp.coo`` sub-namespaces are deprecated in SciPy. This assumes
    ``spsp`` is the ``scipy.sparse`` alias -- confirm in the file's imports.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)

    # Each input is converted lazily, in the same order as before.
    input_builders = (
        lambda: self.mtx_df_50x40,
        lambda: self.csr_50x40,
        lambda: spsp.coo_matrix(self.mtx_df_50x40),
    )
    for build_input in input_builders:
        mat = qc.mat_to_csc(build_input())
        assert isinstance(mat, spsp.csc_matrix)
        assert mat.shape == (50, 40)
def test_filter_with_QC_Obj_removeCC(self):
    """Filter with a precomputed QC metadata object and remove_cell_cycle=True.

    metrics() is first called without filtering to obtain the QC metadata
    object, which is then passed to filter_count_matrix(). The filtered
    output is checked for types, shape and total count.
    """
    thresholds = dict(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    QC_metaobj_50x40 = qc.metrics(filter_count_matrix=False, **thresholds)
    fdata, fgenes, fbc = qc.filter_count_matrix(
        QC_metaobj_50x40, remove_cell_cycle=True, **thresholds
    )

    # Use the public class path; the `spsp.csc` sub-namespace is deprecated in
    # SciPy (assumes `spsp` is the `scipy.sparse` alias -- confirm in imports).
    assert isinstance(fdata, spsp.csc_matrix)
    assert isinstance(fbc, list)
    assert isinstance(fgenes, list)
    assert np.shape(fdata) == (32, 38)
    assert fdata.sum() == 60257
def test_filter_both(self):
    """Filtering with and without a precomputed QC metadata object must agree.

    Compares the filtered matrix shapes and the returned gene / barcode
    collections of the two call styles.
    """
    thresholds = dict(
        UMI_thresh=1500,
        Features_thresh=39,
        log10FeaturesPerUMI_thresh=0.002,
        FeaturesPerUMI_thresh=0.0001,
        mtRatio_thresh=0.5,
    )

    # Path 1: compute the QC metadata first, then hand it to the filter.
    qc_with_meta = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    meta = qc_with_meta.metrics(filter_count_matrix=False)
    fdata1, genes1, bc1 = qc_with_meta.filter_count_matrix(
        meta, remove_cell_cycle=False, **thresholds
    )

    # Path 2: fresh object, no metadata supplied.
    qc_without_meta = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata2, genes2, bc2 = qc_without_meta.filter_count_matrix(
        QC_metaobj=None, remove_cell_cycle=False, **thresholds
    )

    assert fdata1.shape == fdata2.shape
    assert np.all(bc1 == bc2)
    assert np.all(genes1 == genes2)
def test_log10FeaturesPerUMI_thresh(self):
    """filter_count_matrix() and metrics(filter_count_matrix=True) give equally shaped output.

    Both are called on fresh objects with log10FeaturesPerUMI_thresh=.48 only.
    """
    direct_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata1, fgenes1, fbc1 = direct_qc.filter_count_matrix(log10FeaturesPerUMI_thresh=.48)

    metrics_qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata2, fgenes2, fbc2, QC_metaobj_50x40 = metrics_qc.metrics(
        filter_count_matrix=True, log10FeaturesPerUMI_thresh=.48
    )

    assert fdata1.shape == fdata2.shape
def test_filter_umi_threshold_error(self):
    """Smoke test: filter_count_matrix(UMI_thresh=900, remove_cell_cycle=True, verbose=True) runs.

    Despite the test's name, no exception is expected here and no assertion
    is made on the result; the test only fails if the call raises or the
    result cannot be unpacked into three items.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    fdata, fgenes, fbc = qc.filter_count_matrix(
        UMI_thresh=900,
        remove_cell_cycle=True,
        verbose=True,
    )
def test_filter_allFilters(self):
    """Applying all five thresholds in one call leaves a 14x40 matrix."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    all_thresholds = dict(
        UMI_thresh=1700,
        Features_thresh=39,
        FeaturesPerUMI_thresh=.02,
        log10FeaturesPerUMI_thresh=.48,
        mtRatio_thresh=.1,
    )
    fdata, fgenes, fbc = qc.filter_count_matrix(**all_thresholds)
    assert fdata.shape == (14, 40)
def test_filter_allFilters_compare(self):
    """Applying the filters one at a time must match applying them all at once.

    Each stage builds a new QualityControl from the previous stage's output
    and applies a single threshold. The row counts after every stage, and
    the final genes, barcodes and total count, are compared with a single
    call that uses all thresholds together.
    """
    single_filter_stages = (
        {'UMI_thresh': 1700},
        {'Features_thresh': 39},
        {'FeaturesPerUMI_thresh': .02},
        {'log10FeaturesPerUMI_thresh': .48},
        {'mtRatio_thresh': .1},
    )

    current = (self.mtx_df_50x40, self.genes, self.barcodes)
    stage_matrices = []
    for stage_kwargs in single_filter_stages:
        qc = QualityControl(*current)
        current = qc.filter_count_matrix(**stage_kwargs)
        stage_matrices.append(current[0])
    d4, fgenes4, fbc4 = current
    # The original also wraps the final output once more (result unused).
    QualityControl(d4, fgenes4, fbc4)

    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    d_all, fgenes_all, fbc_all = qc.filter_count_matrix(
        UMI_thresh=1700,
        Features_thresh=39,
        FeaturesPerUMI_thresh=.02,
        log10FeaturesPerUMI_thresh=.48,
        mtRatio_thresh=.1,
    )

    row_counts = tuple(m.shape[0] for m in stage_matrices) + (d_all.shape[0],)
    assert row_counts == (45, 30, 17, 17, 14, 14)
    assert np.all(fgenes4 == fgenes_all)
    assert np.all(fbc4 == fbc_all)
    assert np.sum(d4) == np.sum(d_all)
def test_filter_mtRatio(self):
    """Filtering with mtRatio_thresh=.1 alone leaves a 43x40 matrix."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    filtered, _genes, _barcodes = qc.filter_count_matrix(mtRatio_thresh=.1)
    assert filtered.shape == (43, 40)
def test_filter_log10featuresPerUMI(self):
    """Filtering with log10FeaturesPerUMI_thresh=.48 alone leaves a 43x40 matrix."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    filtered, _genes, _barcodes = qc.filter_count_matrix(log10FeaturesPerUMI_thresh=.48)
    assert filtered.shape == (43, 40)
def test_filter_log10featuresPerUMI_threshold_wrong_type(self):
    """A list passed as log10FeaturesPerUMI_thresh is expected to raise ValueError."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"log10featurePerUMI threshold must be an integer or float."
    bad_threshold = [1, 2, 3]
    with pytest.raises(ValueError, match=expected_msg):
        qc.filter_count_matrix(log10FeaturesPerUMI_thresh=bad_threshold)
def test_metrics_filter_no_args(self):
    """metrics(filter_count_matrix=True) without any threshold is expected to raise ValueError.

    The return value is deliberately not unpacked inside the
    ``pytest.raises`` block. A failed tuple unpacking raises ValueError as
    well, so unpacking there could let the test pass even when metrics()
    itself never raised.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    with pytest.raises(ValueError):
        qc.metrics(filter_count_matrix=True)
def test_filter_mtRatio_threshold_wrong_type(self):
    """mtRatio_thresh=-1 is expected to raise ValueError with the range message."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"mtRatio threshold must be a float between 0 and 1."
    with pytest.raises(ValueError, match=expected_msg):
        qc.filter_count_matrix(mtRatio_thresh=-1)
def test_wrong_matrix_type(self):
    """Passing the matrix as a plain NumPy array is expected to raise TypeError."""
    as_ndarray = np.asarray(self.mtx_df_50x40)
    with pytest.raises(TypeError):
        QualityControl(as_ndarray, self.genes, self.barcodes)
def test_filter_mtRatio_threshold_error(self):
    """mtRatio_thresh=.000001 is expected to raise the "MT ratio threshold too low" ValueError."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"MT ratio threshold too low, all samples would be removed."
    with pytest.raises(ValueError, match=expected_msg):
        qc.filter_count_matrix(mtRatio_thresh=.000001)
def test_wrong_gene_length(self):
    """Gene or barcode lists shortened by five entries are expected to raise ValueError."""
    short_genes = self.genes[:-5]
    with pytest.raises(ValueError):
        QualityControl(self.mtx_df_50x40, short_genes, self.barcodes)

    short_barcodes = self.barcodes[:-5]
    with pytest.raises(ValueError):
        QualityControl(self.mtx_df_50x40, self.genes, short_barcodes)
def test_get_mt_idx(self):
    """get_mt_idx() accepts the gene list and a NumPy array of the same genes.

    The list-input result is checked for type; the array-input result is
    checked against the expected indices.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    from_list = qc.get_mt_idx(self.genes)
    from_array = qc.get_mt_idx(np.array(self.genes))

    # NOTE(review): the values of the list-input result are not asserted.
    assert isinstance(from_list, list)
    assert from_array == [30, 31, 33]
def test_filter_log10featuresPerUMI_threshold_error(self):
    """log10FeaturesPerUMI_thresh=.9 is expected to raise the "threshold too high" ValueError."""
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    expected_msg = r"log10 Feature per UMI threshold too high, all samples would be removed."
    with pytest.raises(ValueError, match=expected_msg):
        qc.filter_count_matrix(log10FeaturesPerUMI_thresh=.9)
def test_get_cc_idx(self):
    """get_cc_idx() accepts the gene list and a NumPy array of the same genes.

    Only the array-input result is asserted (type and expected indices);
    the list-input call is exercised but its result is not checked.
    """
    qc = QualityControl(self.mtx_df_50x40, self.genes, self.barcodes)
    # NOTE(review): the list-input result is computed but never asserted.
    from_list = qc.get_cc_idx(self.genes)
    from_array = qc.get_cc_idx(np.array(self.genes))

    assert isinstance(from_array, list)
    assert from_array == [32, 23]