def main():
    """Entry point of the imfusion-ctg command.

    Parses the command line arguments, loads the insertions (optionally
    dropping those whose support is below ``args.min_depth``), tests for
    CTGs and, when expression counts are supplied, adds differential
    expression results to the CTG table. The resulting table is written
    as a tab-separated file to ``args.output``.
    """

    logger = logging.getLogger()
    args = parse_args()

    # Load all insertions up front; both tests below use the same list.
    insertions = list(Insertion.from_csv(args.insertions, sep='\t'))

    min_depth = args.min_depth
    if min_depth is not None:
        # Keep only insertions with sufficient support.
        insertions = [ins for ins in insertions if ins.support >= min_depth]

    # Run the CTG test.
    logger.info('Testing for CTGs')

    window = args.window
    if window is not None:
        logger.info('- Using window (%d, %d)', *window)

    reference = Reference(args.reference)
    ctg_options = {
        'reference': reference,
        'gene_ids': args.gene_ids,
        'chromosomes': args.chromosomes,
        'pattern': args.pattern,
        'window': window,
    }
    ctgs = test_ctgs(insertions, **ctg_options)

    # Apply the q-value cutoff, if one was requested.
    if args.threshold is not None:
        threshold_expr = 'q_value <= {}'.format(args.threshold)
        ctgs = ctgs.query(threshold_expr)

    # Optionally annotate the CTGs with differential expression results.
    if args.expression is not None:
        logger.info('Testing for differential expression')

        exon_counts = read_exon_counts(args.expression)
        de_results = test_de(
            insertions, exon_counts, gene_ids=ctgs['gene_id'])

        # Prefix the DE columns before merging them into the CTG table.
        de_column_names = {
            'direction': 'de_direction',
            'p_value': 'de_pvalue',
            'test_type': 'de_test',
        }
        de_results = de_results.rename(columns=de_column_names)

        # Original CTG columns first, followed by the DE columns.
        col_order = list(ctgs.columns)
        col_order = col_order + ['de_test', 'de_direction', 'de_pvalue']

        merged = pd.merge(ctgs, de_results, on='gene_id', how='left')
        ctgs = merged[col_order]

        if args.de_threshold is not None:
            # Drop rows whose DE p-value exceeds the threshold. The mask is
            # negated (rather than using <=) so that rows with a NaN
            # p-value are retained.
            above_threshold = ctgs['de_pvalue'] > args.de_threshold
            ctgs = ctgs.loc[~above_threshold]

    # Write the final table.
    logger.info('Writing outputs')
    ctgs.to_csv(str(args.output), sep='\t', index=False)
def main():
    """Run the imfusion-ctg workflow from the command line.

    Steps, in order: parse arguments, read insertions (filtered on
    ``args.min_depth`` when given), test for CTGs, filter on
    ``args.threshold`` when given, optionally merge in differential
    expression results (filtered on ``args.de_threshold`` when given),
    and write the table as tab-separated text to ``args.output``.
    """

    logger = logging.getLogger()
    args = parse_args()

    # Read the insertions, then discard those with too little support.
    insertions = list(Insertion.from_csv(args.insertions, sep='\t'))

    if args.min_depth is not None:
        depth_cutoff = args.min_depth
        insertions = [
            insertion for insertion in insertions
            if insertion.support >= depth_cutoff
        ]

    # CTG identification.
    logger.info('Testing for CTGs')

    if args.window is not None:
        logger.info('- Using window (%d, %d)', *args.window)

    ref = Reference(args.reference)
    ctgs = test_ctgs(
        insertions,
        reference=ref,
        gene_ids=args.gene_ids,
        chromosomes=args.chromosomes,
        pattern=args.pattern,
        window=args.window)

    # Keep only rows passing the q-value threshold, when one is given.
    if args.threshold is not None:
        q_value_filter = 'q_value <= {}'.format(args.threshold)
        ctgs = ctgs.query(q_value_filter)

    # Differential expression is only tested if expression data is given.
    if args.expression is not None:
        logger.info('Testing for differential expression')

        counts = read_exon_counts(args.expression)
        de_table = test_de(insertions, counts, gene_ids=ctgs['gene_id'])

        # Rename the DE columns so they are recognisable in the merged
        # table.
        de_table = de_table.rename(columns={
            'direction': 'de_direction',
            'p_value': 'de_pvalue',
            'test_type': 'de_test',
        })

        # Keep the CTG columns in place and append the DE columns.
        de_columns = ['de_test', 'de_direction', 'de_pvalue']
        col_order = list(ctgs.columns) + de_columns

        combined = pd.merge(ctgs, de_table, on='gene_id', how='left')
        ctgs = combined[col_order]

        if args.de_threshold is not None:
            # Remove rows with a DE p-value above the threshold; negating
            # the '>' mask keeps rows whose p-value is NaN.
            is_above = ctgs['de_pvalue'] > args.de_threshold
            ctgs = ctgs.loc[~is_above]

    # Output.
    logger.info('Writing outputs')
    output_path = str(args.output)
    ctgs.to_csv(output_path, sep='\t', index=False)
def test_example_with_chromosomes(self, ctg_insertions, ctg_reference):
    """Tests that passing chromosomes=['1'] yields only gene_a and gene_b."""

    # Run the CTG test restricted to chromosome '1'.
    ctg_table = ctg.test_ctgs(
        ctg_insertions,
        ctg_reference,
        chromosomes=['1'],
        per_sample=True)

    # Exactly two rows are expected, one per remaining gene.
    assert len(ctg_table) == 2

    observed_genes = set(ctg_table['gene_id'])
    assert observed_genes == {'gene_a', 'gene_b'}
def test_empty(self, ctg_reference):
    """Tests that an empty insertion list gives an empty, well-formed result."""

    no_insertions = []
    ctg_table = ctg.test_ctgs(
        no_insertions, ctg_reference, window=(4, 0), per_sample=False)

    # No rows, but the expected columns must still be present, in order.
    assert len(ctg_table) == 0

    expected_columns = [
        'gene_id', 'p_value', 'q_value', 'gene_name', 'n_samples'
    ]
    assert list(ctg_table.columns) == expected_columns
def test_example_with_chromosomes(self, ctg_insertions, ctg_reference):
    """Tests the result when only chromosome '1' is requested."""

    # Do the CTG test with the chromosome subset.
    selected_chromosomes = ['1']
    outcome = ctg.test_ctgs(
        ctg_insertions,
        ctg_reference,
        chromosomes=selected_chromosomes,
        per_sample=True)

    # Only gene_a and gene_b should be reported.
    assert len(outcome) == 2

    reported_gene_ids = set(outcome['gene_id'])
    assert reported_gene_ids == {'gene_a', 'gene_b'}
def test_example_with_window(self, ctg_insertions, ctg_reference):
    """Tests the CTG test when a gene window of (4, 0) is applied."""

    # TODO: check the generated windows.

    # Run the CTG test with the window and index the result by gene.
    ctg_table = ctg.test_ctgs(
        ctg_insertions,
        ctg_reference,
        window=(4, 0),
        per_sample=False)
    by_gene = ctg_table.set_index('gene_id')

    # gene_a is expected to be significant at the 0.05 level.
    gene_a_pvalue = by_gene.loc['gene_a', 'p_value']
    assert gene_a_pvalue < 0.05
def test_example_with_collapse(self, ctg_insertions, ctg_reference):
    """Tests that no gene (gene_a included) is significant with per_sample=True."""

    # Run the CTG test with per_sample enabled and index the result by gene.
    ctg_table = ctg.test_ctgs(ctg_insertions, ctg_reference, per_sample=True)
    by_gene = ctg_table.set_index('gene_id')

    # All three genes are reported, none below the 0.05 level.
    assert len(by_gene) == 3

    for gene_id in ('gene_a', 'gene_b', 'gene_c'):
        assert by_gene.loc[gene_id, 'p_value'] > 0.05
def test_empty(self, ctg_reference):
    """Tests the CTG test on an input without any insertions."""

    outcome = ctg.test_ctgs(
        [],
        ctg_reference,
        window=(4, 0),
        per_sample=False)

    # The result has no rows...
    assert len(outcome) == 0

    # ...but still carries the full set of columns, in this order.
    column_names = list(outcome.columns)
    assert column_names == [
        'gene_id', 'p_value', 'q_value', 'gene_name', 'n_samples'
    ]
def test_example(self, ctg_insertions, ctg_reference):
    """Tests the three-gene example, in which only gene_a is significant."""

    # TODO: P-values seem higher than in previous tests. Check why.

    # Run the CTG test with per_sample disabled and index the result by gene.
    ctg_table = ctg.test_ctgs(
        ctg_insertions, ctg_reference, per_sample=False)
    by_gene = ctg_table.set_index('gene_id')

    # Three genes are reported.
    assert len(by_gene) == 3

    # gene_a falls below the 0.05 level; gene_b and gene_c do not.
    assert by_gene.loc['gene_a', 'p_value'] < 0.05

    for gene_id in ('gene_b', 'gene_c'):
        assert by_gene.loc[gene_id, 'p_value'] > 0.05
def test_example_with_window(self, ctg_insertions, ctg_reference):
    """Tests that gene_a is significant when window=(4, 0) is used."""

    # TODO: check the generated windows.

    # Do the CTG test with the window applied.
    gene_window = (4, 0)
    outcome = ctg.test_ctgs(
        ctg_insertions,
        ctg_reference,
        window=gene_window,
        per_sample=False)

    # Look up gene_a by its id and check its p-value.
    indexed = outcome.set_index('gene_id')
    assert indexed.loc['gene_a', 'p_value'] < 0.05