def test_open_mutations_without_header(self):
    ''' check that a file lacking the correct header row is rejected '''
    # a lone data row, with no header line ahead of it
    rows = [['TEST', '1', '200', 'A', 'C', 'missense_variant']]
    handle = self.write_temp(rows)

    self.assertRaises(ValueError, open_mutations, handle.name)
def test_open_mutations_consequence_noninteger_positions(self):
    ''' check that a non-integer nucleotide position raises an error '''
    header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
    # the position is written as '200.0' rather than as a plain integer
    record = ['TEST', '1', '200.0', 'A', 'C', 'missense_variant']
    handle = self.write_temp([header, record])

    self.assertRaises(ValueError, open_mutations, handle.name)
def test_open_mutations_multiple_genes(self):
    ''' check that mutations spread over several genes are all loaded '''
    header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
    first = ['TEST', '1', '200', 'A', 'C', 'missense_variant']
    second = ['TEST2', '1', '300', 'A', 'C', 'missense_variant']
    handle = self.write_temp([header, first, second])

    observed = open_mutations(handle.name)

    # one key per gene symbol, each mapping to a list with a single variant
    expected = {
        'TEST': [{
            'chrom': '1',
            'pos': 200,
            'ref': 'A',
            'alt': 'C',
            'consequence': 'missense_variant',
        }],
        'TEST2': [{
            'chrom': '1',
            'pos': 300,
            'ref': 'A',
            'alt': 'C',
            'consequence': 'missense_variant',
        }],
    }
    self.assertEqual(observed, expected)
def main():
    ''' run the per-gene severity analysis and write the p-values to a file

    Reads the command line options, loads the resources needed for the
    analysis, then writes a tab-separated table with a 'symbol' and a
    'severity_p_value' column to the path given by the output option. Gene
    symbols that are empty or '.' are skipped.
    '''
    args = get_options()

    ensembl = EnsemblRequest(args.cache, args.genome_build)
    cadd = pysam.TabixFile(args.cadd)
    constraint = load_regional_constraint(args.constraint)

    # open de novo mutations
    all_de_novos = open_mutations(args.de_novos)
    mut_dict = load_mutation_rates()

    # use a context manager so the output file is flushed and closed even if
    # analysing one of the genes raises part way through the run
    with open(args.output, 'w') as output:
        output.write('symbol\tseverity_p_value\n')

        for symbol in all_de_novos:
            # skip entries that lack a usable gene symbol
            if symbol in ['', '.']:
                continue

            # report which gene is currently being analysed
            print(symbol)
            de_novos = all_de_novos[symbol]
            p_value = analyse_gene(ensembl, mut_dict, cadd, symbol,
                de_novos, constraint, WEIGHTS)

            line = '{}\t{}\n'.format(symbol, p_value)
            output.write(line)
def test_open_mutations_consequence(self):
    ''' test that variants are filtered by their consequence '''
    header = ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence']
    synonymous = ['TEST', '1', '200', 'A', 'C', 'synonymous_variant']
    handle = self.write_temp([header, synonymous])

    # a variant that does not alter the protein sequence should be excluded,
    # which leaves nothing to load from this file
    loaded = open_mutations(handle.name)
    self.assertEqual(loaded, {})
def test_open_mutations_indels(self):
    ''' check that indels are only loaded when explicitly requested '''
    rows = [
        ['symbol', 'chrom', 'pos', 'ref', 'alt', 'consequence'],
        ['TEST', '1', '200', 'A', 'C', 'missense_variant'],
        ['TEST2', '1', '300', 'A', 'CG', 'frameshift_variant'],
    ]
    handle = self.write_temp(rows)

    snv_record = {
        'chrom': '1',
        'pos': 200,
        'ref': 'A',
        'alt': 'C',
        'consequence': 'missense_variant',
    }
    indel_record = {
        'chrom': '1',
        'pos': 300,
        'ref': 'A',
        'alt': 'CG',
        'consequence': 'frameshift_variant',
    }

    # with the default arguments the indel is left out
    without_indels = open_mutations(handle.name)
    self.assertEqual(without_indels, {'TEST': [snv_record]})

    # passing indels=True picks up the indel as well
    with_indels = open_mutations(handle.name, indels=True)
    self.assertEqual(with_indels,
        {'TEST': [snv_record], 'TEST2': [indel_record]})
def test_open_mutations_column_order(self):
    ''' test that we can load files with different column orders

    Covers two layouts: the required columns in a scrambled order, and the
    same scrambled columns followed by an additional column.
    '''
    expected = {
        'TEST': [{
            'chrom': '1',
            'pos': 200,
            'ref': 'A',
            'alt': 'C',
            'consequence': 'missense_variant',
        }],
    }

    # scramble the column order
    lines = [['chrom', 'ref', 'alt', 'consequence', 'symbol', 'pos'],
        ['1', 'A', 'C', 'missense_variant', 'TEST', '200']]
    temp = self.write_temp(lines)

    # check that we still pick up the correct columns based on the header
    self.assertEqual(open_mutations(temp.name), expected)

    # add an extra column, and check that the file is loaded as per usual
    lines = [['chrom', 'ref', 'alt', 'consequence', 'symbol', 'pos', 'extra'],
        ['1', 'A', 'C', 'missense_variant', 'TEST', '200', 'something']]
    # the new table has to be written out, otherwise the assertion below
    # would just re-read the first file and never see the extra column
    temp = self.write_temp(lines)

    self.assertEqual(open_mutations(temp.name), expected)