def test_file_extension_deletion_success(self):
    """
    Asserts that gi.file_extension returns a pd.DataFrame when given
    the deletion test input with sv type 'deletion'.
    """
    result = gi.file_extension(self.test_input2, 'deletion')
    # assertIsInstance reports the actual type on failure, whereas
    # assertTrue(isinstance(...)) only reports "False is not true".
    self.assertIsInstance(result, pd.DataFrame)
def test_flanking_regions_fasta_insertion(self):
    """
    Asserts that gi.flanking_region_fasta_insertion yields 8 entries for
    the second insertion input with a flanking size of 5.

    NOTE(review): the input is meant to cover both forward and reverse
    strand insertions -- confirm against the fixture data.
    """
    dataframe = gi.file_extension(self.insertion_input2, 'insertion')
    flanking = gi.flanking_region_fasta_insertion(
        self.insertion_genome, dataframe, 5)
    # assertEqual shows the actual length on failure, unlike
    # assertTrue(len(...) == 8).
    self.assertEqual(len(flanking), 8)
def test_match_chr_to_genome(self):
    """
    Asserts that gi.match_chr_to_genome returns a pd.DataFrame.

    NOTE(review): the chr format is meant to be mismatched between the
    genome and the input file so that the conversion path is exercised --
    confirm against the fixture data.
    """
    dataframe = gi.file_extension(self.test_input2, 'deletion')
    matched = gi.match_chr_to_genome(dataframe, self.genome, 'deletion')
    # assertIsInstance reports the actual type on failure, whereas
    # assertTrue(isinstance(...)) only reports "False is not true".
    self.assertIsInstance(matched, pd.DataFrame)
def test_flanking_regions_fasta_translocation(self):
    """
    Asserts that gi.flanking_region_fasta_translocation produces the
    expected sequences (element 1 of each of the first four entries)
    for the second translocation input with a flanking size of 5.

    NOTE(review): the input is meant to cover both forward and reverse
    strand translocations -- confirm against the fixture data.
    """
    dataframe = gi.file_extension(self.translocation_input2, 'translocation')
    flanking = gi.flanking_region_fasta_translocation(
        self.insertion_genome, dataframe, 5)
    expected_sequences = [
        'CCAAAAATTT',
        'AAAATAATTT',
        'CCAAATTTTT',
        'AAAATTTTTT',
    ]
    # assertEqual prints both strings on a mismatch, which
    # assertTrue(a == b) does not; entries are checked in the same order.
    for index, sequence in enumerate(expected_sequences):
        self.assertEqual(flanking[index][1], sequence)
def iterator_sv(args):
    """
    Use an input regions file with SV positions to pull down flanking
    sequence on both sides of each SV, and write that sequence together
    with the matching primer3 input so primers can be generated upstream
    and downstream of the SV.

    Two files are written, both named from the regions file path with its
    extension stripped:
    "flanking_regions.<name>.fasta" and "primer3_input.<name>.txt".

    Supported values of args.sv are 'deletion', 'inversion', 'insertion'
    and 'translocation'. For any other value the reference genome is still
    loaded, but no regions are read and no files are written.

    Args:
        args (Namespace): Argparse results. Reads regions_file, ref_genome,
            sv and flanking_region_size; the whole namespace is also passed
            on to utils.primer3_input.
    Returns:
        None
    Raises:
        AssertionError: if the regions dataframe does not have the number
            of columns required for the requested SV type.
    """
    five_column_message = ("DataFrame contains more/less than 5 columns... "
                           "Improper format.")
    # SV type -> (required column count, validation message, name of the
    # giv function that extracts the flanking sequences). The extractor is
    # stored by name so only the one actually needed is looked up on giv.
    sv_specs = {
        'deletion': (5, five_column_message,
                     'flanking_regions_fasta_deletion'),
        'inversion': (5, five_column_message,
                      'flanking_regions_fasta_inversion'),
        'insertion': (10,
                      "DataFrame contains more/less than 10 columns... Exiting.",
                      'flanking_region_fasta_insertion'),
        'translocation': (8,
                          "DataFrame contains more/less than 8 columns... Exiting.",
                          'flanking_region_fasta_translocation'),
    }

    # 1) derive the output name and load the reference genome
    dataset_name = os.path.splitext(str(args.regions_file))[0]
    genome = giv.genome_iterator(args.ref_genome)

    if args.sv not in sv_specs:
        return
    expected_columns, column_message, extractor_name = sv_specs[args.sv]

    # 2) create dataframe from input regions file
    small_regions = giv.file_extension(args.regions_file, args.sv)

    # 3) ensure proper number of columns in dataframe. Raised explicitly
    # rather than via `assert` so the check still runs under `python -O`;
    # the exception type is unchanged.
    if len(list(small_regions)) != expected_columns:
        raise AssertionError(column_message)

    # 4) format dataframe "chr" column to match reference genome
    small_regions = giv.match_chr_to_genome(small_regions, genome, args.sv)

    # 5) generate flanking regions based on position in input file. This is
    # done before the outputs are opened so that a failure here does not
    # leave empty files behind.
    extract_flanks = getattr(giv, extractor_name)
    flank_data = extract_flanks(genome, small_regions,
                                args.flanking_region_size)

    # The context manager closes both files even if a write fails.
    with open("flanking_regions.%s.fasta" % dataset_name, 'w') as flanking, \
            open("primer3_input.%s.txt" % dataset_name, 'w') as primer3_in:
        for head, seq in flank_data:
            flanking.write(">" + head + '\n' + seq + '\n')
            # 6) generate primer3 input file
            primer3_in.write(utils.primer3_input(head, seq, args))
def test_file_extension_insertion_success(self):
    """
    Checks that gi.file_extension, called on the first insertion input
    with sv type 'insertion', returns an object whose length is 4.
    """
    insertion_frame = gi.file_extension(self.insertion_input1, 'insertion')
    observed_length = len(insertion_frame)
    self.assertEqual(observed_length, 4)