예제 #1
0
 def test_file_extension_deletion_success(self):
     """
     Asserts that gi.file_extension returns a pd.DataFrame when the
     deletion test input is read as a 'deletion' SV.
     """
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)) which only says "False is not true".
     self.assertIsInstance(
         gi.file_extension(self.test_input2, 'deletion'), pd.DataFrame)
예제 #2
0
 def test_flanking_regions_fasta_insertion(self):
     """
     Asserts that insertions are handled properly for both forward and reverse strand insertions.

     Builds flanking regions of size 5 from the insertion input and
     checks that exactly 8 entries are produced.
     """
     dataframe = gi.file_extension(self.insertion_input2, 'insertion')
     flanking = gi.flanking_region_fasta_insertion(self.insertion_genome,
                                                   dataframe, 5)
     # assertEqual shows the actual length when the check fails.
     self.assertEqual(len(flanking), 8)
예제 #3
0
 def test_match_chr_to_genome(self):
     """
     Asserts that a pd.DataFrame object is returned from function.
     chr format is mismatched between genome and input file, checks for conversion.
     """
     dataframe = gi.file_extension(self.test_input2, 'deletion')
     matched = gi.match_chr_to_genome(dataframe, self.genome, 'deletion')
     # assertIsInstance reports the actual type on failure, unlike
     # assertTrue(isinstance(...)).
     self.assertIsInstance(matched, pd.DataFrame)
예제 #4
0
 def test_flanking_regions_fasta_translocation(self):
     """
     Asserts that translocations are handled properly for both forward and reverse strand translocations.

     Builds flanking regions of size 5 and compares the sequence (second
     item) of the first four entries against the expected strings.
     """
     dataframe = gi.file_extension(self.translocation_input2,
                                   'translocation')
     flanking = gi.flanking_region_fasta_translocation(
         self.insertion_genome, dataframe, 5)
     # assertEqual prints both sequences when they differ, which
     # assertTrue(a == b) does not.
     self.assertEqual(flanking[0][1], 'CCAAAAATTT')
     self.assertEqual(flanking[1][1], 'AAAATAATTT')
     self.assertEqual(flanking[2][1], 'CCAAATTTTT')
     self.assertEqual(flanking[3][1], 'AAAATTTTTT')
예제 #5
0
def iterator_sv(args):
    """ Use an input regions file with SV positions to pull
        down flanking sequence on both sides of SV to generate
        primers upstream and downstream of the SV.

    For the requested SV type the regions file is loaded, its column
    count is checked, its "chr" column is matched to the reference
    genome, and one FASTA record plus one primer3 input record is
    written per flanking sequence.

    Two files are written, named from the regions file path with its
    extension removed:
        flanking_regions.<dataset_name>.fasta
        primer3_input.<dataset_name>.txt

    Args:
        args (Namespace): Argparse results. Reads ``regions_file``,
            ``ref_genome``, ``sv`` and ``flanking_region_size``; the
            whole namespace is also handed to ``utils.primer3_input``.

    Returns: None

    Raises:
        ValueError: If ``args.sv`` is not a supported SV type.
        AssertionError: If the regions DataFrame does not have the
            number of columns expected for the SV type.
    """
    # Per SV type: expected column count, name of the giv function that
    # builds the flanking sequences, and the tail of the validation
    # message. Function names are resolved lazily so only the one that
    # is actually needed has to exist on giv.
    sv_settings = {
        'deletion': (5, 'flanking_regions_fasta_deletion',
                     'Improper format.'),
        'inversion': (5, 'flanking_regions_fasta_inversion',
                      'Improper format.'),
        'insertion': (10, 'flanking_region_fasta_insertion',
                      'Exiting.'),
        'translocation': (8, 'flanking_region_fasta_translocation',
                          'Exiting.'),
    }
    # Fail early with a clear message instead of running into an unbound
    # local when no branch matches the SV type.
    if args.sv not in sv_settings:
        raise ValueError(
            "Unsupported SV type %r; expected one of: %s"
            % (args.sv, ", ".join(sorted(sv_settings))))
    expected_columns, builder_name, message_tail = sv_settings[args.sv]

    # NOTE(review): dataset_name keeps any directory part of regions_file,
    # so the output names below embed that path -- confirm this is intended.
    dataset_name = os.path.splitext(str(args.regions_file))[0]
    genome = giv.genome_iterator(args.ref_genome)

    # 2) create dataframe from input regions file
    small_regions = giv.file_extension(args.regions_file, args.sv)

    # 3) ensure proper number of columns in dataframe
    # Raised explicitly (rather than via ``assert``) so the check still
    # runs under ``python -O`` while keeping the same exception type.
    if len(list(small_regions)) != expected_columns:
        raise AssertionError(
            "DataFrame contains more/less than %d columns... %s"
            % (expected_columns, message_tail))

    # 4) format dataframe "chr" column to match reference genome
    small_regions = giv.match_chr_to_genome(small_regions, genome, args.sv)

    # 5) generate flanking regions based on position in input file
    build_flanking = getattr(giv, builder_name)
    flank_data = build_flanking(genome, small_regions,
                                args.flanking_region_size)

    # Context managers close both files even if writing fails part-way.
    with open("flanking_regions.%s.fasta" % dataset_name, 'w') as flanking, \
            open("primer3_input.%s.txt" % dataset_name, 'w') as primer3_in:
        for head, seq in flank_data:
            flanking.write(">" + head + '\n' + seq + '\n')
            # 6) generate primer3 input file
            primer3_in.write(utils.primer3_input(head, seq, args))
예제 #6
0
 def test_file_extension_insertion_success(self):
     """
     Checks that reading the insertion input as an 'insertion' SV
     yields a result whose length is 4.
     """
     insertion_frame = gi.file_extension(self.insertion_input1, 'insertion')
     observed_length = len(insertion_frame)
     self.assertEqual(observed_length, 4)