Ejemplo n.º 1
0
    def __init__(self,
                 bedfile: str,
                 spacing: int = 20,
                 mindata: int = 50,
                 maxpool: int = None,
                 lines: list = None):
        """
        Build the instance from a .bed file or from already loaded lines.

        :param bedfile: path of the .bed file; stored as ``self.source``
        :param spacing: non-negative int; any other value is reported and
            replaced by 0
        :param mindata: positive int; any other value is reported and
            replaced by 1
        :param maxpool: reported as the pool limit when truthy; always
            forwarded to ``load_amplicons``
        :param lines: rows to use; when None they are obtained from
            ``BedFileLoader(bedfile).expand_columns()``

        ``self.targets`` is only assigned when ``load_amplicons`` returns a
        truthy value.
        """
        # Exact type match on purpose: a bool is not accepted as an int here.
        spacing_is_valid = type(spacing) is int and spacing >= 0
        if not spacing_is_valid:
            print("ROI: Invalid spacing: {0}; defaulting to 0".format(spacing))
            spacing = 0

        mindata_is_valid = type(mindata) is int and mindata > 0
        if not mindata_is_valid:
            print("ROI: Invalid mindata: {0}; defaulting to 1".format(mindata))
            mindata = 1

        print("Loading .bed data.")
        if maxpool:
            print("ROI limited to max number of pools = {0}".format(maxpool))
        self.source = bedfile

        # Only touch the file when the caller did not supply the rows.
        rows = lines
        if rows is None:
            rows = BedFileLoader(bedfile).expand_columns()

        # NOTE(review): the meaning of the trailing -1 is defined by
        # load_amplicons, which is not visible here.
        self.amplicons = self.load_amplicons(rows, maxpool, -1)
        if not self.amplicons:
            return
        self.targets = self.define_targets(spacing, mindata)
Ejemplo n.º 2
0
def test_search_map_for_general_bed():
    """
    for general.bed the 'search' value of the region id, attributes and
    submitted region entries of the column map is None
    """
    loader = BedFileLoader(mock_dir_path + 'general.bed')
    column_map = loader.get_map_with_column_indexes()
    # The key names are private to BedFileLoader, hence the mangled names.
    key_attributes = (
        '_BedFileLoader__region_id',
        '_BedFileLoader__attributes',
        '_BedFileLoader__submitted_region',
    )
    for attribute_name in key_attributes:
        key = getattr(loader, attribute_name)
        assert column_map[key]['search'] is None
Ejemplo n.º 3
0
def test_search_map_for_amplicon_cov():
    """
    for amplicon_cov.tsv the region id and attributes entries of the column
    map have a 'search' value, the submitted region entry has None
    """
    loader = BedFileLoader(mock_dir_path + 'amplicon_cov.tsv')
    column_map = loader.get_map_with_column_indexes()
    # (mangled private key name, whether a 'search' value is expected)
    expectations = (
        ('_BedFileLoader__region_id', True),
        ('_BedFileLoader__attributes', True),
        ('_BedFileLoader__submitted_region', False),
    )
    for attribute_name, has_search in expectations:
        search = column_map[getattr(loader, attribute_name)]['search']
        if has_search:
            assert search is not None
        else:
            assert search is None
Ejemplo n.º 4
0
def test_search_map_for_effective_regions():
    """
    for effective_regions.bed the region id, attributes and submitted region
    entries of the column map all have a 'search' value
    """
    loader = BedFileLoader(mock_dir_path + 'effective_regions.bed')
    column_map = loader.get_map_with_column_indexes()
    # The key names are private to BedFileLoader, hence the mangled names.
    key_attributes = (
        '_BedFileLoader__region_id',
        '_BedFileLoader__attributes',
        '_BedFileLoader__submitted_region',
    )
    for attribute_name in key_attributes:
        key = getattr(loader, attribute_name)
        assert column_map[key]['search'] is not None
Ejemplo n.º 5
0
 def __init__(self, filename: str):
     """
     Load an amplicon coverage file into ``self.targets`` and
     ``self.counters``.

     The process exits with status 1 when the loader does not report the
     amplicon coverage file type for ``filename``.

     :param filename: path of the file handed to ``BedFileLoader``
     """
     loader = BedFileLoader(filename)
     print('Loading coverage data from {}'.format(filename))
     # noinspection PyProtectedMember
     wrong_type = loader.file_type != loader._BedFileLoader__amplicon_cov
     if wrong_type:
         print('{} is not a valid amplicon_coverage file'.format(filename))
         sys.exit(1)
     # Expand first, then read the columns, as the loader is queried in
     # that order.
     rows = loader.expand_columns()
     targets, counters = self.define_targets(rows, loader.columns)
     self.targets = targets
     self.counters = counters
Ejemplo n.º 6
0
def test_loader_strip_chr_works():
    """
    test that with strip_chr=True the first column of the first line of
    general.bed is '7', i.e. it carries no 'chr' prefix
    """
    loader = BedFileLoader(mock_dir_path + 'general.bed', strip_chr=True)
    first_line = loader.bed_lines[0]
    assert first_line[0] == '7'
Ejemplo n.º 7
0
def test_loader_expanded_result_is_sorted_by_chrom_start_end():
    """
    test that the list returned by expand_columns is sorted by chrom,
    chromStart, and chromEnd (ASC)
    """
    expected_rows = (
        ['chr1', '10000', '100000'],
        ['1', '10001', '100000'],
        ['12', '10000', '100000'],
        ['chr12', '10001', '100000'],
    )
    loader = BedFileLoader(mock_dir_path + 'mock_for_split.bed')
    expanded = loader.expand_columns()
    # Compare row by row so that only the leading rows are pinned.
    for position, expected_row in enumerate(expected_rows):
        assert expanded[position] == expected_row
Ejemplo n.º 8
0
def test_loader_ignores_non_valid_data_lines():
    r"""
    data lines are those that start with chrom\d* or \d*

    Every other line of mock_for_split.bed must end up in header_lines and
    not in bed_lines.

    NOTE(review): the third expected header starts with 'chromosome' while
    the data rows start with 'chr', so the actual data-line prefix is
    presumably 'chr' - confirm against BedFileLoader.
    """
    # The docstring is a raw string because '\d' is not a valid escape
    # sequence in a regular string literal.
    bed_file = BedFileLoader(mock_dir_path + 'mock_for_split.bed')
    #  check headers
    assert len(bed_file.header_lines) == 3
    assert bed_file.header_lines[0] == 'track'
    assert bed_file.header_lines[1] == 'browser'
    assert bed_file.header_lines[2] == 'chromosome\tbut\tignore\tit\tplease'
    #  check data: rows are kept in file order, before any sorting
    assert bed_file.bed_lines[0] == ['chr1', '10000', '100000']
    assert bed_file.bed_lines[1] == ['chr12', '10001', '100000']
    assert bed_file.bed_lines[2] == ['12', '10000', '100000']
    assert bed_file.bed_lines[3] == ['1', '10001', '100000']
Ejemplo n.º 9
0
def test_len_columns_for_amplicon_cov():
    """
    test that for amplicon_cov.tsv the loader reports as many columns as
    the first expanded row has fields
    """
    loader = BedFileLoader(mock_dir_path + 'amplicon_cov.tsv')
    rows = loader.expand_columns()
    column_count = len(loader.columns)
    assert column_count == len(rows[0])
Ejemplo n.º 10
0
def test_predict_file_type_for_amplicon_cov():
    """
    test that amplicon_cov.tsv is reported as file type 'amplicon_cov'
    """
    path = mock_dir_path + 'amplicon_cov.tsv'
    detected_type = BedFileLoader(path).file_type
    assert detected_type == 'amplicon_cov'
Ejemplo n.º 11
0
def test_predict_file_type_for_ampliseq_exome():
    """
    test that ampliseq_exome.bed is reported as file type 'ampliseq_exome'
    """
    path = mock_dir_path + 'ampliseq_exome.bed'
    detected_type = BedFileLoader(path).file_type
    assert detected_type == 'ampliseq_exome'
Ejemplo n.º 12
0
def test_len_columns_for_effective_regions():
    """
    test that for effective_regions.bed the loader reports as many columns
    as the first expanded row has fields
    """
    loader = BedFileLoader(mock_dir_path + 'effective_regions.bed')
    rows = loader.expand_columns()
    column_count = len(loader.columns)
    assert column_count == len(rows[0])
Ejemplo n.º 13
0
def test_predict_file_type_for_effective_regions():
    """
    test that effective_regions.bed is reported as file type
    'effective_regions'
    """
    path = mock_dir_path + 'effective_regions.bed'
    detected_type = BedFileLoader(path).file_type
    assert detected_type == 'effective_regions'
Ejemplo n.º 14
0
def test_len_columns_for_general_bed():
    """
    test that for general.bed the loader reports as many columns as the
    first expanded row has fields
    """
    loader = BedFileLoader(mock_dir_path + 'general.bed')
    rows = loader.expand_columns()
    column_count = len(loader.columns)
    assert column_count == len(rows[0])
Ejemplo n.º 15
0
def test_predict_file_type_for_general_bed():
    """
    test that general.bed is reported as file type 'general_tsv'
    """
    path = mock_dir_path + 'general.bed'
    detected_type = BedFileLoader(path).file_type
    assert detected_type == 'general_tsv'
Ejemplo n.º 16
0
def test_len_columns_for_ampliseq_exome():
    """
    test that for ampliseq_exome.bed the loader reports as many columns as
    the first expanded row has fields
    """
    loader = BedFileLoader(mock_dir_path + 'ampliseq_exome.bed')
    rows = loader.expand_columns()
    column_count = len(loader.columns)
    assert column_count == len(rows[0])