def test_two_splitters():
    """
    Check that split_strings breaks a string apart on both ',' and ':'.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "1:a,2:b"
    delimiters = [',', ':']
    expected = {'1', 'a', '2', 'b'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def test_complex():
    """
    Check that split_strings handles three different splitters at once.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "a:1,b:2|3"
    delimiters = [',', ':', '|']
    expected = {'1', 'a', '2', 'b', '3'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def test_two_splitters():
    """
    Check that split_strings breaks a string apart on both ',' and ':'.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "1:a,2:b"
    delimiters = [',', ':']
    expected = {'1', 'a', '2', 'b'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def test_complex():
    """
    Check that split_strings handles three different splitters at once.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "a:1,b:2|3"
    delimiters = [',', ':', '|']
    expected = {'1', 'a', '2', 'b', '3'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def get_entry(self, variant_line=None, variant_dict=None, raw_entry=None,
              vcf_header=None, csq_format=None, dict_key=None,
              individual_id=None):
    """Return the splitted entry from variant information

    If no raw_entry is given, it is fetched with self.get_raw_entry() using
    the variant arguments below. The raw entry is then split according to
    self.field (and, for INFO fields, self.info_key).

    Args:
        variant_line (str): A vcf formated variant line
        variant_dict: Forwarded unchanged to self.get_raw_entry()
        raw_entry (str): An already extracted raw entry. When given (and
            non-empty) self.get_raw_entry() is not called
        vcf_header (list): A list with the vcf header line
        csq_format (list): A list with the csq headers. Required when
            self.info_key is 'CSQ'
        dict_key: Forwarded unchanged to self.get_raw_entry()
        individual_id: Forwarded unchanged to self.get_raw_entry()

    Returns:
        entry (list): A list with the splitted entry. Empty if there is no
            raw entry or if self.field is not one of the handled fields

    Raises:
        IOError: If self.info_key is 'CSQ' and csq_format is missing,
            self.csq_key is missing, or self.csq_key is not one of the
            headers in csq_format
    """
    if not raw_entry:
        raw_entry = self.get_raw_entry(
            variant_line=variant_line,
            variant_dict=variant_dict,
            vcf_header=vcf_header,
            individual_id=individual_id,
            dict_key=dict_key
        )

    # Nothing to split
    if not raw_entry:
        return []

    entry = []

    if self.field in ['CHROM', 'POS', 'REF', 'QUAL']:
        # We know these fields always have one entry
        entry = [raw_entry]

    elif self.field in ['ID', 'FILTER']:
        # We know ID and FILTER are always splitted on ';'
        entry = raw_entry.split(';')

    elif self.field == 'ALT':
        # We know ALT is always splitted on ','
        entry = raw_entry.split(',')

    elif self.field == 'FORMAT':
        entry = raw_entry.split(':')

    elif self.field == 'INFO':
        # We are going to treat csq fields separately
        if self.info_key == 'CSQ':
            if not csq_format:
                raise IOError("If CSQ the csq format must be provided")
            if not self.csq_key:
                raise IOError("If CSQ a csq key must be provided")

            # Find the column of the csq entry we are looking for.
            # If the header occurs more than once the last one is used.
            csq_column = None
            for i, head in enumerate(csq_format):
                if head == self.csq_key:
                    csq_column = i

            # Without this check a missing key would surface as an
            # unbound local variable further down
            if csq_column is None:
                raise IOError(
                    "CSQ key {0} was not found in the csq format".format(
                        self.csq_key)
                )

            # CSQ entries are always splitted on ','
            for csq_entry in raw_entry.split(','):
                entry += split_strings(
                    csq_entry.split('|')[csq_column],
                    self.separators
                )
        else:
            if self.dict_entry:
                # Dict entries skip the first two separators
                separators = self.separators[2:]
            else:
                separators = self.separators

            entry = split_strings(raw_entry, separators)

    elif self.field == 'sample_id':
        if not self.separators:
            entry = split_strings(raw_entry, '/')
            # If variant calls are phased we need to split on '|'
            if len(entry) == 1:
                entry = split_strings(raw_entry, '|')
        else:
            entry = split_strings(raw_entry, self.separators)

    return entry
def test_simple_string():
    """Check that split_strings splits a simple string on a single ','.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "1,2"
    delimiters = [',']
    expected = {'1', '2'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def test_simple_string_no_splitters():
    """Check that split_strings leaves the string whole without splitters.

    With an empty splitter list the only expected value is the input itself.
    """
    raw = "1,2"
    delimiters = []
    expected = {'1,2'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected
def test_simple_string():
    """Check that split_strings splits a simple string on a single ','.

    The result is compared as a set, so order and duplicates are ignored.
    """
    raw = "1,2"
    delimiters = [',']
    expected = {'1', '2'}

    pieces = split_strings(raw, delimiters)

    assert set(pieces) == expected