def write_map_maker(vcf_fhand, parents_a, parents_b, genos_fhand,
                    pop_type=DEF_MAPMAKER_POP, phys_map_fhand=None,
                    coder_threshold=DEF_AB_CODER_THRESHOLD,
                    smooth_threshold=None, recomb_threshold=None):
    """Write the AB-recoded offspring genotypes in a MapMaker-style layout.

    The genotypes file written to ``genos_fhand`` contains, in order:

    * ``data type <pop_type>``
    * ``<number of offspring> <number of SNPs> 1``
    * one line per SNP: ``*<snp id>`` followed by the tab-separated
      one-letter genotype codes of every offspring
    * an empty line
    * ``*sample_names`` followed by the tab-separated offspring names

    When ``phys_map_fhand`` is given, a tab-separated table with the columns
    marker, position and Chromosome is also written, one row per SNP.

    Args:
        vcf_fhand: handle of the VCF, passed to ABCoder.
        parents_a: samples passed to ABCoder as the 'A' parents.
        parents_b: samples passed to ABCoder as the 'B' parents.
        genos_fhand: writable handle that receives the genotypes.
        pop_type: one of 'f2 intercross', 'f2 backcross', 'f3 self',
            'ri self' or 'ri sib'.
        phys_map_fhand: optional writable handle for the physical map.
        coder_threshold: forwarded to ABCoder as parent_index_threshold.
        smooth_threshold: forwarded to ABCoder.
        recomb_threshold: forwarded to ABCoder.

    Returns:
        The ABCoder instance used to recode the genotypes.

    Raises:
        ValueError: if pop_type is not one of the allowed values.
    """
    # Reject a bad pop_type before the coder is built, so that an invalid
    # call fails without touching vcf_fhand.
    allowed_pop_types = ('f2 intercross', 'f2 backcross', 'f3 self',
                         'ri self', 'ri sib')
    if pop_type not in allowed_pop_types:
        msg = 'Defined pop type \'{0}\' does not match an allowed pop_type '
        msg += '(f2 intercross, f2 backcross, f3 self, ri self or ri sib)'
        raise ValueError(msg.format(pop_type))

    sep = '\t'
    coding = 'ascii'
    # The keyword is spelled 'smooth_threhsold' because that is the name used
    # for ABCoder throughout this code.
    coder = ABCoder(vcf_fhand, parents_a, parents_b,
                    parent_index_threshold=coder_threshold,
                    smooth_threhsold=smooth_threshold,
                    recomb_threshold=recomb_threshold)
    samples = coder.offspring
    write_phys_map = phys_map_fhand is not None

    snp_ids = []
    snp_genos = []
    if write_phys_map:
        phys_map_fhand.write('marker\tposition\tChromosome\n')

    for snp, genos in coder.recode_genotypes(samples):
        snp_id = get_or_create_id(snp).encode(coding)
        snp_ids.append(snp_id)

        # One character per offspring, in the order genos.values() yields
        # them. NOTE(review): the 'c' typecode only exists in Python 2.
        geno_array = array('c')
        for geno in genos.values():
            geno_array.append(_code_to_one_letter(geno))
        snp_genos.append(geno_array)

        if write_phys_map:
            phys_map_fhand.write(snp_id)
            phys_map_fhand.write(sep)
            phys_map_fhand.write(str(snp.POS))
            phys_map_fhand.write(sep)
            phys_map_fhand.write(snp.CHROM.encode(coding))
            phys_map_fhand.write('\n')

    genos_fhand.write('data type ' + pop_type + '\n')
    # The trailing 1 is presumably the number of traits, the only
    # '*sample_names' entry written below -- TODO confirm.
    genos_fhand.write('{0} {1} 1\n'.format(len(samples), len(snp_genos)))
    for snp_id, snp_geno in zip(snp_ids, snp_genos):
        genos_fhand.write('*{0}'.format(snp_id))
        genos_fhand.write(sep)
        genos_fhand.write(sep.join(snp_geno))
        genos_fhand.write('\n')
    genos_fhand.write('\n')

    genos_fhand.write('*sample_names' + sep)
    encoded_samples = [sample.encode(coding) for sample in samples]
    genos_fhand.write(sep.join(encoded_samples))
    genos_fhand.write('\n')
    return coder
def write_parent_checker(vcf_fhand, parents_a, parents_b, genos_fhand,
                         phys_map_fhand=None,
                         coder_threshold=DEF_AB_CODER_THRESHOLD,
                         smooth_threshold=None, recomb_threshold=None):
    """Write the AB-recoded offspring genotypes as a sample-by-SNP table.

    The table written to ``genos_fhand`` is tab-separated: a header row with
    'ID' followed by every SNP id, then one row per offspring holding the
    sample name and its one-letter genotype code for each SNP.

    When ``phys_map_fhand`` is given, a tab-separated table with the columns
    marker, position and Chromosome is also written, one row per SNP.

    Args:
        vcf_fhand: handle of the VCF, passed to ABCoder.
        parents_a: samples passed to ABCoder as the 'A' parents.
        parents_b: samples passed to ABCoder as the 'B' parents.
        genos_fhand: writable handle that receives the genotype table.
        phys_map_fhand: optional writable handle for the physical map.
        coder_threshold: forwarded to ABCoder as parent_index_threshold.
        smooth_threshold: forwarded to ABCoder.
        recomb_threshold: forwarded to ABCoder.

    Returns:
        The ABCoder instance used to recode the genotypes.
    """
    tab = '\t'
    encoding = 'ascii'
    with_phys_map = phys_map_fhand is not None

    coder = ABCoder(vcf_fhand, parents_a, parents_b,
                    parent_index_threshold=coder_threshold,
                    smooth_threhsold=smooth_threshold,
                    recomb_threshold=recomb_threshold)
    offspring = coder.offspring

    if with_phys_map:
        phys_map_fhand.write('marker\tposition\tChromosome\n')

    marker_ids = []
    marker_codes = []
    for snp, genos in coder.recode_genotypes(offspring):
        marker_id = get_or_create_id(snp).encode(encoding)
        marker_ids.append(marker_id)

        # One character per offspring, in the order genos.values() yields
        # them.
        letters = [_code_to_one_letter(geno) for geno in genos.values()]
        marker_codes.append(array('c', letters))

        if with_phys_map:
            row = (marker_id, str(snp.POS), snp.CHROM.encode(encoding))
            phys_map_fhand.write(tab.join(row) + '\n')

    # Header row; the separator after 'ID' is kept even with no markers.
    genos_fhand.write('ID' + tab + tab.join(marker_ids) + '\n')

    # The markers were collected SNP by SNP, so each sample row is the
    # column of codes found at that sample's position in every marker.
    for column, sample in enumerate(offspring):
        genos_fhand.write(sample.encode(encoding) + tab)
        cells = tab.join(codes[column] for codes in marker_codes)
        genos_fhand.write(cells + '\n')
    return coder
def test_smooth(self):
    """Exercise ABCoder window creation and smoothing on a small VCF.

    The fixture has 11 SNPs on chromosome 20 for six samples (S1-S6); S1 and
    S2 are used as the A and B parents.

    NOTE(review): the bare ``return`` right after the first
    ``recode_genotypes`` call ends the test early. Every expected-output
    comparison and the ``plot_smooth_hist`` call below it are unreachable,
    so only the ``_create_windows`` assertion is actually checked.
    """
    vcf = '''#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tS1\tS2\tS3\tS4\tS5\tS6
20\t11\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t1/1\t1/1\t1/1\t0/0
20\t14\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t15\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t16\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t17\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t18\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t0/0\t0/0
20\t19\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/0
20\t20\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t21\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
20\t22\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t1/1
20\t23\t.\tG\tA\t29\tPASS\tNS=3\tGT\t0/0\t1/1\t0/0\t1/1\t1/1\t0/0
'''
    fhand = StringIO(self.VCF_HEADER + vcf)
    coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                    parent_index_threshold=0.9, smooth_threhsold=0.5,
                    window=7)
    # The only assertion that runs: the windows reported for index 8 with
    # window=7.
    assert coder._create_windows(8) == ((5, 11), (2, 8), (8, 14))
    # NOTE(review): the result is never iterated before the return; if
    # recode_genotypes is lazy, no recoding happens here -- TODO confirm.
    result = coder.recode_genotypes(samples=coder.offspring)
    # Early exit: everything from here on is dead code.
    return
    # Unreachable: expected output for smooth_threhsold=0.5.
    expected = '''11 AA,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AA
19 AA,BB,BB,..
20 AA,BB,BB,..
21 AA,BB,BB,AA
22 AA,BB,BB,..
23 AA,BB,BB,AA
'''
    assert self._ab_result_to_str(result) == expected
    # Unreachable: same fixture with smooth_threhsold=0.6 and
    # recomb_threshold=2.
    fhand = StringIO(self.VCF_HEADER + vcf)
    coder = ABCoder(fhand, parents_a=['S1'], parents_b=['S2'],
                    parent_index_threshold=0.9, smooth_threhsold=0.6,
                    recomb_threshold=2, window=7)
    result = coder.recode_genotypes(samples=coder.offspring)
    expected = '''11 ..,BB,BB,AA
14 AA,BB,BB,AA
15 AA,BB,BB,AA
16 AA,BB,BB,AA
17 AA,BB,BB,AA
18 AA,BB,BB,AB
19 AA,BB,BB,AB
20 AA,BB,BB,AB
21 AA,BB,BB,AB
22 AA,BB,BB,AB
23 AA,BB,BB,AB
'''
    assert self._ab_result_to_str(result) == expected
    # Unreachable: plot the smoothing histogram into a temporary .png file.
    fhand = NamedTemporaryFile(suffix='.png')
    coder.plot_smooth_hist(fhand)
def test_ab_coding(self):
    """Check the AB recoding of the class VCF fixture and its log.

    Covers: the offspring list, the RuntimeError raised when
    recode_genotypes is consumed without samples, the recoded text for the
    offspring, the per-category log counts and the written log summary.
    """
    def make_coder():
        # Every coder gets its own handle over the same fixture.
        vcf_fhand = StringIO(self.VCF_HEADER + self.vcf)
        return ABCoder(vcf_fhand, parents_a=['S1'], parents_b=['S2'],
                       parent_index_threshold=0.9)

    coder = make_coder()
    assert coder.offspring == ['S3', 'S4', 'S5', 'S6']

    # Consuming the recoding without samples must raise RuntimeError.
    try:
        list(coder.recode_genotypes())
    except RuntimeError:
        pass
    else:
        self.fail('RuntimeError expected')

    coder = make_coder()
    recoded = coder.recode_genotypes(samples=coder.offspring)
    lines = []
    for snp, genos in recoded:
        codes = ','.join(''.join(alleles) for alleles in genos.values())
        lines.append(str(snp.POS) + ' ' + codes + '\n')
    expected = ('11 AA,AA,BB,BB\n'
                '16 AA,AA,BB,BB\n'
                '17 AA,AA,BB,BB\n')
    assert ''.join(lines) == expected

    # The log is checked only after the recoding has been fully consumed.
    assert sum(coder.log.values()) == 6
    assert coder.log[NOT_ENOUGH_SUPPORT] == 2
    assert coder.log[ENOUGH_SUPPORT] == 3

    log_fhand = StringIO()
    coder.write_log(log_fhand)
    assert '6 SNPs ' in log_fhand.getvalue()
def test_ab_coding(self):
    """Check ABCoder's offspring list, missing-samples error and log.

    The recoding is fully consumed before the log counts are compared.
    NOTE(review): the text rendered from the recoded genotypes is built but
    never compared with an expected value in this test -- confirm whether an
    assertion is missing.
    """
    def make_coder():
        # Every coder gets its own handle over the same fixture.
        vcf_fhand = StringIO(self.VCF_HEADER + self.vcf)
        return ABCoder(vcf_fhand, parents_a=["S1"], parents_b=["S2"],
                       threshold=0.9)

    coder = make_coder()
    assert coder.offspring == ["S3", "S4", "S5", "S6"]

    # Consuming the recoding without samples must raise RuntimeError.
    try:
        list(coder.recode_genotypes())
    except RuntimeError:
        pass
    else:
        self.fail("RuntimeError expected")

    coder = make_coder()
    recoded = coder.recode_genotypes(samples=coder.offspring)
    lines = []
    for snp, genos in recoded:
        # Rendering each record still exercises snp.POS and the genotype
        # values, even though the text itself is not asserted on.
        codes = ",".join("".join(alleles) for alleles in genos.values())
        lines.append(str(snp.POS) + " " + codes + "\n")
    string = "".join(lines)

    assert sum(coder.log.values()) == 6
    assert coder.log[NOT_ENOUGH_SUPPORT] == 2
    assert coder.log[ENOUGH_SUPPORT] == 3

    log_fhand = StringIO()
    coder.write_log(log_fhand)
    assert "6 SNPs " in log_fhand.getvalue()