def recode(self, out=None):
    """Add ``--recode`` to the command and run it.

    Args:
        out: Output prefix to record on the instance. When ``None`` a
            fresh prefix is obtained from ``temp_file()``.

    Returns:
        Whatever ``self.execute()`` returns.

    Side effects:
        Sets ``self.out_prefix`` to the chosen prefix, sets ``self.result``
        to that prefix followed by ``".recode.vcf"``, and rebinds
        ``self.cmd`` to ``self.cmd.bake("--recode")``.
    """
    prefix = temp_file() if out is None else out

    self.out_prefix = prefix
    self.result = prefix + ".recode.vcf"

    # NOTE(review): execute() is called without arguments, so the prefix is
    # only communicated through self.out_prefix -- confirm execute() reads it.
    self.cmd = self.cmd.bake("--recode")
    return self.execute()
def inner_join(a, b, out=None):
    """Merge two datasets after restricting both to the SNPs they share.

    Each dataset's SNP list is obtained via ``write_snplist().snplist()``
    (a path that is opened and read here). The IDs present in both lists
    are written, one per line, to a temporary file. Each dataset is then
    re-opened as ``PlinkData(<dataset>.filename)``, reduced with
    ``extract(...).make_bed()``, and the two reduced datasets are combined
    with ``bmerge``.

    Args:
        a: First dataset; must provide ``write_snplist()`` and ``filename``.
        b: Second dataset; same requirements as ``a``.
        out: Passed through unchanged as the second argument of ``bmerge``.

    Returns:
        The result of ``a_reduced.bmerge(b_reduced, out)``.
    """

    def read_snp_ids(dataset):
        # Close the list file promptly and compare bare IDs: comparing raw
        # lines would treat "rs1" (unterminated last line) and "rs1\n" as
        # different SNPs.
        with open(dataset.write_snplist().snplist()) as handle:
            stripped = (line.strip() for line in handle)
            return set(snp for snp in stripped if snp)

    a_ids = read_snp_ids(a)
    b_ids = read_snp_ids(b)

    # Sorted so the extract list is reproducible between runs.
    shared_ids = sorted(a_ids & b_ids)

    shared_path = temp_file()
    with open(shared_path, "w") as handle:
        handle.writelines(snp + "\n" for snp in shared_ids)

    a_reduced = PlinkData(a.filename).extract(shared_path).make_bed()
    b_reduced = PlinkData(b.filename).extract(shared_path).make_bed()
    return a_reduced.bmerge(b_reduced, out)
def snps_only(self, out=None):
    """Write a gzipped copy of the VCF keeping only single-base records.

    Lines starting with ``#`` (meta-information and the column header) are
    copied through unchanged. A data record is kept only when both its
    ``REF`` and ``ALT`` fields are exactly one of ``A``, ``T``, ``G``,
    ``C`` (upper or lower case); every other record is dropped.

    Args:
        out: Path of the gzip file to write. When ``None`` the path is
            ``temp_file() + ".vcf.gz"``.

    Returns:
        ``VCFData(out)`` for the file that was written.

    Raises:
        ValueError: If a data record appears before the column header
            line, or the header has no ``REF`` / ``ALT`` column.
    """
    if out is None:
        out = temp_file() + ".vcf.gz"

    bases = frozenset("ATGCatgc")
    ref_col = alt_col = None

    # The context manager closes the gzip stream even if parsing fails.
    with gzip.GzipFile(out, "w") as sink:
        for line in self.f:
            # self.f may yield bytes or text depending on how it was
            # opened. latin-1 maps every byte to one code point, so this
            # decode never fails and is only used to inspect the line.
            is_bytes = isinstance(line, bytes)
            text = line.decode("latin-1") if is_bytes else line

            if text.startswith("##"):
                keep = True
            elif text.startswith("#"):
                # Anchor on the line start: a '#' inside a data record
                # must not be mistaken for the header. Strip the line
                # terminator so a last-column name is still found.
                columns = text.rstrip("\r\n").split("\t")
                ref_col = columns.index("REF")
                alt_col = columns.index("ALT")
                keep = True
            else:
                if ref_col is None:
                    raise ValueError(
                        "VCF data record found before the column header line"
                    )
                fields = text.rstrip("\r\n").split("\t")
                keep = fields[ref_col] in bases and fields[alt_col] in bases

            if keep:
                # GzipFile is a binary stream; kept lines are written back
                # byte-for-byte when they arrived as bytes.
                sink.write(line if is_bytes else line.encode("utf-8"))

    return VCFData(out)
def reset_ids(self, out=None):
    """Write a gzipped copy of the VCF with every record's ID regenerated.

    Lines starting with ``#`` (meta-information and the column header) are
    copied through unchanged. For each data record the ``ID`` field is
    replaced by ``<#CHROM>_<POS>_<REF>_<ALT>`` built from that record's own
    fields; all other fields and the line terminator are preserved.

    Args:
        out: Path of the gzip file to write. When ``None`` the path is
            ``temp_file() + ".vcf.gz"``.

    Returns:
        ``VCFData(out)`` for the file that was written.

    Raises:
        ValueError: If a data record appears before the column header
            line, or the header lacks one of the required columns.
    """
    if out is None:
        out = temp_file() + ".vcf.gz"

    key_cols = None  # indices of #CHROM, POS, REF, ALT once the header is seen
    id_col = None

    # The context manager closes the gzip stream even if parsing fails.
    with gzip.GzipFile(out, "w") as sink:
        for line in self.f:
            # self.f may yield bytes or text depending on how it was
            # opened. latin-1 round-trips any byte sequence losslessly, so
            # bytes input is rebuilt without touching non-ASCII content.
            is_bytes = isinstance(line, bytes)
            text = line.decode("latin-1") if is_bytes else line

            if text.startswith("#"):
                if not text.startswith("##"):
                    # Column header: resolve the indices once instead of
                    # searching the header again for every record.
                    names = text.rstrip("\r\n").split("\t")
                    key_cols = [
                        names.index(name)
                        for name in ("#CHROM", "POS", "REF", "ALT")
                    ]
                    id_col = names.index("ID")
                sink.write(line if is_bytes else line.encode("utf-8"))
                continue

            if key_cols is None:
                raise ValueError(
                    "VCF data record found before the column header line"
                )

            # Split off the terminator so it cannot leak into the new ID
            # when ALT happens to be the last column.
            body = text.rstrip("\r\n")
            terminator = text[len(body):]
            fields = body.split("\t")
            fields[id_col] = "%s_%s_%s_%s" % tuple(fields[i] for i in key_cols)

            rebuilt = "\t".join(fields) + terminator
            sink.write(rebuilt.encode("latin-1" if is_bytes else "utf-8"))

    return VCFData(out)
def execute(self, out=None):
    """Invoke the prepared command with an ``--out`` prefix.

    Args:
        out: Output prefix handed to the command. When ``None`` a fresh
            prefix is obtained from ``temp_file()``.

    Returns:
        ``PlinkData`` constructed from the output prefix that was used.
    """
    prefix = out if out is not None else temp_file()
    self.cmd("--out", prefix)
    return PlinkData(prefix)