def compare_db_to_file(file):
    """Tally how direct gnomAD lookups compare with each variant's own AF.

    Every line of ``file`` is turned into a ``Variant`` that shares one
    ``GnomAD`` connection.  For each alternative allele of the variant, the
    value returned by ``GnomAD.get_af`` (``af1``) is compared with the value
    returned by ``Variant.get_gnomad_af()`` (``af2``) and exactly one
    ``Stat`` counter is bumped:

    * ``f``    -- both values are ``None``;
    * ``t``    -- the two values are equal;
    * ``one``  -- ``af1`` is truthy and ``af2`` is ``None``;
    * ``diff`` -- ``af1`` is truthy and ``af2`` is a different, non-None value;
    * ``two``  -- anything else (``af1`` is ``None`` or zero and differs
      from ``af2``).

    After all alleles of a variant have been visited, ``two0`` is bumped
    when ``af2`` is truthy but no allele produced a truthy ``af1``.

    :param file: path of the input file, one variant record per line.
    :return: the populated ``Stat`` instance.
    """
    stat = Stat()
    # GnomAD is used as a context manager elsewhere in this module; doing the
    # same here releases the connection even if a line fails to parse.
    with GnomAD() as gnomAD, open(file) as source:
        for line in source:
            v = Variant(line, gnomAD_connection=gnomAD)
            alt_list = v.alt_list()
            af2 = v.get_gnomad_af()
            # Number of alleles for which the direct lookup found a frequency.
            direct_hits = 0
            for alt in alt_list:
                af1 = gnomAD.get_af(v.chr_num(), v.lowest_coord(), v.ref(), alt)
                if af1:
                    direct_hits += 1
                if af1 is None and af2 is None:
                    stat.f()
                elif af1 == af2:
                    stat.t()
                elif af1:
                    if af2 is None:
                        stat.one()
                    else:
                        stat.diff()
                else:
                    stat.two()
            if af2 and direct_hits == 0:
                stat.two0()
    return stat
def output_raw_calls(infile, outfile, limit=None, start=1):
    """Write one JSON record per (variant, alternative allele) of ``infile``.

    Lines before ``start`` (1-based) are skipped.  Every remaining line is
    parsed with ``Variant``; for each of its alternative alleles a dict with
    the keys ``chromosome``, ``position``, ``reference`` and ``alternative``
    is collected.  The whole list is dumped to ``outfile`` as a single JSON
    array, and the number of processed variants is printed at the end.

    :param infile: path of the input file, one variant record per line.
    :param outfile: path of the JSON file to (over)write.
    :param limit: only used to choose how often progress is printed: every
        ``limit // 20`` processed variants (never less often than every
        variant), or every 100 when ``limit`` is ``None`` or not positive.
        NOTE(review): ``limit`` does not stop processing -- confirm whether
        callers expect it to.
    :param start: 1-based number of the first line to process.
    """
    # ``limit // 20`` is 0 for limits below 20, which used to make the
    # modulo below raise ZeroDivisionError; clamp the interval to 1.
    # The explicit None test avoids relying on Python 2's ``None > 0``.
    if limit is not None and limit > 0:
        n_out = max(1, limit // 20)
    else:
        n_out = 100

    processed = 0
    result = []
    with open(infile) as source, open(outfile, "w") as destination:
        for line_no, line in enumerate(source, 1):
            if line_no < start:
                continue
            v = Variant(line)
            processed += 1
            if processed % n_out == 0:
                # Single-argument parenthesised print behaves the same on
                # Python 2 and Python 3.
                print(processed)
            for allele in v.alt_list():
                result.append({
                    "chromosome": v.chr_num(),
                    "position": v.start(),
                    "reference": v.ref(),
                    "alternative": allele,
                })
        json.dump(result, destination)
    print("Variants processed: {}".format(processed))
def get_anfisa_json(self, chromosome, start, end, allele):
    """Build view JSON records for the consequences of one allele in a region.

    The region string ``"<chromosome>:<start>:<end>"`` and ``allele`` are
    passed to ``EnsemblRestClient.get_consequences``.  Each returned item is
    wrapped in a ``Variant`` created with this object's ``connectors`` and
    converted with ``get_view_json()``.

    :return: list of the ``get_view_json()`` results, or ``None`` when the
        client returns nothing (a falsy or zero-length result).
    """
    region = "{}:{}:{}".format(chromosome, start, end)
    consequences = EnsemblRestClient().get_consequences(region, allele)

    if consequences and len(consequences) > 0:
        views = []
        for consequence in consequences:
            variant = Variant(consequence, connectors=self.connectors)
            views.append(variant.get_view_json())
        return views
    return None
def annotate(workspace):
    """Run the remote annotator for one case and copy the result back.

    Reads ``workspace["file"]`` (local annotation file) and
    ``workspace["mongo-name"]`` (case name).

    Nothing is run, and the function returns ``None`` after printing a
    message, when either:

    * ``get_md`` returns metadata for the file whose
      ``["versions"]["annotations"]`` equals ``Variant.get_version()``; or
    * the case name does not occur in the file's base name.

    Otherwise the annotator module is started over ``ssh`` on
    ``annotation_server`` inside ``/data/bgm/cases/<case>``, and the
    resulting ``<case>_anfisa.json`` is copied with ``scp`` onto the local
    file path.

    :raises Exception: when the ``ssh`` or ``scp`` command returns a
        non-zero status from ``os.system``.
    """

    def run_or_raise(command):
        # Echo the command line, run it through the shell, fail on non-zero.
        print(command)
        status = os.system(command)
        if status:
            raise Exception("Returned {}".format(status))

    local_file = workspace["file"]

    metadata = get_md(local_file)
    if metadata:
        current_version = Variant.get_version()
        if metadata["versions"]["annotations"] == current_version:
            print("Case is already annotated with the same version")
            return

    case = workspace["mongo-name"]
    fname = os.path.basename(local_file)
    casedir = os.path.dirname(local_file)

    if case not in fname:
        print("Skipping non-standard case: {}".format(fname))
        return

    # The first directory component that mentions the case is taken as the
    # full case id; otherwise fall back to "<case>_wgs".
    matching_dir = next(
        (part for part in casedir.split('/') if case in part), None)
    case_full_id = matching_dir or "{}_wgs".format(case)

    remote = "{aws_user}@{annotation_server}".format(
        annotation_server=annotation_server, aws_user=aws_user)
    remote_dir = "/data/bgm/cases/{}".format(case)

    cmd = "export PYTHONPATH=/data/bgm/anfisa ; cd {remote_dir} ; python -m annotations.annotator ".format(
        remote_dir=remote_dir)
    cmd = "{base} -i {case_id}_seq_a_boo_regions.vep.json ".format(
        base=cmd, case_id=case_full_id)
    print("Running {cmd} on ${annotation_server}".format(
        cmd=cmd, annotation_server=annotation_server))

    run_or_raise('ssh -t {remote} "{cmd}"'.format(cmd=cmd, remote=remote))

    result = "{}/{}_anfisa.json".format(remote_dir, case)
    run_or_raise("scp {remote}:{result} {local}".format(
        remote=remote, local=local_file, result=result))
with open(f) as input, open(out, "w") as out1, HGMD() as hgmd, \ GnomAD() as gnomAD, \ GTF() as gtf, \ SpliceAI() as spliceAI, \ ClinVar() as clinvar: cns = { "hgmd": hgmd, "gnomAD": gnomAD, "spliceAI": spliceAI, "liftover": hg19_to_38_converter, "clinvar": clinvar, "gtf": gtf.prepare_lookup(transcript=True), "beacon": beacon } metadata = Variant.get_metadata(vcf_header=vcf_header, samples=samples, case=case) print metadata["versions"] out1.write(json.dumps(metadata) + '\n') while (True): line = input.readline() if (not line): break l += 1 if (l < start): continue try: v = Variant(line, vcf_header=vcf_header, samples=samples, case=case,