예제 #1
0
	def run(self, variants_path):
		"""
		Run the VEP service over a variants file and write the results to a file.

		Results are written to ``self.results_path``. When that attribute is None
		a temporary file is created and its path is stored in ``self.results_path``.

		Every input row is read as six columns: chromosome, start, end, allele,
		strand and variant id. The allele sent to the service is the text after
		the first "/" of the allele column (the whole column when it has no "/").
		Variants for which ``self.get`` returns None are skipped.

		:param variants_path: File with variants. In BED format. http://www.ensembl.org/info/docs/variation/vep/vep_script.html#custom_formats
		:return: None. NOTE(review): this docstring used to promise True/False, but
			no value has ever been returned and errors are not caught here.
		"""
		import os

		if self.results_path is None:
			# mkstemp() hands back an already-open OS-level descriptor together
			# with the path. The file is reopened by path below, so close the
			# descriptor right away instead of leaking it.
			fd, self.results_path = tempfile.mkstemp()
			os.close(fd)

		column_types = (str, int, int, str, str, int)

		with open(self.results_path, "w") as rf:
			with open(variants_path, "r") as vf:
				for fields in tsv.lines(vf, column_types):
					chrom, start, end, allele, strand, var_id = fields

					# find() returns -1 when there is no "/", which makes the
					# slice start at 0 and keeps the whole allele string.
					alt = allele[allele.find("/") + 1:]

					results = self.get(chrom, start, end, strand, alt, var_id)
					if results is None:
						continue

					for r in results:
						rf.write(tsv.line_text(
							var_id, chrom, start, allele,
							r.gene, r.transcript, ",".join(sorted(r.consequences)),
							r.protein_pos, r.aa_change, r.protein,
							r.sift, r.polyphen, null_value="-"))
예제 #2
0
def _read_bed_rows(path):
    """Yield the tab-separated fields of every data line of a BED-like file.

    Blank lines and lines whose first non-blank character is "#" are skipped.
    """
    with open(path, "r") as f:
        for line in f:
            stripped = line.strip()
            if not stripped or stripped.startswith("#"):
                continue
            yield line.rstrip().split("\t")


def liftover(project):
    """Lift the variants of a project over from hg18 to hg19 with liftOver.

    All variants of the project database are dumped to a temporary BED file
    and the configured liftOver binary is run with the hg18ToHg19 chain.
    Variants reported as unmapped get their start set to None; mapped
    variants get their start replaced by the lifted one. Only the start is
    written back; the lifted chromosome and end columns are discarded.

    The project dict is modified in place ("from_assembly" receives the
    previous "assembly", and "assembly" becomes "hg19") and is finally sent
    through the "lifted_projects" port.

    :param project: Project dict; the keys "id", "temp_path", "db" and
        "assembly" are read.
    :raises RuntimeError: if liftOver exits with a non-zero status.
    """
    import shlex

    log = task.logger
    conf = task.conf

    # Not used below. NOTE(review): kept because constructing it may validate
    # the configuration -- confirm and drop if it has no side effects.
    config = GlobalConfig(conf)

    lifted_project_port = task.ports("lifted_projects")

    log.info("--- [{0}] --------------------------------------------".format(project["id"]))

    log.info("Preparing liftOver files ...")

    in_path = make_temp_file(task, suffix=".bed")
    out_path = make_temp_file(task, suffix=".bed")
    unmapped_path = os.path.join(project["temp_path"], "liftover_unmapped.bed")

    projdb = ProjectDb(project["db"])

    # The context manager guarantees the BED file is flushed and closed before
    # liftOver reads it, even if writing a row raises.
    with open(in_path, "w") as in_file:
        for var in projdb.variants(order_by="position"):
            in_file.write(tsv.line_text("chr" + var.chr, var.start, var.start + len(var.ref), var.id))

    log.info("Running liftOver ...")

    project["from_assembly"] = project["assembly"]
    project["assembly"] = "hg19"

    # Build an argument list instead of a shell string so that paths with
    # spaces or shell metacharacters are passed through untouched.
    # shlex.split() keeps supporting a "liftover_bin" setting that carries
    # extra options after the executable.
    cmd = shlex.split(conf["liftover_bin"]) + [
        in_path,
        os.path.join(conf["liftover_chain_path"], "hg18ToHg19.over.chain"),
        out_path,
        unmapped_path,
    ]

    log.debug(" ".join(cmd))

    # Do not go on to rewrite the database from missing or partial output.
    retcode = subprocess.call(cmd)
    if retcode != 0:
        raise RuntimeError(
            "liftOver failed with exit code {0}: {1}".format(retcode, " ".join(cmd)))

    log.info("Annotating unmapped variants ...")

    count = 0
    for fields in _read_bed_rows(unmapped_path):
        projdb.update_variant_start(int(fields[3]), start=None)
        count += 1

    log.info("  {0} unmapped variants annotated".format(count))

    log.info("Updating variants ...")

    count = 0
    for fields in _read_bed_rows(out_path):
        _chrom, start, _end, var_id = fields
        # Convert to int so ids and positions have the same type as in the
        # unmapped pass above rather than being passed as raw text.
        projdb.update_variant_start(int(var_id), start=int(start))
        count += 1

    log.info("  {0} variants".format(count))

    remove_temp(task, in_path, out_path)

    projdb.commit()
    projdb.close()

    lifted_project_port.send(project)