Example #1
0
class MaService(object):
	"""
	Client for the Mutation Assessor web service.

	Every query is an HTTP GET against ``HOST`` issued through a ``Request``
	helper. The answer is read as two tab-separated lines (a header line and
	one line of values) and turned into a ``MaResult``.
	"""

	HOST = "mutationassessor.org"

	# Patterns applied with ``match`` (anchored at the start) to the value of
	# the "Mapping issue" column of the answer.
	ISSUE_UNKNOWN_ID_TYPE = re.compile(r"unknown ID type")
	ISSUE_REFERENCE_ALLELE = re.compile(r"reference allele: ([ACGT])")

	def __init__(self, assembly, cache_path=None, max_retries=3, max_freq=3):
		"""
		:param assembly: value sent as the first component of the ``var`` query parameter
		:param cache_path: stored on the instance; not read by the code of this class
		:param max_retries: forwarded to ``Request``
		:param max_freq: forwarded to ``Request``
		"""
		self.assembly = assembly
		self.cache_path = cache_path

		self.__restful = Request(max_retries=max_retries, max_freq=max_freq)

	def get(self, chr, strand, start, ref, alt, var_id=None):
		"""
		Query the service for a single variant.

		When the service answers with a "reference allele: X" mapping issue the
		query is repeated using X as the reference allele. The retry stops as
		soon as the service suggests an allele that has already been tried, so
		a service that keeps reporting the same issue cannot stall the caller.

		:param chr: chromosome, sent as given
		:param strand: "+" or "-"; flipped on a reference allele retry
		:param start: position, sent as given
		:param ref: reference allele
		:param alt: alternate allele
		:param var_id: identifier copied into the result
		:return: a ``MaResult``, or None when there is no response, the answer is
			empty or its two lines have a different number of columns, the
			service reports an unknown ID type, the reference allele retries do
			not converge, or none of "Uniprot", "FI score" and "SNPs@position"
			has a value
		"""
		url = "http://{0}/".format(self.HOST)

		# Reference alleles already sent; the pattern only accepts A, C, G or T
		# so this bounds the number of retries.
		tried_refs = set([ref])

		while True:
			response = self.__restful.get(url,
				params={
					"cm" : "var",
					"var" : "{0},{1},{2},{3},{4}".format(self.assembly, chr, start, ref, alt),
					"frm" : "txt",
					"fts" : "all"
				})

			if response is None:
				return None

			header_line = response.readline().rstrip("\n")
			fields_line = response.readline().rstrip("\n")

			# Splitting an empty string yields [""], never an empty list, so an
			# empty answer has to be detected on the raw header line.
			if not header_line:
				return None

			hdr = header_line.split("\t")
			fields = fields_line.split("\t")
			if len(hdr) != len(fields):
				return None

			# Empty columns are represented as None.
			r = {name: (value if value else None) for name, value in zip(hdr, fields)}

			mapping_issue = r["Mapping issue"]
			if mapping_issue is None:
				break

			if self.ISSUE_UNKNOWN_ID_TYPE.match(mapping_issue):
				return None

			m = self.ISSUE_REFERENCE_ALLELE.match(mapping_issue)
			if m is None:
				# Any other mapping issue does not prevent building a result.
				break

			suggested_ref = m.group(1)
			if suggested_ref in tried_refs:
				# The service keeps reporting an allele that was already sent:
				# retrying again would never end.
				return None

			tried_refs.add(suggested_ref)
			ref = suggested_ref
			# NOTE(review): the flipped strand is not part of the request and is
			# not stored in the result; kept as in the original flow.
			strand = {"+" : "-", "-" : "+"}[strand]

		uniprot = r["Uniprot"]
		fi_score = cast_type(r["FI score"], float)
		snps_pos = r["SNPs@position"]

		if uniprot is None and fi_score is None and snps_pos is None:
			return None

		return MaResult(
			var_id=var_id, chr=chr, start=start, ref=ref, alt=alt,
			uniprot=uniprot, fi_score=fi_score, snps_pos=snps_pos)

	def close(self):
		"""
		Nothing to release; present so the service can be closed like the others.
		"""
		pass
Example #2
0
	def __init__(self, cache_path, max_retries=3, max_freq=3):
		"""
		Remember the cache location and create the request helper.

		:param cache_path: kept on the instance as ``cache_path``
		:param max_retries: forwarded to ``Request``
		:param max_freq: forwarded to ``Request``
		"""
		request_options = dict(max_retries=max_retries, max_freq=max_freq)

		# No results file exists until one is assigned later.
		self.cache_path, self.results_path = cache_path, None

		self.__restful = Request(**request_options)
Example #3
0
	def __init__(self, assembly, cache_path=None, max_retries=3, max_freq=3):
		"""
		Remember the assembly and cache location and create the request helper.

		:param assembly: kept on the instance as ``assembly``
		:param cache_path: kept on the instance as ``cache_path``
		:param max_retries: forwarded to ``Request``
		:param max_freq: forwarded to ``Request``
		"""
		request_options = dict(max_retries=max_retries, max_freq=max_freq)

		self.assembly, self.cache_path = assembly, cache_path

		self.__restful = Request(**request_options)
Example #4
0
class VepService(object):
	"""
	Client for the Variant Effect Predictor endpoint of the Ensembl REST service.

	``get`` queries a single variant. ``run`` queries every variant of a file
	and stores the answers in a temporary file that ``results`` reads back and
	``close`` removes.
	"""

	HOST = "beta.rest.ensembl.org"

	# Accepted strand spellings mapped to the form placed in the request URL.
	VEP_STRAND = { "+" : "1", "-" : "-1", "1" : "1", "-1" : "-1" }

	def __init__(self, cache_path, max_retries=3, max_freq=3):
		"""
		:param cache_path: stored on the instance; not read by the code of this class
		:param max_retries: forwarded to ``Request``
		:param max_freq: forwarded to ``Request``
		"""
		self.cache_path = cache_path
		self.results_path = None

		self.__restful = Request(max_retries=max_retries, max_freq=max_freq)

	def __parse_response(self, var_id, chr, start, end, strand, alt, response):
		"""
		Decode the JSON answer of the service into a list of ``VepResult``.

		One result is produced per transcript: the first allele listed for a
		transcript is used and further alleles of the same transcript are skipped.

		:param response: file-like object containing the JSON document
		:return: list of ``VepResult`` (possibly empty)
		:raises Exception: when the JSON document is not an object
		"""
		root = json.load(response)

		if not isinstance(root, dict):
			raise Exception("Unexpected result from VEP web service:\n{0}".format(json.dumps(root)))

		results = []
		found = set()

		tag = ":".join([chr, str(start), str(end), strand, alt])

		for data in root["data"]:
			for trans in data["transcripts"]:
				gene = trans.get("gene_id")
				transcript = trans.get("transcript_id")

				# Protein position: "start-end" for a range, a single value when
				# both ends coincide or only one of them is present.
				tstart = trans.get("translation_start")
				tend = trans.get("translation_end")
				if tstart is not None and tend is not None and tstart != tend:
					protein_pos = "{0}-{1}".format(tstart, tend)
				elif tstart is not None:
					protein_pos = tstart
				elif tend is not None:
					protein_pos = tend
				else:
					protein_pos = None

				protein = trans.get("translation_stable_id")

				# The key only depends on the query and the transcript.
				key = "{0}|{1}".format(tag, transcript)

				for allele in trans.get("alleles", []):
					if key in found:
						continue
					found.add(key)

					# NOTE(review): ``allele`` is the whole JSON object of the
					# allele here, whereas results() yields the allele string
					# read from the results file -- confirm which one callers
					# of get() expect.
					results.append(VepResult(
						var_id=var_id, chr=chr, start=start, allele=allele,
						gene=gene, transcript=transcript,
						consequences=allele.get("consequence_terms"),
						protein_pos=protein_pos,
						aa_change=allele.get("pep_allele_string"),
						protein=protein,
						sift=allele.get("sift_score"),
						polyphen=allele.get("polyphen_score")))

		return results

	def get(self, chr, start, end, strand, alt, var_id=None):
		"""
		Query the service for a single variant.

		:param chr: chromosome name
		:param start: start position
		:param end: end position
		:param strand: one of the keys of ``VEP_STRAND``
		:param alt: alternate allele
		:param var_id: identifier copied into every result
		:return: list of ``VepResult`` or None when there is no response
		:raises KeyError: when ``strand`` is not a key of ``VEP_STRAND``
		"""
		strand = self.VEP_STRAND[strand]

		url = "http://{0}/vep/human/{1}:{2}-{3}:{4}/{5}/consequences".format(
			self.HOST, chr, start, end, strand, alt)

		response = self.__restful.get(url, headers={"Content-type" : "application/json"})
		if response is None:
			return None

		return self.__parse_response(var_id, chr, start, end, strand, alt, response)

	def run(self, variants_path):
		"""
		Run the VEP service and save results in a temporary file.

		Each line of the variants file is read as the columns chromosome, start,
		end, allele, strand and variant id. Variants without a response are skipped.

		:param variants_path: File with variants. In BED format. http://www.ensembl.org/info/docs/variation/vep/vep_script.html#custom_formats
		:return: True once every variant has been processed; failures are reported as exceptions
		"""

		if self.results_path is None:
			# mkstemp hands back an open OS-level descriptor together with the
			# path; the file is reopened by path below, so the descriptor has
			# to be closed here or it would leak.
			fd, self.results_path = tempfile.mkstemp()
			os.close(fd)

		with open(self.results_path, "w") as rf:
			with open(variants_path, "r") as vf:
				column_types = (str, int, int, str, str, int)
				for fields in tsv.lines(vf, column_types):
					chr, start, end, allele, strand, var_id = fields

					# Text after the first "/"; the whole value when there is none.
					alt = allele[allele.find("/") + 1:]

					results = self.get(chr, start, end, strand, alt, var_id)
					if results is None:
						continue

					for r in results:
						rf.write(tsv.line_text(
							var_id, chr, start, allele,
							r.gene, r.transcript, ",".join(sorted(r.consequences)),
							r.protein_pos, r.aa_change, r.protein,
							r.sift, r.polyphen, null_value="-"))

		return True

	def results(self):
		"""
		Iterator that parses the results temporary file and yields VepResult's
		"""

		with open(self.results_path, "r") as f:
			column_types = (int, str, int, str, str, str, _ctype, str, str, str, float, float)
			for fields in tsv.lines(f, column_types, null_value="-"):
				var_id, chr, start, allele, gene, transcript, consequences, protein_pos, aa_change, protein, sift, polyphen = fields

				yield VepResult(var_id=var_id, chr=chr, start=start, allele=allele,
					gene=gene, transcript=transcript, consequences=consequences,
					protein_pos=protein_pos, aa_change=aa_change, protein=protein,
					sift=sift, polyphen=polyphen)

	def close(self):
		"""
		Removes temporary files
		"""

		if self.results_path is not None:
			os.remove(self.results_path)
			self.results_path = None