Example 1
def main():
	parser = argparse.ArgumentParser(
		description="Fetch Condel scores")

	cmd = DefaultCommandHelper(parser)

	cmd.add_db_args()

	parser.add_argument("muts_path", metavar="SNVS_PATH",
						help="SNVs to check. Use - for standard input.")

	parser.add_argument("out_path", metavar="OUTPUT_PATH",
						help="The results path. Use - for standard output.")

	cmd.add_selected_predictors_args()

	cmd.add_selected_annotations_args()

	cmd.add_selected_columns_args()

	args, logger = cmd.parse_args("fetch")

	db = cmd.open_db()

	predictors = cmd.get_selected_predictors()

	annotations = cmd.get_selected_annotations()

	columns = cmd.get_selected_columns()

	logger.info("Reading {} ...".format(args.muts_path if args.muts_path != "-" else "from standard input"))

	try:
		progress = RatedProgress(logger, name="SNVs")

		with tsv.open(args.muts_path) as f:
			with tsv.open(args.out_path, "w") as wf:
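				# header row: ID, the selected columns and annotations (uppercased), then one score column per predictor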
				tsv.write_line(wf, "ID", *[c.upper() for c in columns] + [a.upper() for a in annotations] + predictors)

				hit = fail = 0

				mut = DnaAndProtMutationParser()
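				# the parser instance is reused for every input line; its parsed fields (e.g. identifier) are read back after each parse() call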
				for line_num, line in enumerate(f, start=1):
					line = line.rstrip(" \n\r")
					if len(line) == 0 or line.startswith("#"):
						continue

					try:
						mut.parse(line)
					except PrematureEnd:
						logger.error("Missing fields at line {}".format(line_num))
						fail += 1
						continue
					except UnexpectedToken as ex:
						logger.error("Unexpected field '{}' at line {}".format(ex.args[0], line_num))
						fail += 1
						continue

					exists = False
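					# query_mutation is not defined in this snippet; Example 2 below nests a similar helper inside fetch_iter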
					for row in query_mutation(logger, db, mut, annotations, predictors):

						exists = True

						ann = row["annotations"]
						scores = row["scores"]

						tsv.write_line(wf, mut.identifier,
							   *[row[c] for c in columns]
							   + [ann[a] for a in annotations]
							   + [scores[p] for p in predictors])

						# if logger.isEnabledFor(logging.DEBUG):
						# 	logger.debug("    --> {} {} {} {} {} {} {} {} {} {}".format(
						# 				row["chr"], row["start"], row["ref"], row["alt"], row["transcript"],
						# 				row["protein"], row["aa_pos"], row["aa_ref"], row["aa_alt"],
						# 				mut.identifier or "*"))

					progress.update()

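					# a mutation counts as a hit if the database returned at least one row for it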
					if exists:
						hit += 1
					else:
						fail += 1

		progress.log_totals()

		logger.info("Finished. total={}, hits={}, fails={}, elapsed={}".format(hit + fail, hit, fail, progress.elapsed_time))

	except:
		return cmd.handle_error()
	finally:
		db.close()
Example 2
def fetch_iter(db, muts_path, maps=None, predictors=None, muts_header=False, state=None, logger=None):
	"""
	Iterator that fetches scores from the database for the mutations listed in a file.

	:param db: FannsDb interface.
	:param muts_path: The input path for mutations.
	:param maps: Maps between transcript/protein Ensembl identifiers and external identifiers (swissprot_id, ...).
	:param predictors: Predictors for which to obtain the scores.
	:param muts_header: Whether the file at muts_path has a header line.
	:param state: Dictionary updated with the iteration state (hits, fails, current line and parsed mutation).
	:param logger: Logger to use. If not specified, a new one is created.
	"""

	def query_mutation(logger, db, mut, maps, predictors):

		if mut.coord == Mutation.GENOMIC:
			if logger.isEnabledFor(logging.DEBUG):
				logger.debug("  Querying {} {} {} {} {} {} {} ...".format(
					mut.chr, mut.start, mut.end or "*", mut.ref or "*", mut.alt, mut.strand or "*", mut.identifier or "*"))

			for row in db.query_scores(chr=mut.chr, start=mut.start,
											ref=mut.ref, alt=mut.alt, strand=mut.strand,
											predictors=predictors, maps=maps):
				yield row

		elif mut.coord == Mutation.PROTEIN:
			if logger.isEnabledFor(logging.DEBUG):
				logger.debug("  Querying {} {} {} {} {} ...".format(
					mut.protein, mut.start, mut.ref or "*", mut.alt, mut.identifier or "*"))

			for row in db.query_scores(protein=mut.protein, aa_pos=mut.start, aa_ref=mut.ref, aa_alt=mut.alt,
											predictors=predictors, maps=maps):
				yield row

		else:
			logger.warning("Unknown coordinate system: {}".format(mut.line))

	if logger is None:
		logger = logging.getLogger("fannsdb.fetch")

	state = state if state is not None else {}
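	# note: the hit/fail counters are reset even when the caller passes in an existing state dict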
	state[STATE_HITS] = state[STATE_FAILS] = 0
	maps = maps if maps is not None else []
	predictors = predictors if predictors is not None else []
	
	logger.info("Reading {} ...".format(os.path.basename(muts_path) if muts_path != "-" else "from standard input"))

	progress = RatedProgress(logger, name="SNVs")

	with tsv.open(muts_path) as f:
		if muts_header:
			tsv.skip_comments_and_empty(f)  # returns the first line that is neither empty nor a comment (i.e. the header)

		mutparser = DnaAndProtMutationParser()
		for line_num, line in enumerate(f, start=1):
			line = line.rstrip(" \n\r")
			if len(line) == 0 or line.startswith("#"):
				continue

			try:
				mut = mutparser.parse(line)
			except PrematureEnd:
				logger.error("Missing fields at line {}".format(line_num))
				state[STATE_FAILS] += 1
				continue
			except UnexpectedToken as ex:
				logger.error("Unexpected field '{}' at line {}".format(ex.args[0], line_num))
				state[STATE_FAILS] += 1
				continue

			state.update({
				STATE_LINE_NUM : line_num,
				STATE_LINE : line,
				STATE_MUTATION : mut})

			exists = False
			for row in query_mutation(logger, db, mut, maps, predictors):
				exists = True

				yield row

			progress.update()

			if exists:
				state[STATE_HITS] += 1
			else:
				state[STATE_FAILS] += 1

	progress.log_totals()

	hits, fails = [state[k] for k in [STATE_HITS, STATE_FAILS]]
	logger.info("Finished. total={}, hits={}, fails={}, elapsed={}".format(hits + fails, hits, fails, progress.elapsed_time))
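
A minimal usage sketch for fetch_iter (not part of the module above). db_factory, the file names and the predictor names are placeholder assumptions; only the fetch_iter signature, the row fields and the STATE_* keys come from the examples shown.

# Hypothetical caller of fetch_iter. db_factory, the paths and the predictor names
# are placeholders; substitute whatever the surrounding project actually provides.
state = {}
db = db_factory("fanns.db")  # placeholder for however the FannsDb interface is obtained
try:
	with tsv.open("fetched-scores.tsv", "w") as wf:
		for row in fetch_iter(db, "snvs.tsv", predictors=["SIFT", "PPH2"], state=state):
			scores = row["scores"]
			tsv.write_line(wf, row["protein"], row["aa_pos"],
						   *[scores[p] for p in ["SIFT", "PPH2"]])
finally:
	db.close()

print("hits={}, fails={}".format(state[STATE_HITS], state[STATE_FAILS]))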