def main():
    """Configure and start the Tornado tabix lookup server.

    Declares the command-line options, parses the command line, loads the VCF
    configuration into the module-level DATA_MAP and then starts the IOLoop.
    Calls sys.exit(1) if loading the VCF configuration raises.
    """
    global DATA_MAP

    define("port", default=8321, help="Server port", type=int)
    define("verbose", default=False, help="Prints debugging statements")
    define("tabix_executable", default=".", help="Path to tabix executable")
    define("vcf_config", default=".", help="Path to VCF configuration JSON")
    tornado.options.parse_command_line()

    # Echo the effective settings so they end up in the server log.
    logging.info("Starting Tornado web server on http://localhost:%s" % options.port)
    logging.info("--tabix_executable=%s" % options.tabix_executable)
    logging.info("--vcf_config=%s" % options.vcf_config)
    logging.info("--verbose=%s" % options.verbose)

    # Try loading the VCF mapping; the server is useless without it.
    logging.info("Loading VCF configuration file \'" + options.vcf_config + "\'...")
    try:
        DATA_MAP = parse_vcf_config(options.vcf_config)
    except Exception as e:
        logging.error("Failed to load VCF configuration file:")
        logging.error(e)
        sys.exit(1)

    # Single route: /<id>/<chromosome>/<coordinate>
    routes = [
        (r"/(\w+)/(X|Y|M|\d{1,2})/(\d+)", TabixLookup)
    ]
    application = tornado.web.Application(routes, **settings)
    application.listen(options.port, **server_settings)
    tornado.ioloop.IOLoop.instance().start()
def parse_datastore_configuration():
    """Build a mapping of datastore ID -> DataStoreConfiguration.

    Reads options.mongo_datastores, whose entries are either
    (datastore_id, uri) or (datastore_id, uri, case_sensitive_databases).
    Two-element entries get an empty list of case-sensitive databases.
    Entries of any other length are logged as errors and skipped.
    """
    datastore_map = {}

    for entry in options.mongo_datastores:
        field_count = len(entry)

        if field_count != 2 and field_count != 3:
            logging.error("Invalid datastore config: " + repr(entry))
            continue

        if field_count == 2:
            datastore_id, uri = entry
            case_sensitive_databases = []
        else:
            datastore_id, uri, case_sensitive_databases = entry

        datastore_map[datastore_id] = DataStoreConfiguration(uri, case_sensitive_databases)

    return datastore_map
def get(self, *uri_path):
    """Serve SeqPeek data for one gene as JSON.

    The data-set ID is the first path segment after "/seqpeek_data" in the
    request path; the gene label is read from the "gene" request argument.
    The positional arguments captured by the route are not used.

    Responses:
        400 - the path carries no data-set ID, the ID is not a key of
              self._config_map, or the "gene" argument is missing.
        500 - the SeqPeek query (or writing its result) raised.
        200 - body is the JSON-encoded query result with sorted keys.
    """
    sub_path = self.request.path.replace("/seqpeek_data", "")
    uri_parts = sub_path.split("/")

    # A path with nothing after the prefix splits into a single element;
    # answer 400 like the other malformed requests instead of raising IndexError.
    if len(uri_parts) < 2:
        logging.error("SeqPeek data lookup ID missing in request path: [%s]" % self.request.path)
        self.send_error(400)
        return

    data_id = uri_parts[1]
    if data_id not in self._config_map:
        logging.error("Unknown SeqPeek data loookup ID [%s]" % data_id)
        self.send_error(400)
        return

    config = self._config_map[data_id]

    if "gene" not in self.request.arguments:
        logging.error("Gene label missing in request arguments: [%s]" % str(self.request.arguments))
        self.send_error(400)
        return

    gene_label = self.get_argument("gene")
    logging.debug("Querying SeqPeek data for gene \'" + gene_label + "\'")

    seq_query = sds.SeqPeekResult(config)
    seq_query.full_gene = False
    seq_query.gene_name = gene_label

    try:
        result = sds.do_gene_query(seq_query)
        self.write(json.dumps(result, sort_keys=True))
        self.set_status(200)
    except Exception as e:
        logging.error("Running SeqPeek data service failed: " + str(e))
        self.send_error(500)
def get(self, vcf_id, chromosome, coordinate):
    """Look up one coordinate in a configured VCF or trio file and write it as JSON.

    Args:
        vcf_id: Key into the module-level DATA_MAP.
        chromosome: Chromosome label, passed through to the lookup function.
        coordinate: Position; used as both start and end of the lookup range.

    Raises:
        tornado.web.HTTPError: 404 when vcf_id is unknown, when the configured
            file type is neither 'vcf' nor 'trio', or when the lookup raises.
    """
    # DATA_MAP is only read here, so no global declaration is required.
    if vcf_id not in DATA_MAP:
        logging.info("Unknown VCF id \'" + vcf_id + "\'")
        raise tornado.web.HTTPError(404)

    file_info = DATA_MAP[vcf_id]
    file_path = file_info['path']
    file_type = file_info['type']

    if file_type == 'vcf':
        lookup_fn = vcf_lookup
    elif file_type == 'trio':
        lookup_fn = triotype_lookup
    else:
        logging.error("Unknown type for file " + file_path)
        raise tornado.web.HTTPError(404)

    try:
        result = lookup_fn(options.tabix_executable, file_path, chromosome, coordinate, coordinate)
        response = {
            "chr": result.chromosome,
            "coordinate": result.coordinate,
            "values": result.values
        }
        self.write(json.dumps(response, sort_keys=True))
        self.set_status(200)
    except Exception as e:
        # Every failure in the lookup or serialisation is reported to the client as 404.
        logging.error("Running tabix failed:")
        logging.error(e)
        raise tornado.web.HTTPError(404)
def get(self, vcf_id, chromosome, start_coordinate, end_coordinate):
    """Look up a coordinate range in a configured TSV file and write it as JSON.

    Args:
        vcf_id: Key into the module-level DATA_MAP.
        chromosome: Chromosome label, passed through to the lookup function.
        start_coordinate: Start of the range, passed through unchanged.
        end_coordinate: End of the range, passed through unchanged.

    Raises:
        tornado.web.HTTPError: 404 when vcf_id is unknown, when the configured
            file type is not 'tsv', or when the lookup raises.
    """
    # DATA_MAP is only read here, so no global declaration is required.
    if vcf_id not in DATA_MAP:
        logging.info("Unknown VCF id \'" + vcf_id + "\'")
        raise tornado.web.HTTPError(404)

    file_info = DATA_MAP[vcf_id]
    file_path = file_info['path']

    if file_info['type'] != 'tsv':
        logging.error("Unknown type for file " + file_path)
        raise tornado.web.HTTPError(404)
    lookup_fn = tsv_region_lookup

    try:
        # The result.values array contains dictionaries, for which the keys are read from the
        # header line of the TSV file. For the header line to be included in the output of the
        # tabix command, the "-h" flag has to be included in the command line.
        result = lookup_fn(options.tabix_executable + " -h", file_path, chromosome,
                           start_coordinate, end_coordinate)
        response = {
            "chr": result.chromosome,
            "start": result.start,
            "end": result.end,
            "values": result.values
        }
        self.write(json.dumps(response, sort_keys=True))
        self.set_status(200)
    except Exception as e:
        # Every failure in the lookup or serialisation is reported to the client as 404.
        logging.error("Running tabix failed:")
        logging.error(e)
        raise tornado.web.HTTPError(404)
def get(self, tabix_id, chromosome, start_coordinate, end_coordinate=None):
    """Run a tabix lookup for a configured file and write the result as JSON.

    Args:
        tabix_id: Key into self._config_map.
        chromosome: Chromosome label, passed through to the lookup function.
        start_coordinate: Start of the range.
        end_coordinate: End of the range; defaults to start_coordinate.

    Raises:
        tornado.web.HTTPError: 404 when tabix_id is unknown, when the configured
            file type is not one of 'vcf', 'trio' or 'tsv', or when the lookup raises.
    """
    if end_coordinate is None:
        end_coordinate = start_coordinate

    known_ids = self._config_map.keys()
    if tabix_id not in known_ids:
        # Unknown IDs are only logged when verbose output was requested.
        if options.verbose:
            logging.info("Unknown tabix loookup ID [%s]" % tabix_id)
        raise tornado.web.HTTPError(404)

    file_info = self._config_map[tabix_id]
    file_path = file_info['path']
    file_type = file_info['type']

    if file_type not in ('vcf', 'trio', 'tsv'):
        logging.error("Unknown type for file " + file_path)
        raise tornado.web.HTTPError(404)

    if file_type == 'tsv':
        lookup_fn = tsv_region_lookup
        # For parsing the tabix output for TSV files, the header has to be included.
        # Therefore, the "-h" flag has to be included in the command line.
        tabix_exe = options.tabix_executable + " -h"
    elif file_type == 'trio':
        lookup_fn = triotype_singleline_lookup
        tabix_exe = options.tabix_executable
    else:
        lookup_fn = vcf_singleline_lookup
        tabix_exe = options.tabix_executable

    try:
        result = lookup_fn(tabix_exe, file_path, chromosome, start_coordinate, end_coordinate)
        payload = {
            "chr": result.chromosome,
            "start": result.start,
            "end": result.end,
            "values": result.values,
            "snpid": result.snpid,
            "info": result.info
        }
        body = json.dumps(payload, sort_keys=True)
        self.write(body)
        self.set_status(200)
    except Exception as e:
        logging.error("Running tabix failed:")
        logging.error(e)
        raise tornado.web.HTTPError(404)
def _handle_request_exception(self, error):
    """Log an exception that escaped a request method.

    Only logs; nothing is written to the response and the exception is not
    re-raised.

    NOTE(review): if this overrides tornado.web.RequestHandler's hook of the
    same name, no error response is sent from here - confirm that is intended.
    """
    message = "Uncaught exception: " + str(error)
    logging.error(message)
def get(self, *uri_path):
    """Serve a Variant Summary lookup as JSON.

    The data-set ID is the first path segment after "/variant_summary" in the
    request path. The "chromosome", "coordinate" (integer) and "feature_id"
    request arguments select what to query. The positional arguments captured
    by the route are not used.

    Responses:
        400 - the path carries no data-set ID, the ID is not a key of
              self._config_map, the request has arguments outside
              REQUIRED_ARGUMENTS, or "coordinate" is not an integer.
        500 - the query failed (wrong line, unexpected tabix output, tabix
              execution error, or any other exception).
        200 - body is the JSON-encoded result, or "{}" when the coordinate
              range is empty or the feature is not found.
    """
    sub_path = self.request.path.replace("/variant_summary", "")
    uri_parts = sub_path.split("/")

    # A path with nothing after the prefix splits into a single element;
    # answer 400 like the other malformed requests instead of raising IndexError.
    if len(uri_parts) < 2:
        logging.error("Variant Summary data lookup ID missing in request path: [%s]" % self.request.path)
        self.send_error(400)
        return

    data_id = uri_parts[1]
    if data_id not in self._config_map:
        logging.error("Unknown Variant Summary data lookup ID [%s]" % data_id)
        self.send_error(400)
        return

    config = self._config_map[data_id]

    # NOTE(review): this rejects arguments that are not listed in REQUIRED_ARGUMENTS;
    # it does not itself verify that every listed argument is present - confirm intended.
    if not set(self.request.arguments).issubset(REQUIRED_ARGUMENTS):
        logging.error("Variant Summary - invalid arguments: [%s]" % str(self.request.arguments))
        self.send_error(400)
        return

    chromosome = self.get_argument("chromosome")

    raw_coordinate = self.get_argument("coordinate")
    try:
        coordinate = int(raw_coordinate)
    except ValueError:
        logging.error("Variant Summary - invalid value for coordinate: " + str(raw_coordinate))
        self.send_error(400)
        return

    feature_id = self.get_argument("feature_id")

    logging.debug("Querying Variant Summary for \'" + str((chromosome, coordinate, feature_id)) + "\'")

    try:
        result = vsl.do_query(config, chromosome, coordinate, feature_id)
        self.write(json.dumps(result, sort_keys=True))
        self.set_status(200)
    except CoordinateRangeEmptyError as cnf:
        # Nothing in the requested range: an empty object, not an error.
        logging.info(cnf)
        self.write(json.dumps({}, sort_keys=True))
        self.set_status(200)
    except (WrongLineFoundError, UnexpectedTabixOutputError, TabixExecutionError) as tabix_error:
        logging.error(tabix_error)
        self.send_error(500)
    except FeatureNotFoundError as fnf:
        # Unknown feature: an empty object, not an error.
        logging.info(fnf)
        self.write(json.dumps({}, sort_keys=True))
        self.set_status(200)
    except Exception as e:
        logging.error("Running Variant Summary service failed: " + str(e))
        self.send_error(500)
def get(self, tabix_id, chromosome, start_coordinate, end_coordinate=None):
    """Run a tabix lookup for a configured file and write the result as JSON.

    Args:
        tabix_id: Key into self._config_map.
        chromosome: Chromosome label, passed through to the lookup function.
        start_coordinate: Start of the range.
        end_coordinate: End of the range; defaults to start_coordinate.

    Responses:
        400 - tabix_id is not a key of self._config_map.
        500 - the configured file type is not 'vcf', 'trio' or 'tsv', or the
              lookup failed.
        200 - body is the JSON-encoded response object; when the coordinate
              range is empty the object is built from the request values only.
    """
    if end_coordinate is None:
        end_coordinate = start_coordinate

    known_ids = self._config_map.keys()
    if tabix_id not in known_ids:
        logging.error("Unknown tabix lookup ID [%s]" % tabix_id)
        self.send_error(400)
        return

    file_info = self._config_map[tabix_id]
    file_path = file_info['path']
    file_type = file_info['type']

    if file_type not in ('vcf', 'trio', 'tsv'):
        # A bad type is a server-side configuration problem, hence 500.
        logging.error("Unknown type for file in configuration: " + file_path)
        self.send_error(500)
        return

    if file_type == 'tsv':
        lookup_fn = tsv_region_lookup
        # For parsing the tabix output for TSV files, the header has to be included.
        # Therefore, the "-h" flag has to be included in the command line.
        tabix_exe = options.tabix_executable + " -h"
    elif file_type == 'trio':
        lookup_fn = triotype_singleline_lookup
        tabix_exe = options.tabix_executable
    else:
        lookup_fn = vcf_singleline_lookup
        tabix_exe = options.tabix_executable

    try:
        result = lookup_fn(tabix_exe, file_path, chromosome, start_coordinate, end_coordinate)
        response = self.build_response_object(
            result.chromosome,
            result.start,
            result.end,
            values=result.values,
            snpid=result.snpid,
            ref=result.ref,
            alt=result.alt,
            info=result.info)
        self.write(json.dumps(response, sort_keys=True))
        self.set_status(200)
    except CoordinateRangeEmptyError as empty_range:
        # Nothing in the requested range: still a successful response.
        logging.info(empty_range)
        response = self.build_response_object(chromosome, start_coordinate, end_coordinate)
        self.write(json.dumps(response, sort_keys=True))
        self.set_status(200)
    except (WrongLineFoundError, UnexpectedTabixOutputError, TabixExecutionError) as tabix_error:
        logging.error(tabix_error)
        self.send_error(500)
    except Exception as e:
        logging.error("Running tabix failed: " + str(e))
        self.send_error(500)