def get_status(analysisId):  # noqa: E501
    """Retrieves the status for the specified analysis. # noqa: E501

    :param analysisId: The analysis identifier returned by '/analysis'
    :type analysisId: str

    :rtype: InlineResponse200
    """
    try:
        storage = ReactomeStorage()
        current_status = storage.get_status(analysisId)

        # guard clause: an unknown id yields a 404 (abort raises, so no else needed)
        if current_status is None:
            LOGGER.debug("Unknown identifier passed to get_status: " + analysisId)
            abort(404, "Unknown identifier")

        # return a Response object to prevent connexion from
        # de-serializing the object into a JSON object
        return Response(response=current_status,
                        status=200,
                        headers={"content-type": "application/json"})
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
def test_stored_tsv(self):
    """Upload a small TSV file and verify both the returned summary
    and the data persisted under the returned storage token."""
    with app.app.test_client() as client:
        upload_response = client.post(
            "/upload",
            data={
                "file": (io.BytesIO(self.test_tsv.encode("UTF-8")), "test.tsv")
            })

        self.assertEqual(200, upload_response.status_code)

        payload = json.loads(upload_response.data.decode())

        # the parsed sample names and basic file statistics must match the fixture
        self.assertEqual("Sample 1:Sample 2:Sample 3", ":".join(payload["sample_names"]))
        self.assertEqual(4, payload["n_lines"])
        self.assertEqual("CD19:CD20:MITF", ":".join(payload["top_identifiers"]))

        # the converted data must have been stored under a token
        self.assertIsNotNone(payload["data_token"])
        token = payload["data_token"]

        # create a new redis instance
        storage = ReactomeStorage()
        self.assertTrue(storage.request_token_exists(token))

        persisted_data = storage.get_request_data(token)
        self.assertEqual("\t" + self.test_tsv, persisted_data.decode("UTF-8"))
def get_summary(datasetId):  # noqa: E501
    """Retrieves a summary of the loaded data. This function is only available once. The data is fully loaded. # noqa: E501

    :param datasetId: The dataset identifier used to trigger the download
    :type datasetId: str

    :rtype: ExternalData
    """
    try:
        storage = ReactomeStorage()

        # fail fast if no summary was ever stored for this id
        if not storage.request_data_summary_exists(datasetId):
            abort(404, "Unknown identifier passed.")

        summary = storage.get_request_data_summary(datasetId)

        # the summary may have expired between the existence check and the fetch
        if summary is None:
            abort(404, "Unknown identifier passed.")

        # return a raw Response so connexion does not re-serialize the JSON string
        return Response(response=summary,
                        status=200,
                        headers={"content-type": "application/json"})
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
def get_data_loading_status(loadingId):  # noqa: E501
    """Retrieves the status for the dataset loading process. # noqa: E501

    :param loadingId: The loading identifier returned by '/data/load'
    :type loadingId: str

    :rtype: DatasetLoadingStatus
    """
    try:
        storage = ReactomeStorage()
        loading_status = storage.get_status(analysis_identifier=loadingId,
                                            data_type="dataset")

        # unknown id -> 404 (abort raises, so the happy path needs no else)
        if loading_status is None:
            LOGGER.debug("Unknown identifier passed to get_status: " + loadingId)
            abort(404, "Unknown identifier")

        # return a Response object to prevent connexion from
        # de-serializing the object into a JSON object
        return Response(response=loading_status,
                        status=200,
                        headers={"content-type": "application/json"})
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
def _get_storage(self):
    """
    Returns the current connection to the reactome storage,
    lazily creating it on first use and caching it on the instance.

    :return: A ReactomeStorage object
    :raises Exception: If the connection to the storage service cannot be established.
    """
    if not self._storage:
        try:
            self._storage = ReactomeStorage()
        except Exception as e:
            LOGGER.error("Failed to connect to storage service: " + str(e))
            # chain the original exception so the root cause is preserved
            # in the traceback instead of being silently replaced
            raise Exception("Failed to connect to storage service", e) from e

    return self._storage
def start_analysis(body):  # noqa: E501
    """Performs the specified gene set analysis # noqa: E501

    Deserializes the request, validates dataset names and designs, resolves
    any storage tokens to the actual data, stores the request, and submits
    it to the analysis queue.

    :param body: Specification of analysis to perform
    :type body: dict | bytes

    :rtype: str -- the generated analysis identifier
    """
    # get the JSON-encoded dict from the request object
    if connexion.request.is_json:
        analysis_dict = connexion.request.get_json(cache=False)
    # de-compress if it's a gzipped string
    elif connexion.request.content_type == "application/gzip":
        LOGGER.debug("Received gzipped analysis request. Decompressing...")
        decompressed_string = zlib.decompress(connexion.request.data)
        analysis_dict = json.loads(decompressed_string)
        # free the memory again
        del decompressed_string
    else:
        LOGGER.debug(
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )
        # abort raises an HTTPException, so no explicit return is needed here
        abort(
            406,
            "Invalid analysis request submitted. Request body does not describe a JSON object."
        )

    try:
        analysis_request = input_deserializer.create_analysis_input_object(
            analysis_dict)
    except Exception:
        # use .get() so a request without a "methodName" key does not raise a
        # KeyError inside the exception handler and mask the 404 below
        LOGGER.debug("Unknown analysis method submitted: " +
                     str(analysis_dict.get("methodName")))
        abort(404, "Unknown analysis method selected.")

    # make sure all datasets have unique names
    all_names = [dataset.name for dataset in analysis_request.datasets]
    if len(all_names) != len(set(all_names)):
        LOGGER.debug("Analysis request contains duplicate names")
        abort(406, "Datasets must not have duplicate names")

    # make sure the analysis design is present
    for dataset in analysis_request.datasets:
        if not dataset.design:
            LOGGER.debug("Analysis request misses design")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required experimental design."
                .format(name=dataset.name))

        if not dataset.design.comparison:
            LOGGER.debug("Analysis request misses design comparison")
            abort(
                406,
                "Invalid request. Dataset '{name}' misses the required comparison specification."
                .format(name=dataset.name))

    # generate an analysis id
    analysis_id = str(uuid.uuid1())

    try:
        storage = ReactomeStorage()

        # a very basic sanity check to make sure it's unique
        while storage.analysis_exists(analysis_id):
            analysis_id = str(uuid.uuid1())

        # Load request data from storage for datasets that only contain a
        # storage token (prefix "rqu_" or short string) instead of actual data
        for dataset_dict in analysis_dict["datasets"]:
            data = dataset_dict["data"]

            # Update for external datasets
            if data[0:4] == "rqu_" or len(data) < 20:
                # make sure the request data exists
                if not storage.request_token_exists(data):
                    MISSING_DATA_TOKEN_COUNTER.inc()
                    abort(
                        500,
                        "No data available for storage token '{}'".format(
                            data))

                # load the data
                stored_data = storage.get_request_data(data)

                # update the request object in place
                dataset_dict["data"] = stored_data.decode("UTF-8")

        # Set the initial status
        encoder = JSONEncoder()
        status = AnalysisStatus(id=analysis_id,
                                status="running",
                                completed=0,
                                description="Queued")
        storage.set_status(analysis_id, encoder.encode(status))

        # Save the request data
        analysis_dict["analysisId"] = analysis_id
        storage.set_analysis_request_data(token=analysis_id,
                                          data=encoder.encode(analysis_dict))

        try:
            # Submit the request to the queue
            queue = ReactomeMQ()
            queue.post_analysis(
                AnalysisRequest(request_id=analysis_id).to_json(),
                analysis_request.method_name)
            LOGGER.debug("Analysis " + analysis_id + " submitted to queue")
            queue.close()

            STARTED_ANALYSIS_COUNTER.inc()

            return analysis_id
        except socket.gaierror as e:
            # record the failure in the stored status so clients polling
            # /status see the error, then report a 503
            LOGGER.error("Failed to connect to queuing system: " + str(e))
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))
            abort(
                503,
                "Failed to connect to queuing system. Please try again in a few seconds."
            )
        except ReactomeMQException as e:
            LOGGER.error("Failed to post message to queuing system: " + str(e))
            # update the status
            status = AnalysisStatus(
                id=analysis_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(analysis_id, encoder.encode(status))
            abort(
                503,
                "The number of analysis requests is currently too high. Please try again in a few minutes."
            )
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
    except (socket.timeout, socket.gaierror) as e:
        LOGGER.error(
            "Socket timeout connecting to storage or queuing system: " +
            str(e))
        abort(
            503,
            "Failed to connect to downstream system. Please try again in a few minutes."
        )
def process_file_upload():
    """Parses a single uploaded delimited expression file, converts it to
    tab-separated text and either returns the converted data directly or
    stores it under a request token (query parameter ``store``, default true).

    :return: a JSON response containing ``sample_names``, ``top_identifiers``,
             ``n_lines`` and either ``data`` or ``data_token``.
    """
    # test whether the file should be stored or returned
    store_file = request.args.get('store', 'true').lower() == "true"

    # make sure only one file is uploaded
    if len(request.files) != 1:
        abort(400, "Incorrect number of uploaded files. Function requires exactly one file.")

    if "file" not in request.files:
        abort(400, "File must be uploaded as 'file' in the form.")

    # get the uploaded file
    user_file = request.files['file']
    user_filename = user_file.filename

    # initialize the return object
    return_object = {"sample_names": None, "top_identifiers": list(), "n_lines": None}
    return_lines = list()
    n_samples = -1

    # read the file
    try:
        all_lines = [line.decode("UTF-8") for line in user_file.readlines()]
    except Exception as e:
        LOGGER.error("Invalid file {name} uploaded: {error}".format(name=user_filename, error=str(e)))
        abort(400, "Uploaded file is not a text file.")

    # guard against a completely empty upload - otherwise the delimiter
    # detection below raises an IndexError (uncaught -> HTTP 500)
    if not all_lines:
        abort(400, "Uploaded file is empty.")

    # guess the delimiter from the first line
    delimiter = None

    if "\t" in all_lines[0]:
        delimiter = "\t"
    elif ";" in all_lines[0]:
        delimiter = ";"
    elif "," in all_lines[0]:
        delimiter = ","

    if not delimiter:
        abort(500, "Failed to detect used delimiter")

    csv_reader = csv.reader(all_lines, delimiter=delimiter)
    # idiomatic next() instead of calling __next__ directly
    header_line = next(csv_reader)

    current_line = 1

    for line in csv_reader:
        current_line += 1

        # the first data line fixes the expected number of columns and
        # finalizes the header
        if n_samples == -1:
            n_samples = len(line)

            # make sure the file was parsed more or less correctly
            if n_samples < 2:
                abort(400, "Failed to parse the file. Only one column detected.")

            # add an empty cell if there is exactly one column less than the number of samples
            if len(header_line) == n_samples - 1:
                header_line = [""] + header_line

            # make sure the header matches
            if len(header_line) != n_samples:
                abort(400, "Different number of column names than entries in row 1: header contains {} fields, "
                           "first line contains {} fields".format(str(len(header_line)), str(n_samples)))

            # save the sample names
            return_object["sample_names"] = header_line[1:]

            # start creating the converted object
            return_lines.append("\t".join(header_line))

        # make sure the number of samples is OK
        if len(line) != n_samples:
            abort(400, "Different number of entries in line {}. File contains {} columns but line {} contains {}"
                  .format(str(current_line), str(n_samples), str(current_line), str(len(line))))

        # save the first few identifiers as samples
        if current_line < 10:
            return_object["top_identifiers"].append(line[0])

        # save the line
        return_lines.append("\t".join(line))

    # save the results
    return_object["n_lines"] = current_line

    # create the complete result string
    result_string = "\n".join(return_lines)

    # add the file if it shouldn't be saved
    if not store_file:
        return_object["data"] = result_string
    else:
        # store the file
        try:
            storage = ReactomeStorage()

            # create an identifier that is guaranteed to be unused
            token = "rqu_" + str(uuid.uuid1())

            while storage.request_token_exists(token):
                token = "rqu_" + str(uuid.uuid1())

            # save the data - expire after 6 hours
            storage.set_request_data(token=token, data=result_string, expire=60 * 60 * 6)

            return_object["data_token"] = token
        except ReactomeStorageException as e:
            LOGGER.error("Failed to store request data: " + str(e))
            abort(500, "Failed to store request data. Please try again later.")

    # return the JSON data
    response_object = make_response(json.dumps(return_object))

    # Using the content-type "text/html" instead of the more
    # appropriate "application/json" to circumvent the lacking
    # support for JSON in GWT (used by Reactome's pathway browser)
    response_object.headers["Content-Type"] = "text/html"

    return response_object
def get_result(analysisId):  # noqa: E501
    """Retrieves the result for the completed analysis task # noqa: E501

    :param analysisId: The analysis identified returned by '/analysis'
    :type analysisId: str

    :rtype: AnalysisResult
    """
    try:
        # split off an optional file-type extension (e.g. "my_id.xlsx")
        extension = None

        if "." in analysisId:
            dot_index = analysisId.find(".")
            extension = analysisId[dot_index + 1:]
            analysisId = analysisId[:dot_index]

        storage = ReactomeStorage()

        # each extension maps to a differently typed stored artefact; when the
        # artefact is missing we fall through to the status check below
        if extension == "xlsx":
            xlsx_report = storage.get_result(analysis_identifier=analysisId,
                                             data_type="report")
            if xlsx_report is not None:
                return Response(response=xlsx_report,
                                status=200,
                                headers={"content-type": "application/xlsx"})
        elif extension == "pdf":
            pdf_report = storage.get_result(analysis_identifier=analysisId,
                                            data_type="pdf_report")
            if pdf_report is not None:
                return Response(response=pdf_report,
                                status=200,
                                headers={"content-type": "application/pdf"})
        elif extension == "r":
            r_script = storage.get_result(analysis_identifier=analysisId,
                                          data_type="r_script")
            if r_script is not None:
                return Response(
                    response=r_script,
                    status=200,
                    headers={
                        "content-type": "text/plain",
                        "content-disposition": "attachment; filename=\"ReactomeGSA_analysis_script.R\""
                    })
        else:
            json_result = storage.get_result(analysisId)
            if json_result is not None:
                return Response(response=json_result,
                                status=200,
                                headers={"content-type": "application/json"})

        # find out why the result doesn't exist
        if not storage.get_status(analysisId):
            LOGGER.debug("Unknown identifier to get_result: " + analysisId)
            abort(404, "Unknown analysis identifier passed.")

        # the identifier is valid, so for some reason the result is not ready (yet)
        abort(406, "Analysis is not complete.")
    except ReactomeStorageException as e:
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
def load_data(resourceId, parameters):  # noqa: E501
    """Start the retrieval of an external or example dataset. # noqa: E501

    Creates a loading id, stores an initial "running" status, and submits a
    DatasetRequest to the dataset queue. On queue failures the stored status
    is updated to "failed" before the request is aborted.

    :param resourceId: The identifier of the data source to load from
    :type resourceId: str
    :param parameters: The parameters for the selected resource.
        NOTE(review): presumably a list of dicts with "name"/"value" keys,
        as accessed below - confirm against the API spec.

    :rtype: str
    """
    try:
        storage = ReactomeStorage()

        # generate an id for the request
        loading_id = str(uuid.uuid1())

        # Set the initial status
        encoder = JSONEncoder()
        status = DatasetLoadingStatus(id=loading_id,
                                      status="running",
                                      completed=0,
                                      description="Queued")
        storage.set_status(loading_id, encoder.encode(status), data_type="dataset")

        # convert the parameters
        request_parameters = list()

        for dict_param in parameters:
            request_parameters.append(
                DatasetRequestParameter(name=dict_param["name"],
                                        value=dict_param["value"]))

        # create the request
        request = DatasetRequest(loading_id=loading_id,
                                 resource_id=resourceId,
                                 parameters=request_parameters)

        try:
            queue = ReactomeMQ(queue_name=DATASET_QUEUE)
            queue.post_analysis(analysis=request.to_json(), method="DatasetLoading")
            LOGGER.debug("Dataset process " + loading_id + " submitted to queue")
            queue.close()

            DATASET_LOADING_COUNTER.inc()

            # the loading id is returned so clients can poll the loading status
            return loading_id
        except socket.gaierror as e:
            # update the status so polling clients see the failure
            LOGGER.error("Failed to connect to queuing system: " + str(e))
            status = DatasetLoadingStatus(
                id=loading_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(loading_id, encoder.encode(status), data_type="dataset")
            abort(
                503,
                "Failed to connect to queuing system. Please try again in a few seconds."
            )
        except ReactomeMQException as e:
            LOGGER.error("Failed to post message to queuing system: " + str(e))
            # update the status
            status = DatasetLoadingStatus(
                id=loading_id,
                status="failed",
                completed=0,
                description="Failed to connect to queuing system.")
            storage.set_status(loading_id, encoder.encode(status), data_type="dataset")
            abort(
                503,
                "The number of analysis requests is currently too high. Please try again in a few minutes."
            )
    except ReactomeStorageException as e:
        # storage is unreachable - the status cannot be updated, only reported
        LOGGER.error("Failed to connect to redis: " + str(e))
        abort(
            503,
            "Failed to connect to storage system. Please try again in a few minutes."
        )
    except (socket.timeout, socket.gaierror) as e:
        LOGGER.error(
            "Socket timeout connecting to storage or queuing system: " +
            str(e))
        abort(
            503,
            "Failed to connect to downstream system. Please try again in a few minutes."
        )