def save(self):
    """
    Upload the local sequence file to the iHMP Aspera server and save
    the node data in OSDF.

    The instance is validated first; invalid data is not saved. A new
    node is inserted (and assigned an ID and version 1); a previously
    saved node is updated in place.

    Args:
        None

    Returns:
        True if successful, False otherwise.
    """
    self.logger.debug("In save.")

    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    success = False

    study = self._study

    remote_path = "/".join(["/" + study, "wgs_raw_seq_set",
                            os.path.basename(self._local_file)])
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(WgsRawSeqSet.aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error("Experienced an error uploading the sequence set. Aborting save.")
        return False

    if self.id is None:
        # The document has not yet been saved
        seq_set_data = self._get_raw_doc()
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(seq_set_data)
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            self._set_id(node_id)
            self._urls = ["fasp://" + WgsRawSeqSet.aspera_server + remote_path]
            self._version = 1
            success = True
        except Exception as e:
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        seq_set_data = self._get_raw_doc()

        try:
            self.logger.info("Attempting to update " + __name__ +
                             " with ID: %s." % self._id)
            session.get_osdf().edit_node(seq_set_data)
            # Bug fix: previously referenced the nonexistent attribute
            # 'self._d' instead of 'self._id'.
            self.logger.info("Update for " + __name__ +
                             " %s successful." % self._id)
            success = True
        except Exception as e:
            # Bug fix: the original mixed %-formatting with a stray
            # positional argument, leaving a placeholder unfilled.
            self.logger.error("An error occurred while updating %s %s. "
                              "Reason: %s", __name__, self._id, e)

    self.logger.debug("Returning " + str(success))
    return success
def save(self):
    """
    Upload the local sequence file to the iHMP Aspera server, then save
    the node data in OSDF.

    The instance is validated first; invalid data is not saved. A new
    node is inserted (and assigned an ID and version 1); a previously
    saved node is updated in place.

    Args:
        None

    Returns:
        True if successful, False otherwise.

    Raises:
        Exception: when the Aspera upload fails.
    """
    self.logger.debug("In save.")

    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    success = False

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(
        "aspera.ihmpdcc.org",
        session.username,
        session.password,
        self._local_file,
        self._remote_path
    )

    if not upload_result:
        self.logger.error("Experienced an error uploading the sequence set. Aborting save.")
        # Bug fix: a 'return success' that followed this raise was
        # unreachable dead code and has been removed.
        raise Exception("Unable to upload file to aspera server.")

    if self.id is None:
        # The document has not yet been saved
        seq_set_data = self._get_raw_doc()
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(seq_set_data)
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            self._set_id(node_id)
            self._version = 1
            success = True
        except Exception as e:
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        seq_set_data = self._get_raw_doc()

        try:
            self.logger.info("Attempting to update " + __name__ +
                             " with ID: %s." % self._id)
            session.get_osdf().edit_node(seq_set_data)
            # Bug fix: previously referenced the nonexistent attribute
            # 'self._d' instead of 'self._id'.
            self.logger.info("Update for " + __name__ +
                             " %s successful." % self._id)
            success = True
        except Exception as e:
            # Bug fix: the original mixed %-formatting with a stray
            # positional argument, leaving a placeholder unfilled.
            self.logger.error("An error occurred while updating %s %s. "
                              "Reason: %s", __name__, self._id, e)

    return success
def _upload_files(self, study, file_map):
    """
    Upload each Proteome data file to the iHMP Aspera server.

    Args:
        study (str): The study the files belong to; must be one of
            "ibd", "preg_preterm" or "prediabetes".
        file_map (dict): Maps each file type name to a local file path.

    Returns:
        A dict mapping each file type to the fasp:// URL of the
        uploaded file.

    Raises:
        ValueError: when the study has no known directory mapping.
        Exception: when an Aspera upload fails.
    """
    study2dir = {
        "ibd": "ibd",
        "preg_preterm": "ptb",
        "prediabetes": "t2d"
    }

    if study not in study2dir:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    study_dir = study2dir[study]

    remote_paths = {}

    # Get the session so we can get the username and password
    session = iHMPSession.get_session()
    username = session.username
    password = session.password

    # For each of the Proteome data files (there are 4), transmit them
    # to the Aspera server and return a dictionary with the computed remote
    # paths...
    # Bug fix: dict.iteritems() is Python 2 only; items() works on both
    # Python 2 and Python 3.
    for file_type, local_file in file_map.items():
        self.logger.debug("Uploading %s of Proteome type %s" % (local_file, file_type))

        remote_base = os.path.basename(local_file)

        # Strip any characters that are unsafe in remote filenames.
        valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
        remote_base = ''.join(c for c in remote_base if c in valid_chars)
        remote_base = remote_base.replace(' ', '_')  # No spaces in filenames

        remote_path = "/".join(["/" + study_dir, "proteome", file_type, remote_base])
        self.logger.debug("Remote path for this file will be %s." % remote_path)

        # Upload the file to the iHMP aspera server
        upload_success = aspera.upload_file(Proteome.aspera_server,
                                            username,
                                            password,
                                            local_file,
                                            remote_path)

        if not upload_success:
            self.logger.error(
                "Experienced an error uploading file %s. " % local_file)
            raise Exception("Unable to upload " + local_file)
        else:
            remote_paths[file_type] = "fasp://" + Proteome.aspera_server + remote_path

    return remote_paths
def _upload_data(self):
    """
    Transfer the local viral sequence file to the iHMP Aspera server and
    record the resulting fasp:// URL in self._urls.

    Raises:
        ValueError: when the study has no known directory mapping.
        Exception: when the Aspera upload fails.
    """
    self.logger.debug("In _upload_data.")

    session = iHMPSession.get_session()
    study = self._study

    directories = {
        "ibd": "ibd",
        "preg_preterm": "ptb",
        "prediabetes": "t2d"
    }

    if study not in directories:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    # Sanitize the basename: keep only letters, digits, '-', '_' and '.',
    # then replace spaces with underscores (no spaces in filenames).
    base_name = os.path.basename(self._local_file)
    allowed = "-_.%s%s" % (string.ascii_letters, string.digits)
    base_name = ''.join(ch for ch in base_name if ch in allowed)
    base_name = base_name.replace(' ', '_')

    segments = ["/" + directories[study], "genome", "microbiome",
                "wgs", "analysis", "hmvir", base_name]
    remote_path = "/".join(segments)
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    uploaded = aspera.upload_file(ViralSeqSet.aspera_server,
                                  session.username,
                                  session.password,
                                  self._local_file,
                                  remote_path)

    if uploaded:
        self._urls = ["fasp://" + ViralSeqSet.aspera_server + remote_path]
    else:
        self.logger.error("Experienced an error uploading the data. " + \
                          "Aborting save.")
        raise Exception("Unable to upload viral sequence set.")
def save(self):
    """
    Saves the data in the current instance.

    The JSON form of the current data for the instance is validated in
    the save function. If the data is not valid, then the data will not
    be saved. If the instance was saved previously, then the node ID is
    assigned the alpha numeric found in the OSDF instance. If not saved
    previously, then the node ID is 'None', and upon a successful, will
    be assigned to the alpha numeric ID found in the OSDF instance.
    Also, the version is updated as the data is saved in the OSDF
    instance.

    Args:
        None

    Returns:
        True if successful, False otherwise.
    """
    self.logger.debug("In save.")

    aspera_server = "aspera.ihmpdcc.org"

    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid.")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    study = self._study

    remote_path = "/".join(["/" + study, "16s_trimmed_seq_set",
                            os.path.basename(self._local_file)])
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    success = False

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error("Experienced an error uploading the sequence set. Aborting save.")
        return success

    self.logger.info("Uploaded the %s to the iHMP Aspera server (%s) successfully." %
                     (self._local_file, aspera_server))

    if self.id is None:
        # The document has not yet been saved
        seq_set_data = self._get_raw_doc()
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(seq_set_data)
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            self._set_id(node_id)
            self._version = 1
            self._urls = ["fasp://" + aspera_server + remote_path]
            success = True
        except Exception as e:
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        seq_set_data = self._get_raw_doc()

        try:
            self.logger.info("Attempting to update " + __name__ +
                             " with ID: %s." % self._id)
            session.get_osdf().edit_node(seq_set_data)
            # Bug fix: previously referenced the nonexistent attribute
            # 'self._d' instead of 'self._id'.
            self.logger.info("Update for " + __name__ +
                             " %s successful." % self._id)
            success = True
        except Exception as e:
            # Bug fix: the original mixed %-formatting with a stray
            # positional argument, leaving a placeholder unfilled.
            self.logger.error("An error occurred while updating %s %s. "
                              "Reason: %s", __name__, self._id, e)

    self.logger.debug("Returning " + str(success))
    return success
def _upload_data(self):
    """
    Upload the local abundance matrix file to the iHMP Aspera server and
    record the resulting fasp:// URL in self._urls.

    The remote directory is derived from the study and from the matrix
    type of this instance.

    Args:
        None

    Returns:
        None

    Raises:
        ValueError: when the study or the matrix type has no known
            directory mapping.
        Exception: when the Aspera upload fails.
    """
    self.logger.debug("In _upload_data.")

    session = iHMPSession.get_session()
    study = self._study

    study2dir = {
        "ibd": "ibd",
        "preg_preterm": "ptb",
        "prediabetes": "t2d"
    }

    if study not in study2dir:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    study_dir = study2dir[study]

    remote_base = os.path.basename(self._local_file)

    # Strip any characters that are unsafe in remote filenames.
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    remote_base = ''.join(c for c in remote_base if c in valid_chars)
    remote_base = remote_base.replace(' ', '_')  # No spaces in filenames

    # Maps each matrix type to its remote directory components.
    remote_map = {
        "16s_community": ["genome", "microbiome", "16s", "analysis", "hmqcp"],
        "wgs_community": ["genome", "microbiome", "wgs", "analysis", "hmscp"],
        "wgs_functional": ["genome", "microbiome", "wgs", "analysis", "hmmrc"],
        "microb_proteomic": ["proteome", "microbiome", "analysis"],
        "microb_lipidomic": ["lipidome", "microbiome", "analysis"],
        "microb_cytokine": ["cytokine", "microbiome", "analysis"],
        "microb_metabolome": ["metabolome", "microbiome", "analysis"],
        "microb_metatranscriptome": ["metatranscriptome", "microbiome", "analysis"],
        "host_proteomic": ["proteome", "host", "analysis"],
        "host_lipidomic": ["lipidome", "host", "analysis"],
        "host_cytokine": ["cytokine", "host", "analysis"],
        "host_metabolome": ["metabolome", "host", "analysis"],
        "host_transcriptome": ["transcriptome", "host", "analysis"]
    }

    matrix_type = self._matrix_type

    if matrix_type not in remote_map:
        raise ValueError("Invalid matrix type. No mapping for %s" % matrix_type)

    remote_elements = [study_dir]
    remote_elements.extend(remote_map[matrix_type])
    remote_elements.append(remote_base)
    remote_path = "/" + "/".join(remote_elements)
    self.logger.debug("Remote path for this abundance matrix will be %s." %
                      remote_path)

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(AbundanceMatrix.aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error("Experienced an error uploading the data. " + \
                          "Aborting save.")
        raise Exception("Unable to upload abundance matrix.")
    else:
        self._urls = ["fasp://" + AbundanceMatrix.aspera_server + remote_path]
def save(self):
    """
    Saves the data in OSDF.

    The JSON form of the current data for the instance is validated in
    the save function. If the data is not valid, then the data will not
    be saved. If the instance was saved previously, then the node ID is
    assigned the alpha numeric found in the OSDF instance. If not saved
    previously, then the node ID is 'None', and upon a successful, will
    be assigned to the alpha numeric ID found in OSDF. Also, the version
    is updated as the data is saved in OSDF.

    Args:
        None

    Returns:
        True if successful, False otherwise.
    """
    self.logger.debug("In save.")

    aspera_server = "aspera.ihmpdcc.org"

    # If node previously saved, use edit_node instead since ID
    # is given (an update in a way)
    # can also use get_node to check if the node already exists
    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid.")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    study = self._study
    study2dir = {"ibd": "ibd", "preg_preterm": "ptb", "prediabetes": "t2d"}

    if study not in study2dir:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    study_dir = study2dir[study]

    remote_base = os.path.basename(self._local_file)

    # Strip any characters that are unsafe in remote filenames.
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    remote_base = "".join(c for c in remote_base if c in valid_chars)
    remote_base = remote_base.replace(" ", "_")  # No spaces in filenames

    remote_path = "/".join(["/" + study_dir, "genome", "microbiome",
                            "wgs", "analysis", "hmvir", remote_base])
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    success = False

    upload_result = aspera.upload_file(
        aspera_server,
        session.username,
        session.password,
        self._local_file,
        remote_path
    )

    if not upload_result:
        self.logger.error("Experienced an error uploading the sequence " +
                          "set. Aborting save.")
        return success

    self.logger.info(
        "Uploaded the %s to the iHMP Aspera server (%s) successfully."
        % (self._local_file, aspera_server)
    )

    self._urls = ["fasp://" + aspera_server + remote_path]

    if self._id is None:
        # The document has not yet been saved
        self.logger.info("About to insert a new " + __name__ + " OSDF node.")

        # Get the JSON form of the data and load it
        self.logger.debug("Converting " + __name__ + " to parsed JSON form.")
        data = json.loads(self.to_json())
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(data)
            self._set_id(node_id)
            self._version = 1
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            success = True
        except Exception as e:
            self.logger.exception(e)
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        self.logger.info("%s already has an ID, so we do an update (not an insert)." % __name__)

        try:
            node_data = self._get_raw_doc()
            node_id = self._id
            self.logger.info("Attempting to update " + __name__ + " with ID: %s." % node_id)
            session.get_osdf().edit_node(node_data)
            self.logger.info("Update for " + __name__ + " %s successful." % self._id)

            # Refresh the version number from the freshly edited node.
            node_data = session.get_osdf().get_node(node_id)
            latest_version = node_data["ver"]
            self._version = latest_version
            self.logger.debug("The version of this %s is now: %s" %
                              (__name__, str(latest_version)))
            success = True
        except Exception as e:
            self.logger.exception(e)
            # Bug fix: the arguments were previously wrapped in a single
            # tuple, which broke the record's lazy %-formatting (three
            # placeholders, one argument).
            self.logger.error("An error occurred while updating %s %s. Reason: %s.",
                              __name__, self._id, e)

    self.logger.debug("Returning " + str(success))
    return success
def save(self):
    """
    Saves the data in the current instance.

    The JSON form of the current data for the instance is validated in
    the save function. If the data is not valid, then the data will not
    be saved. If the instance was saved previously, then the node ID is
    assigned the alpha numeric found in the OSDF instance. If not saved
    previously, then the node ID is 'None', and upon a successful, will
    be assigned to the alpha numeric ID found in the OSDF instance.
    Also, the version is updated as the data is saved in the OSDF
    instance.

    Args:
        None

    Returns:
        True if successful, False otherwise.

    Raises:
        ValueError: when the study has no known directory mapping.
    """
    self.logger.debug("In save.")

    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    success = False

    study = self._study

    study2dir = {
        "ibd": "ibd",
        "preg_preterm": "ptb",
        "prediabetes": "t2d"
    }

    if study not in study2dir:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    study_dir = study2dir[study]

    remote_base = os.path.basename(self._local_file)

    # Strip any characters that are unsafe in remote filenames.
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    remote_base = ''.join(c for c in remote_base if c in valid_chars)
    remote_base = remote_base.replace(' ', '_')  # No spaces in filenames

    remote_path = "/".join(["/" + study_dir, "genome", "microbiome",
                            "wgs", "raw", remote_base])
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(WgsRawSeqSet.aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error("Experienced an error uploading the sequence set. Aborting save.")
        return False
    else:
        self._urls = ["fasp://" + WgsRawSeqSet.aspera_server + remote_path]

    if self.id is None:
        # The document has not yet been saved
        seq_set_data = self._get_raw_doc()
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(seq_set_data)
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            self._set_id(node_id)
            self._version = 1
            success = True
        except Exception as e:
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        seq_set_data = self._get_raw_doc()

        try:
            self.logger.info("Attempting to update " + __name__ +
                             " with ID: %s." % self._id)
            session.get_osdf().edit_node(seq_set_data)
            self.logger.info("Update for " + __name__ +
                             " %s successful." % self._id)
            success = True
        except Exception as e:
            # Bug fix: the original applied '%' to a two-placeholder
            # string with a single value and passed 'e' as a stray
            # positional argument, raising TypeError inside the handler.
            self.logger.error("An error occurred while updating %s %s. "
                              "Reason: %s", __name__, self._id, e)

    self.logger.debug("Returning " + str(success))
    return success
def save(self):
    """
    Saves the data in OSDF.

    The JSON form of the current data for the instance is validated in
    the save function. If the data is not valid, then the data will not
    be saved. If the instance was saved previously, then the node ID is
    assigned the alpha numeric found in the OSDF instance. If not saved
    previously, then the node ID is 'None', and upon a successful, will
    be assigned to the alpha numeric ID found in OSDF. Also, the version
    is updated as the data is saved in OSDF.

    Args:
        None

    Returns:
        True if successful, False otherwise.

    Raises:
        ValueError: when the study or the matrix type has no known
            directory mapping.
    """
    self.logger.debug("In save.")

    # If node previously saved, use edit_node instead since ID
    # is given (an update in a way)
    # can also use get_node to check if the node already exists
    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid.")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    study = self._study

    study2dir = {
        "ibd": "ibd",
        "preg_preterm": "ptb",
        "prediabetes": "t2d"
    }

    if study not in study2dir:
        raise ValueError("Invalid study. No directory mapping for %s" % study)

    study_dir = study2dir[study]

    remote_base = os.path.basename(self._local_file)

    # Strip any characters that are unsafe in remote filenames.
    valid_chars = "-_.%s%s" % (string.ascii_letters, string.digits)
    remote_base = ''.join(c for c in remote_base if c in valid_chars)
    remote_base = remote_base.replace(' ', '_')  # No spaces in filenames

    # Maps each matrix type to its remote directory components.
    remote_map = {
        "16s_community": ["genome", "microbiome", "16s", "analysis", "hmqcp"],
        "wgs_community": ["genome", "microbiome", "wgs", "analysis", "hmscp"],
        "wgs_functional": ["genome", "microbiome", "wgs", "analysis", "hmmrc"],
        "microb_proteomic": ["proteome", "microbiome", "analysis"],
        "microb_lipidomic": ["lipidome", "microbiome", "analysis"],
        "microb_cytokine": ["cytokine", "microbiome", "analysis"],
        "microb_metabolome": ["metabolome", "microbiome", "analysis"],
        "microb_metatranscriptome": ["metatranscriptome", "microbiome", "analysis"],
        "host_proteomic": ["proteome", "host", "analysis"],
        "host_lipidomic": ["lipidome", "host", "analysis"],
        "host_cytokine": ["cytokine", "host", "analysis"],
        "host_metabolome": ["metabolome", "host", "analysis"],
        "host_transcriptome": ["transcriptome", "host", "analysis"]
    }

    matrix_type = self._matrix_type

    if matrix_type not in remote_map:
        raise ValueError("Invalid matrix type. No mapping for %s" % matrix_type)

    remote_elements = [study_dir]
    remote_elements.extend(remote_map[matrix_type])
    remote_elements.append(remote_base)
    remote_path = "/" + "/".join(remote_elements)
    self.logger.debug("Remote path for this abundance matrix will be %s." %
                      remote_path)

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(AbundanceMatrix.aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error("Experienced an error uploading the " + \
                          "abundance matrix. Aborting save.")
        return False
    else:
        self._urls = ["fasp://" + AbundanceMatrix.aspera_server + remote_path]

    osdf = session.get_osdf()

    success = False

    if self._id is None:
        self.logger.info("About to insert a new " + __name__ + " OSDF node.")

        # Get the JSON form of the data and load it
        self.logger.debug("Converting " + __name__ + " to parsed JSON form.")
        data = json.loads(self.to_json())

        try:
            node_id = osdf.insert_node(data)
            self._set_id(node_id)
            self._version = 1
            success = True
        except Exception as e:
            self.logger.exception(e)
            self.logger.error("An error occurred when saving %s.", self)
    else:
        # Bug fix: this message was previously logged twice in a row.
        self.logger.info("AbundanceMatrix already has an ID, " + \
                         "so we do an update (not an insert).")

        try:
            matrix_data = self._get_raw_doc()
            matrix_id = self._id
            self.logger.debug("AbundanceMatrix OSDF ID to update: %s." % matrix_id)
            osdf.edit_node(matrix_data)

            # Refresh the version number from the freshly edited node.
            matrix_data = osdf.get_node(matrix_id)
            latest_version = matrix_data['ver']
            self.logger.debug("The version of this AbundanceMatrix " + \
                              "is now: %s" % str(latest_version))
            self._version = latest_version
            success = True
        except Exception as e:
            self.logger.exception(e)
            self.logger.error("An error occurred when updating %s.", self)

    return success
def save(self):
    """
    Saves the data in the current instance.

    The JSON form of the current data for the instance is validated in
    the save function. If the data is not valid, then the data will not
    be saved. If the instance was saved previously, then the node ID is
    assigned the alpha numeric found in the OSDF instance. If not saved
    previously, then the node ID is 'None', and upon a successful, will
    be assigned to the alpha numeric ID found in the OSDF instance.
    Also, the version is updated as the data is saved in the OSDF
    instance.

    Args:
        None

    Returns:
        True if successful, False otherwise.
    """
    self.logger.debug("In save.")

    if not self.is_valid():
        self.logger.error("Cannot save, data is invalid")
        return False

    session = iHMPSession.get_session()
    self.logger.info("Got iHMP session.")

    success = False

    study = self._study

    remote_path = "/".join([
        "/" + study,
        "wgs_raw_seq_set",
        os.path.basename(self._local_file)
    ])
    self.logger.debug("Remote path for this file will be %s." % remote_path)

    # Upload the file to the iHMP aspera server
    upload_result = aspera.upload_file(WgsRawSeqSet.aspera_server,
                                       session.username,
                                       session.password,
                                       self._local_file,
                                       remote_path)

    if not upload_result:
        self.logger.error(
            "Experienced an error uploading the sequence set. Aborting save."
        )
        return False

    if self.id is None:
        # The document has not yet been saved
        seq_set_data = self._get_raw_doc()
        self.logger.info("Got the raw JSON document.")

        try:
            self.logger.info("Attempting to save a new node.")
            node_id = session.get_osdf().insert_node(seq_set_data)
            self.logger.info("Save for " + __name__ + " %s successful." % node_id)
            self.logger.info("Setting ID for " + __name__ + " %s." % node_id)
            self._set_id(node_id)
            self._urls = ["fasp://" + WgsRawSeqSet.aspera_server + remote_path]
            self._version = 1
            success = True
        except Exception as e:
            self.logger.error("An error occurred while saving " + __name__ +
                              ". " + "Reason: %s" % e)
    else:
        seq_set_data = self._get_raw_doc()

        try:
            self.logger.info("Attempting to update " + __name__ +
                             " with ID: %s." % self._id)
            session.get_osdf().edit_node(seq_set_data)
            # Bug fix: previously referenced the nonexistent attribute
            # 'self._d' instead of 'self._id'.
            self.logger.info("Update for " + __name__ +
                             " %s successful." % self._id)
            success = True
        except Exception as e:
            # Bug fix: the original mixed %-formatting with a stray
            # positional argument, leaving a placeholder unfilled.
            self.logger.error("An error occurred while updating %s %s. "
                              "Reason: %s", __name__, self._id, e)

    self.logger.debug("Returning " + str(success))
    return success