def write_error(self, status_code, **kwargs):
    """Overrides the error page created by Tornado.

    Renders the 404 page for 404s; otherwise renders the generic error
    page and records the traceback plus (trimmed) request info via
    LogEntry.
    """
    if status_code == 404:
        # just use the 404 page as the error
        self.render("404.html")
        return

    is_admin = False
    user = self.get_current_user()
    if user:
        try:
            is_admin = user.level == 'admin'
        except Exception:
            # Any issue with this check leaves default as not admin.
            # Narrowed from a bare ``except:`` so SystemExit /
            # KeyboardInterrupt are no longer swallowed.
            pass

    # render error page
    self.render('error.html', status_code=status_code, is_admin=is_admin)

    # log the error
    from traceback import format_exception
    exc_info = kwargs["exc_info"]
    trace_info = ''.join("%s\n" % line
                         for line in format_exception(*exc_info))
    req_dict = self.request.__dict__
    # must trim body to 1024 chars to prevent huge error messages
    req_dict['body'] = req_dict.get('body', '')[:1024]
    request_info = ''.join("<strong>%s</strong>: %s\n" % (k, v)
                           for k, v in req_dict.items())
    error = exc_info[1]
    LogEntry.create(
        'Runtime',
        'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n'
        % (error, trace_info, request_info))
def post(self, analysis_id):
    """Delete an analysis after validating that the id in the URL
    matches the one posted in the form and the action is a delete."""
    analysis_id = int(analysis_id.split("/")[0])
    analysis_id_sent = int(self.get_argument('analysis_id'))
    action = self.get_argument('action')
    # Mismatched ids or an unexpected action look like a forged request.
    if analysis_id != analysis_id_sent or action != 'delete_analysis':
        raise QiitaPetAuthorizationError(
            self.current_user.id,
            'analysis/results/%d-delete' % analysis_id)
    analysis = Analysis(analysis_id)
    # Keep the name around: it is needed for the message after deletion.
    analysis_name = analysis.name
    check_analysis_access(self.current_user, analysis)
    try:
        Analysis.delete(analysis_id)
    except Exception as e:
        e = str(e)
        msg = ("Couldn't remove <b><i>%s</i></b> analysis: %s"
               % (analysis_name, e))
        level = "danger"
        LogEntry.create('Runtime', "Couldn't remove analysis ID %d: %s"
                        % (analysis_id, e))
    else:
        msg = ("Analysis <b><i>%s</i></b> has been deleted."
               % (analysis_name))
        level = "success"
    self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
def post(self):
    """Handle a lost-password request: generate a reset code for the
    given email and mail the reset URL to the user."""
    message = ""
    level = ""
    page = "lost_pass.html"
    user_id = None
    try:
        user = User(self.get_argument("email"))
    except QiitaDBUnknownIDError:
        message = "ERROR: Unknown user."
        level = "danger"
    else:
        user_id = user.id
        user.generate_reset_code()
        info = user.info
        try:
            # NOTE(review): this source was corrupted here
            # ('"...password: "******"http://..."', invalid syntax);
            # restored as plain implicit string concatenation.
            send_email(user.id, "Qiita: Password Reset",
                       "Please go to the following URL to reset your "
                       "password: http://qiita.colorado.edu/auth/reset/%s"
                       % info["pass_reset_code"])
            message = ("Check your email for the reset code.")
            level = "success"
            page = "index.html"
        except Exception as e:
            # Mailing failed: tell the user and log the underlying error.
            message = ("Unable to send email. Error has been registered. "
                       "Your password has not been reset.")
            level = "danger"
            LogEntry.create('Runtime', "Unable to send forgot password "
                            "email: %s" % str(e), info={'User': user.id})
    self.render(page, user=user_id, message=message, level=level)
def _generate_demultiplexed_fastq_demux(self, mtime): """Modularity helper""" # An artifact will hold only one file of type # `preprocessed_demux`. Thus, we only use the first one # (the only one present) ar = self.artifact demux = [ path for _, path, ftype in ar.filepaths if ftype == 'preprocessed_demux' ][0] demux_samples = set() with open_file(demux) as demux_fh: if not isinstance(demux_fh, File): error_msg = ("'%s' doesn't look like a demux file" % demux) LogEntry.create('Runtime', error_msg) raise EBISubmissionError(error_msg) for s, i in to_per_sample_ascii(demux_fh, self.prep_template.keys()): sample_fp = self.sample_demux_fps[s] wrote_sequences = False with GzipFile(sample_fp, mode='w', mtime=mtime) as fh: for record in i: fh.write(record) wrote_sequences = True if wrote_sequences: demux_samples.add(s) else: del (self.samples[s]) del (self.samples_prep[s]) del (self.sample_demux_fps[s]) remove(sample_fp) return demux_samples
def _generate_demultiplexed_fastq_demux(self, mtime):
    """Modularity helper.

    Writes one gzipped per-sample file from the artifact's single
    `preprocessed_demux` filepath and returns the set of sample ids
    for which at least one sequence was written.
    """
    artifact = self.artifact
    # An artifact holds exactly one `preprocessed_demux` file, so the
    # first match is the only one present.
    demux_candidates = [fp for _, fp, fp_type in artifact.filepaths
                        if fp_type == 'preprocessed_demux']
    demux = demux_candidates[0]
    samples_with_seqs = set()
    with open_file(demux) as demux_fh:
        if not isinstance(demux_fh, File):
            error_msg = ("'%s' doesn't look like a demux file" % demux)
            LogEntry.create('Runtime', error_msg)
            raise EBISubmissionError(error_msg)
        per_sample = to_per_sample_ascii(demux_fh,
                                        self.prep_template.keys())
        for sample_id, records in per_sample:
            sample_fp = self.sample_demux_fps[sample_id]
            wrote_sequences = False
            with GzipFile(sample_fp, mode='w', mtime=mtime) as out_fh:
                for record in records:
                    out_fh.write(record)
                    wrote_sequences = True
            if not wrote_sequences:
                # No sequences for this sample: forget it everywhere and
                # drop the empty output file.
                del self.samples[sample_id]
                del self.samples_prep[sample_id]
                del self.sample_demux_fps[sample_id]
                remove(sample_fp)
            else:
                samples_with_seqs.add(sample_id)
    return samples_with_seqs
def post(self, analysis_id):
    # Delete an analysis. The id appears both in the URL and in the
    # POSTed form; they must agree and the action must be
    # 'delete_analysis', otherwise the request is treated as forged.
    analysis_id = int(analysis_id.split("/")[0])
    analysis_id_sent = int(self.get_argument('analysis_id'))
    action = self.get_argument('action')
    if analysis_id != analysis_id_sent or action != 'delete_analysis':
        raise QiitaPetAuthorizationError(
            self.current_user.id,
            'analysis/results/%d-delete' % analysis_id)
    analysis = Analysis(analysis_id)
    # Grab the name before deletion so it can be used in the messages.
    analysis_name = analysis.name
    check_analysis_access(self.current_user, analysis)
    try:
        Analysis.delete(analysis_id)
        msg = ("Analysis <b><i>%s</i></b> has been deleted." %
               (analysis_name))
        level = "success"
    except Exception as e:
        e = str(e)
        msg = ("Couldn't remove <b><i>%s</i></b> analysis: %s" %
               (analysis_name, e))
        level = "danger"
        # Record the failure; the user still gets redirected below.
        LogEntry.create(
            'Runtime',
            "Couldn't remove analysis ID %d: %s" % (analysis_id, e))
    self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
def post(self):
    # Handle a lost-password request: look the user up by email,
    # generate a reset code, and email the reset URL (valid 30 min).
    message = ""
    level = ""
    page = "lost_pass.html"
    user_id = None
    try:
        user = User(self.get_argument("email"))
    except QiitaDBUnknownIDError:
        message = "ERROR: Unknown user."
        level = "danger"
    else:
        user_id = user.id
        user.generate_reset_code()
        info = user.info
        try:
            send_email(user.id, "Qiita: Password Reset", "Please go to "
                       "the following URL to reset your password: \n"
                       "%s/auth/reset/%s \nYou "
                       "have 30 minutes from the time you requested a "
                       "reset to change your password. After this period, "
                       "you will have to request another reset." %
                       (qiita_config.base_url, info["pass_reset_code"]))
            message = ("Check your email for the reset code.")
            level = "success"
            page = "index.html"
        except Exception as e:
            # Mailing failed: tell the user and log the underlying error.
            message = ("Unable to send email. Error has been registered. "
                       "Your password has not been reset.")
            level = "danger"
            LogEntry.create('Runtime', "Unable to send forgot password "
                            "email: %s" % str(e), info={'User': user.id})
    self.render(page, user=user_id, message=message, level=level)
def write_error(self, status_code, **kwargs):
    '''Overrides the error page created by Tornado'''
    if status_code == 404:
        # just use the 404 page as the error
        self.render("404.html")
        return

    is_admin = False
    user = self.get_current_user()
    if user:
        try:
            is_admin = user.level == 'admin'
        except Exception:
            # Any issue with this check leaves default as not admin.
            # Narrowed from a bare ``except:`` so SystemExit /
            # KeyboardInterrupt are not swallowed.
            pass

    # render error page
    self.render('error.html', status_code=status_code, is_admin=is_admin)

    # log the error: full traceback plus the raw request attributes
    from traceback import format_exception
    exc_info = kwargs["exc_info"]
    trace_info = ''.join(["%s\n" % line
                          for line in format_exception(*exc_info)])
    req_dict = self.request.__dict__
    # must trim body to 1024 chars to prevent huge error messages
    req_dict['body'] = req_dict.get('body', '')[:1024]
    request_info = ''.join(["<strong>%s</strong>: %s\n" % (k, req_dict[k])
                            for k in req_dict.keys()])
    error = exc_info[1]
    LogEntry.create(
        'Runtime', 'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n' %
        (error, trace_info, request_info))
def write_error(self, status_code, **kwargs):
    """Overrides the error page created by Tornado."""
    if status_code == 404:
        # The 404 template doubles as the error page for missing routes.
        self.render("404.html", user=self.current_user)
        return

    current = self.current_user
    is_admin = User(current).level == 'admin' if current else False

    # render error page
    self.render('error.html', user=current,
                status_code=status_code, is_admin=is_admin)

    # log the error with the full traceback and raw request attributes
    from traceback import format_exception
    exc_info = kwargs["exc_info"]
    trace_info = ''.join("%s\n" % ln
                         for ln in format_exception(*exc_info))
    req_attrs = self.request.__dict__
    request_info = ''.join("<strong>%s</strong>: %s\n" % (k, v)
                           for k, v in req_attrs.items())
    LogEntry.create(
        'Runtime',
        'ERROR:\n%s\nTRACE:\n%s\nHTTP INFO:\n%s\n'
        % (exc_info[1], trace_info, request_info))
def get_filepaths(self, conn_handler=None): r"""Retrieves the list of (filepath_id, filepath)""" # Check that this function has been called from a subclass self._check_subclass() # Check if the connection handler has been provided. Create a new # one if not. conn_handler = conn_handler if conn_handler else SQLConnectionHandler() if self._table == 'required_sample_info': table = 'sample_template_filepath' column = 'study_id' elif self._table == 'common_prep_info': table = 'prep_template_filepath' column = 'prep_template_id' else: raise QiitaDBNotImplementedError( 'get_filepath for %s' % self._table) try: filepath_ids = conn_handler.execute_fetchall( "SELECT filepath_id, filepath FROM qiita.filepath WHERE " "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE " "{1}=%s) ORDER BY filepath_id DESC".format(table, column), (self.id, )) except Exception as e: LogEntry.create('Runtime', str(e), info={self.__class__.__name__: self.id}) raise e _, fb = get_mountpoint('templates', conn_handler)[0] base_fp = partial(join, fb) return [(fpid, base_fp(fp)) for fpid, fp in filepath_ids]
def get_filepaths(self):
    r"""Retrieves the list of (filepath_id, filepath)"""
    # Only meaningful on subclasses defining _filepath_table/_id_column.
    self._check_subclass()
    # Check if the connection handler has been provided. Create a new
    # one if not.
    conn_handler = SQLConnectionHandler()
    sql = ("SELECT filepath_id, filepath FROM qiita.filepath WHERE "
           "filepath_id IN (SELECT filepath_id FROM qiita.{0} WHERE "
           "{1}=%s) ORDER BY filepath_id DESC").format(
               self._filepath_table, self._id_column)
    try:
        filepath_ids = conn_handler.execute_fetchall(sql, (self.id, ))
    except Exception as e:
        # Record the failure against this object before re-raising.
        LogEntry.create('Runtime', str(e),
                        info={self.__class__.__name__: self.id})
        raise e
    # Stored paths are relative to the 'templates' mountpoint.
    _, base_dir = get_mountpoint('templates')[0]
    to_abs = partial(join, base_dir)
    return [(fp_id, to_abs(fp)) for fp_id, fp in filepath_ids]
def _failure_callback(self, msg=None):
    """Callback to execute in case that any of the job nodes failed

    Need to change the preprocessed data process status to 'failed'
    """
    ppd = self.preprocessed_data
    # Record the reason in the status itself and as a fatal log entry.
    ppd.processing_status = 'failed: %s' % msg
    LogEntry.create('Fatal', msg, info={'preprocessed_data': ppd.id})
def test_create_log_entry(self):
    """Severity ids 1-3 create entries; an unknown id raises."""
    # Return values are intentionally unused (the other variant of this
    # test does the same); creation succeeding is the assertion.
    LogEntry.create(2, 'runtime message')
    LogEntry.create(3, 'fatal message', info={1: 2})
    LogEntry.create(1, 'warning message', info={9: 0})
    with self.assertRaises(QiitaDBExecutionError):
        # This severity level does not exist in the test schema
        LogEntry.create(4, 'warning message', info={9: 0})
def _failure_callback(self, msg=None):
    """Callback to execute in case that any of the job nodes failed

    Need to change the preprocessed data process status to 'failed'
    """
    # Record the reason in the status itself and as a fatal log entry.
    self.preprocessed_data.processing_status = 'failed: %s' % msg
    LogEntry.create('Fatal', msg,
                    info={'preprocessed_data': self.preprocessed_data.id})
def get(self, ignore):
    # AJAX endpoint feeding the study-list DataTable; 'ignore' is an
    # unused path capture from the route.
    user = self.get_argument('user')
    query = self.get_argument('query')
    # sEcho is DataTables' request counter and must be echoed back.
    echo = int(self.get_argument('sEcho'))
    if user != self.current_user.id:
        raise HTTPError(403, 'Unauthorized search!')
    if query:
        # Search for samples matching the query
        search = QiitaStudySearch()
        try:
            search(query, self.current_user)
            study_proc, proc_samples, _ = \
                search.filter_by_processed_data()
        except ParseException:
            self.clear()
            self.set_status(400)
            self.write('Malformed search query. Please read "search help" '
                       'and try again.')
            return
        except QiitaDBIncompatibleDatatypeError as e:
            self.clear()
            self.set_status(400)
            searchmsg = ''.join(e)
            self.write(searchmsg)
            return
        except Exception as e:
            # catch any other error as generic server error
            self.clear()
            self.set_status(500)
            self.write("Server error during search. Please try again "
                       "later")
            LogEntry.create('Runtime', str(e),
                            info={'User': self.current_user.id,
                                  'query': query})
            return
    else:
        # No query: list everything visible to the user.
        study_proc = proc_samples = None
    info = _build_study_info(self.current_user, study_proc=study_proc,
                             proc_samples=proc_samples)
    # build the table json
    results = {
        "sEcho": echo,
        "iTotalRecords": len(info),
        "iTotalDisplayRecords": len(info),
        "aaData": info
    }
    # return the json in compact form to save transmit size
    self.write(dumps(results, separators=(',', ':')))
def _failure_callback(self, msg=None):
    """Executed if something fails"""
    # Mark the whole analysis as errored first.
    self.analysis.status = 'error'
    if self._update_status is not None:
        self._update_status("Failed")
    # Jobs that never reached a terminal state are errored too;
    # completed or already-errored jobs keep their status.
    terminal = {'error', 'completed'}
    for job in self.analysis.jobs:
        if job.status not in terminal:
            job.status = 'error'
    LogEntry.create('Runtime', msg, info={'analysis': self.analysis.id})
def artifact_status_put_req(artifact_id, user_id, visibility):
    """Set the status of the artifact given

    Parameters
    ----------
    artifact_id : int
        Artifact being acted on
    user_id : str
        The user requesting the action
    visibility : {'sandbox', 'awaiting_approval', 'private', 'public'}
        What to change the visibility to

    Returns
    -------
    dict
        Status of action, in the form {'status': status, 'message': msg}
        status: status of the action, either success or error
        message: Human readable message for status
    """
    if visibility not in get_visibilities():
        return {'status': 'error',
                'message': 'Unknown visibility value: %s' % visibility}

    pd = Artifact(int(artifact_id))
    sid = pd.study.id
    access_error = check_access(sid, user_id)
    if access_error:
        return access_error
    user = User(str(user_id))
    status = 'success'
    msg = 'Artifact visibility changed to %s' % visibility
    # Set the approval to private if needs approval and admin
    if visibility == 'private':
        if not qiita_config.require_approval:
            pd.visibility = 'private'
        # Set the approval to private if approval not required
        elif user.level == 'admin':
            pd.visibility = 'private'
        # Trying to set approval without admin privileges
        else:
            status = 'error'
            msg = 'User does not have permissions to approve change'
    else:
        pd.visibility = visibility
    # Only record the change when it actually happened; previously the
    # log entry was written even when the permission check above failed
    # and the visibility was left untouched.
    if status == 'success':
        LogEntry.create('Warning', '%s changed artifact %s (study %d) to %s'
                        % (user_id, artifact_id, sid, visibility))
    return {'status': status, 'message': msg}
def _failure_callback(self, msg=None): """Executed if something fails""" # set the analysis to errored self.analysis.status = 'error' if self._update_status is not None: self._update_status("Failed") # set any jobs to errored if they didn't execute for job in self.analysis.jobs: if job.status not in {'error', 'completed'}: job.status = 'error' LogEntry.create('Runtime', msg, info={'analysis': self.analysis.id})
def post(self):
    """Handle profile-update and password-change form submissions."""
    passmsg = ""
    msg = ""
    user = self.current_user
    action = self.get_argument("action")
    if action == "profile":
        # tuple of columns available for profile
        # FORM INPUT NAMES MUST MATCH DB COLUMN NAMES
        form_data = UserProfile()
        form_data.process(data=self.request.arguments)
        profile = {
            name: data[0]
            for name, data in viewitems(form_data.data)
        }
        # Turn default value as list into default strings
        for field in form_data:
            field.data = field.data[0]
        try:
            user.info = profile
            msg = "Profile updated successfully"
        except Exception as e:
            msg = "ERROR: profile could not be updated"
            # Fixed typo in the log message ("Cound" -> "Could").
            LogEntry.create('Runtime',
                            "Could not update profile: %s" % str(e),
                            info={'User': user.id})
    elif action == "password":
        form_data = UserProfile()
        form_data.process(data=user.info)
        oldpass = self.get_argument("oldpass")
        newpass = self.get_argument("newpass")
        try:
            changed = user.change_password(oldpass, newpass)
        except Exception as e:
            passmsg = "ERROR: could not change password"
            LogEntry.create('Runtime',
                            "Could not change password: %s" % str(e),
                            info={'User': user.id})
        else:
            if changed:
                passmsg = "Password changed successfully"
            else:
                passmsg = "Incorrect old password"
    self.render("user_profile.html", user=user.id, profile=form_data,
                msg=msg, passmsg=passmsg)
def get(self, ignore):
    # AJAX endpoint feeding the study-list DataTable; 'ignore' is an
    # unused path capture from the route.
    user = self.get_argument('user')
    query = self.get_argument('query')
    # sEcho is DataTables' request counter and must be echoed back.
    echo = int(self.get_argument('sEcho'))
    if user != self.current_user.id:
        raise HTTPError(403, 'Unauthorized search!')
    if query:
        # Search for samples matching the query
        search = QiitaStudySearch()
        try:
            search(query, self.current_user)
            study_proc, proc_samples, _ = \
                search.filter_by_processed_data()
        except ParseException:
            self.clear()
            self.set_status(400)
            self.write('Malformed search query. Please read "search help" '
                       'and try again.')
            return
        except QiitaDBIncompatibleDatatypeError as e:
            self.clear()
            self.set_status(400)
            searchmsg = ''.join(e)
            self.write(searchmsg)
            return
        except Exception as e:
            # catch any other error as generic server error
            self.clear()
            self.set_status(500)
            self.write("Server error during search. Please try again "
                       "later")
            LogEntry.create('Runtime', str(e),
                            info={'User': self.current_user.id,
                                  'query': query})
            return
    else:
        # No query: list everything visible to the user.
        study_proc = proc_samples = None
    info = _build_study_info(self.current_user, study_proc=study_proc,
                             proc_samples=proc_samples)
    # build the table json
    results = {
        "sEcho": echo,
        "iTotalRecords": len(info),
        "iTotalDisplayRecords": len(info),
        "aaData": info
    }
    # return the json in compact form to save transmit size
    self.write(dumps(results, separators=(',', ':')))
def test_create_log_entry(self):
    """"""
    # One entry per known severity level; info is optional.
    for severity, text, extra in (
            ('Runtime', 'runtime message', None),
            ('Fatal', 'fatal message', {1: 2}),
            ('Warning', 'warning message', {9: 0})):
        if extra is None:
            LogEntry.create(severity, text)
        else:
            LogEntry.create(severity, text, info=extra)
    with self.assertRaises(IncompetentQiitaDeveloperError):
        # This severity level does not exist in the test schema
        LogEntry.create('Chicken', 'warning message', info={9: 0})
def send_xml(self):
    # Send the XML files to EBI via curl and scrape the accession
    # numbers out of the reply. Returns (study_accession,
    # submission_accession); both are None on failure.
    curl_command = self.generate_curl_command()
    curl_command_parts = shsplit(curl_command)
    # Capture curl's stdout in a temp file, then read it back.
    temp_fd, temp_fp = mkstemp()
    call(curl_command_parts, stdout=temp_fd)
    close(temp_fd)
    with open(temp_fp, 'U') as curl_output_f:
        curl_result = curl_output_f.read()
    study_accession = None
    submission_accession = None
    if 'success="true"' in curl_result:
        LogEntry.create('Runtime', curl_result)
        print curl_result
        print "SUCCESS"
        # Pull both accession numbers out of the reply XML.
        accessions = search(
            '<STUDY accession="(?P<study>.+?)".*?'
            '<SUBMISSION accession="(?P<submission>.+?)"', curl_result)
        if accessions is not None:
            study_accession = accessions.group('study')
            submission_accession = accessions.group('submission')
            LogEntry.create('Runtime',
                            "Study accession:\t%s" % study_accession)
            LogEntry.create(
                'Runtime',
                "Submission accession:\t%s" % submission_accession)
            print "Study accession:\t", study_accession
            print "Submission accession:\t", submission_accession
        else:
            # Submission reported success but the reply didn't match
            # the expected accession pattern.
            LogEntry.create('Runtime', ("However, the accession numbers "
                                        "could not be found in the output "
                                        "above."))
            print(
                "However, the accession numbers could not be found in "
                "the output above.")
    else:
        LogEntry.create('Fatal', curl_result)
        print curl_result
        print "FAILED"
    return (study_accession, submission_accession)
def post(self):
    """Render the newest error-log records.

    ``numrecords`` comes from the form; non-positive values fall back
    to the latest 100 entries.
    """
    numentries = int(self.get_argument("numrecords"))
    # Changed ``< 0`` to ``<= 0``: requesting zero records is as
    # meaningless as a negative count (and this matches the admin
    # variant of this handler).
    if numentries <= 0:
        numentries = 100
    logentries = LogEntry.newest_records(numentries)
    self.render("error_log.html", logentries=logentries,
                user=self.current_user)
def test_time_property(self):
    """The entry's timestamp falls between two DB clock reads."""
    clock_sql = "SELECT localtimestamp"
    lower = self.conn_handler.execute_fetchone(clock_sql)[0]
    entry = LogEntry.create('Warning', 'warning test', info=None)
    upper = self.conn_handler.execute_fetchone(clock_sql)[0]
    self.assertTrue(lower < entry.time < upper)
def test_time_property(self):
    """"""
    # Sample the DB clock before and after creating the entry; the
    # stored time must fall strictly between the two reads.
    sql = "SELECT localtimestamp"
    before = self.conn_handler.execute_fetchone(sql)[0]
    log_entry = LogEntry.create('Warning', 'warning test', info=None)
    after = self.conn_handler.execute_fetchone(sql)[0]
    self.assertTrue(before < log_entry.time < after)
def execute(job_id):
    """Executes a job through the plugin system

    Parameters
    ----------
    job_id : str
        The id of the job to execute
    """
    # Create the new job
    job = ProcessingJob(job_id)
    work_dir = join(get_work_base_dir(), job.id)
    software = job.command.software
    # Build the launcher invocation: env script, start script, the URL
    # the plugin reports back to, the job id, and its working directory.
    launch_cmd = '%s "%s" "%s" "%s" "%s" "%s"' % (
        qiita_config.plugin_launcher, software.environment_script,
        software.start_script, qiita_config.base_url, job.id, work_dir)
    std_out, std_err, return_value = system_call(launch_cmd)
    if return_value != 0:
        # Something went wrong while starting the plugin: mark the job
        # as errored and attach a log entry with the captured output.
        job.status = 'error'
        job.log = LogEntry.create(
            'Runtime',
            "Error starting plugin '%s':\nStd output:%s\nStd error:%s"
            % (software.name, std_out, std_err))
def post(self):
    """Render the latest log records for an authorized caller."""
    self.check_access()
    requested = int(self.get_argument("numrecords"))
    # Non-positive counts are meaningless; fall back to the default.
    numentries = 100 if requested <= 0 else requested
    self.render("error_log.html",
                logentries=LogEntry.newest_records(numentries))
def test_create_log_entry(self):
    """"""
    # One entry per known severity level; info is optional.
    LogEntry.create('Runtime', 'runtime message')
    LogEntry.create('Fatal', 'fatal message', info={1: 2})
    LogEntry.create('Warning', 'warning message', info={9: 0})
    with self.assertRaises(IncompetentQiitaDeveloperError):
        # This severity level does not exist in the test schema
        LogEntry.create('Chicken', 'warning message', info={9: 0})
def send_xml(self):
    # Send the XML files to EBI via curl and scrape the accession
    # numbers out of the reply. Returns (study_accession,
    # submission_accession); both are None on failure.
    curl_command = self.generate_curl_command()
    curl_command_parts = shsplit(curl_command)
    # Capture curl's stdout in a temp file, then read it back.
    temp_fd, temp_fp = mkstemp()
    call(curl_command_parts, stdout=temp_fd)
    close(temp_fd)
    with open(temp_fp, 'U') as curl_output_f:
        curl_result = curl_output_f.read()
    study_accession = None
    submission_accession = None
    if 'success="true"' in curl_result:
        LogEntry.create('Runtime', curl_result)
        print curl_result
        print "SUCCESS"
        # Pull both accession numbers out of the reply XML.
        accessions = search('<STUDY accession="(?P<study>.+?)".*?'
                            '<SUBMISSION accession="(?P<submission>.+?)"',
                            curl_result)
        if accessions is not None:
            study_accession = accessions.group('study')
            submission_accession = accessions.group('submission')
            LogEntry.create('Runtime',
                            "Study accession:\t%s" % study_accession)
            LogEntry.create('Runtime',
                            "Submission accession:\t%s" %
                            submission_accession)
            print "Study accession:\t", study_accession
            print "Submission accession:\t", submission_accession
        else:
            # Submission reported success but the reply didn't match
            # the expected accession pattern.
            LogEntry.create('Runtime', ("However, the accession numbers "
                                        "could not be found in the output "
                                        "above."))
            print ("However, the accession numbers could not be found in "
                   "the output above.")
    else:
        LogEntry.create('Fatal', curl_result)
        print curl_result
        print "FAILED"
    return (study_accession, submission_accession)
def add_filepath(self, filepath, fp_id=None):
    r"""Populates the DB tables for storing the filepath and connects
    the `self` objects with this filepath"""
    with TRN:
        # Use the subclass' default filepath type unless told otherwise.
        fp_id = self._fp_id if fp_id is None else fp_id
        try:
            # Register the file under the 'templates' mountpoint
            # without moving it on disk.
            fpp_id = insert_filepaths([(filepath, fp_id)], None,
                                      "templates", "filepath",
                                      move_files=False)[0]
            sql = """INSERT INTO qiita.{0} ({1}, filepath_id)
                        VALUES (%s, %s)""".format(
                self._filepath_table, self._id_column
            )
            TRN.add(sql, [self._id, fpp_id])
            TRN.execute()
        except Exception as e:
            # Log the failure against this object before re-raising.
            LogEntry.create("Runtime", str(e),
                            info={self.__class__.__name__: self.id})
            raise e
def test_add_info(self):
    """add_info appends a second info dict; stored keys are strings."""
    entry = LogEntry.create('Warning', 'warning test',
                            info={1: 2, 'test': 'yeah'})
    entry.add_info({'another': 'set', 'of': 'entries', 'test': 3})
    expected = [{'1': 2, 'test': 'yeah'},
                {'another': 'set', 'of': 'entries', 'test': 3}]
    self.assertEqual(entry.info, expected)
def post(self):
    """Render the newest error-log records.

    ``numrecords`` comes from the form; non-positive values fall back
    to the latest 100 entries.
    """
    numentries = int(self.get_argument("numrecords"))
    # Changed ``< 0`` to ``<= 0``: requesting zero records is as
    # meaningless as a negative count (and this matches the admin
    # variant of this handler).
    if numentries <= 0:
        numentries = 100
    logentries = LogEntry.newest_records(numentries)
    self.render("error_log.html", logentries=logentries,
                user=self.current_user)
def test_info_property(self):
    """info returns the stored dicts with stringified keys."""
    entry = LogEntry.create('Warning', 'warning test',
                            info={1: 2, 'test': 'yeah'})
    self.assertEqual(entry.info, [{'1': 2, 'test': 'yeah'}])
def test_clear_info(self):
    """clear_info empties the stored info list."""
    entry = LogEntry.create('Warning', 'warning test',
                            info={1: 2, 'test': 'yeah'})
    entry.clear_info()
    self.assertEqual(entry.info, [])
def post(self):
    """Handle profile-update and password-change form submissions."""
    passmsg = ""
    msg = ""
    user = self.current_user
    action = self.get_argument("action")
    if action == "profile":
        # tuple of columns available for profile
        # FORM INPUT NAMES MUST MATCH DB COLUMN NAMES
        form_data = UserProfile()
        form_data.process(data=self.request.arguments)
        profile = {name: data[0]
                   for name, data in viewitems(form_data.data)}
        # Turn default value as list into default strings
        for field in form_data:
            field.data = field.data[0]
        try:
            user.info = profile
            msg = "Profile updated successfully"
        except Exception as e:
            msg = "ERROR: profile could not be updated"
            # Fixed typo in the log message ("Cound" -> "Could").
            LogEntry.create('Runtime',
                            "Could not update profile: %s" % str(e),
                            info={'User': user.id})
    elif action == "password":
        form_data = UserProfile()
        form_data.process(data=user.info)
        oldpass = self.get_argument("oldpass")
        newpass = self.get_argument("newpass")
        try:
            changed = user.change_password(oldpass, newpass)
        except Exception as e:
            passmsg = "ERROR: could not change password"
            LogEntry.create('Runtime',
                            "Could not change password: %s" % str(e),
                            info={'User': user.id})
        else:
            if changed:
                passmsg = "Password changed successfully"
            else:
                passmsg = "Incorrect old password"
    self.render("user_profile.html", user=user.id, profile=form_data,
                msg=msg, passmsg=passmsg)
def test_artifact_status_put_req_private(self):
    """An admin can move an artifact to private; the change is logged."""
    # NOTE(review): the user argument had been redacted to
    # '*****@*****.**' in this copy; restored from the log message
    # asserted below, which pins the original value.
    obs = artifact_status_put_req(1, '[email protected]', 'private')
    exp = {
        'status': 'success',
        'message': 'Artifact visibility changed to private'
    }
    self.assertEqual(obs, exp)
    # testing that the log message is generated
    self.assertEqual(
        LogEntry.newest_records(1)[0].msg,
        '[email protected] changed artifact 1 (study 1) to private')
def post(self):
    """Generate a reset code for the given email and mail the reset URL."""
    error = ""
    try:
        user = User(self.get_argument("email"))
    except QiitaDBUnknownIDError:
        error = "ERROR: Unknown user."
    else:
        user.generate_reset_code()
        info = user.info
        try:
            # NOTE(review): this source was corrupted here
            # ('"...password: "******"http://..."', invalid syntax);
            # restored as plain string concatenation.
            send_email(user, "QIITA: Password Reset",
                       "Please go to the following URL to reset your "
                       "password: http://qiita.colorado.edu/auth/reset/%s"
                       % info["pass_reset_code"])
            error = "Password reset. Check your email for the reset code."
        except Exception as e:
            error = "Unable to send email."
            # Fixed: the log format string had no %s placeholder, so
            # '"email" % str(e)' itself raised TypeError.
            LogEntry.create('Runtime', "Unable to send forgot password "
                            "email: %s" % str(e), info={'User': user.id})
    self.render("lost_pass.html", user=None, error=error)
def post(self):
    """Delete the analysis named in the form; redirect with a status."""
    analysis_id = int(self.get_argument('analysis_id'))
    analysis = Analysis(analysis_id)
    # Keep the name around: it is needed for the message after deletion.
    analysis_name = analysis.name
    check_analysis_access(self.current_user, analysis)
    try:
        Analysis.delete(analysis_id)
    except Exception as e:
        e = str(e)
        msg = ("Couldn't remove <b><i>%s</i></b> analysis: %s"
               % (analysis_name, e))
        level = "danger"
        LogEntry.create('Runtime', "Couldn't remove analysis ID %d: %s"
                        % (analysis_id, e))
    else:
        msg = ("Analysis <b><i>%s</i></b> has been deleted."
               % (analysis_name))
        level = "success"
    self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
def post(self):
    # Delete the analysis given in the form and redirect back to the
    # analysis listing with a status message.
    analysis_id = int(self.get_argument('analysis_id'))
    analysis = Analysis(analysis_id)
    # Grab the name before deletion so it can be used in the messages.
    analysis_name = analysis.name
    check_analysis_access(self.current_user, analysis)
    try:
        Analysis.delete(analysis_id)
        msg = ("Analysis <b><i>%s</i></b> has been deleted." %
               (analysis_name))
        level = "success"
    except Exception as e:
        e = str(e)
        msg = ("Couldn't remove <b><i>%s</i></b> analysis: %s" %
               (analysis_name, e))
        level = "danger"
        # Record the failure; the user still gets redirected below.
        LogEntry.create(
            'Runtime',
            "Couldn't remove analysis ID %d: %s" % (analysis_id, e))
    self.redirect(u"/analysis/show/?level=%s&message=%s" % (level, msg))
def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
    """Modularity helper"""
    ar = self.artifact
    # (basename, fullpath) for every forward-reads file in the artifact
    fps = [(basename(fp), fp) for _, fp, fpt in ar.filepaths
           if fpt == 'raw_forward_seqs']
    fps.sort(key=lambda x: x[1])
    # Map sample id -> run prefix; if the prep template has no
    # run_prefix column, derive the prefix from the sample id itself
    # (text after the first '.').
    if 'run_prefix' in self.prep_template.categories():
        rps = [(k, v) for k, v in viewitems(
            self.prep_template.get_category('run_prefix'))]
    else:
        rps = [(v, v.split('.', 1)[1]) for v in self.prep_template.keys()]
    rps.sort(key=lambda x: x[1])
    demux_samples = set()
    for sn, rp in rps:
        for i, (bn, fp) in enumerate(fps):
            if bn.startswith(rp):
                demux_samples.add(sn)
                new_fp = self.sample_demux_fps[sn]
                # Reuse the file if already gzipped, else compress it.
                if fp.endswith('.gz'):
                    copyfile(fp, new_fp)
                else:
                    cmd = "gzip -c %s > %s" % (fp, new_fp)
                    stdout, stderr, rv = system_call(cmd)
                    if rv != 0:
                        error_msg = (
                            "Error:\nStd output:%s\nStd error:%s"
                            % (stdout, stderr))
                        raise EBISubmissionError(error_msg)
                # A file belongs to exactly one sample: consume it so
                # it cannot match a later prefix.
                del fps[i]
                break
    if fps:
        # Leftover files matched no sample's run prefix.
        error_msg = (
            'Discrepancy between filepaths and sample names. Extra'
            ' filepaths: %s' % ', '.join([fp[0] for fp in fps]))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    return demux_samples, \
        set(self.samples.keys()).difference(set(demux_samples))
def _generate_demultiplexed_fastq_per_sample_FASTQ(self):
    """Modularity helper.

    Copies (or gzip-compresses) each sample's raw forward-reads file
    into its per-sample demux path. Returns the set of matched samples
    and the set of samples with no matching file.
    """
    artifact = self.artifact
    remaining = [(basename(path), path)
                 for _, path, fp_type in artifact.filepaths
                 if fp_type == 'raw_forward_seqs']
    remaining.sort(key=lambda pair: pair[1])
    if 'run_prefix' in self.prep_template.categories():
        prefixes = [(sample, prefix) for sample, prefix in viewitems(
            self.prep_template.get_category('run_prefix'))]
    else:
        # No run_prefix column: the prefix is the sample id minus the
        # leading study id ("<study>.<rest>").
        prefixes = [(sample, sample.split('.', 1)[1])
                    for sample in self.prep_template.keys()]
    prefixes.sort(key=lambda pair: pair[1])
    matched = set()
    for sample, prefix in prefixes:
        for idx, (name, path) in enumerate(remaining):
            if not name.startswith(prefix):
                continue
            matched.add(sample)
            target = self.sample_demux_fps[sample]
            if path.endswith('.gz'):
                copyfile(path, target)
            else:
                cmd = "gzip -c %s > %s" % (path, target)
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    raise EBISubmissionError(
                        "Error:\nStd output:%s\nStd error:%s"
                        % (stdout, stderr))
            # A file belongs to exactly one sample: consume it.
            del remaining[idx]
            break
    if remaining:
        error_msg = (
            'Discrepancy between filepaths and sample names. Extra'
            ' filepaths: %s' % ', '.join([pair[0] for pair in remaining]))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    return matched, set(self.samples.keys()).difference(set(matched))
def post(self):
    """Generate a reset code for the given email and mail the reset URL."""
    error = ""
    try:
        user = User(self.get_argument("email"))
    except QiitaDBUnknownIDError:
        error = "ERROR: Unknown user."
    else:
        user.generate_reset_code()
        info = user.info
        try:
            # NOTE(review): this source was corrupted here
            # ('"...password: "******"http://..."', invalid syntax);
            # restored as plain string concatenation.
            send_email(
                user, "QIITA: Password Reset",
                "Please go to the following URL to reset your "
                "password: http://qiita.colorado.edu/auth/reset/%s"
                % info["pass_reset_code"])
            error = "Password reset. Check your email for the reset code."
        except Exception as e:
            error = "Unable to send email."
            # Fixed: the log format string had no %s placeholder, so
            # '"email" % str(e)' itself raised TypeError.
            LogEntry.create('Runtime', "Unable to send forgot password "
                            "email: %s" % str(e), info={'User': user.id})
    self.render("lost_pass.html", user=None, error=error)
def get_filepaths(self):
    r"""Retrieves the list of (filepath_id, filepath)"""
    with TRN:
        sql = """SELECT filepath_id, filepath
                 FROM qiita.filepath
                 WHERE filepath_id IN (
                     SELECT filepath_id FROM qiita.{0}
                        WHERE {1}=%s)
                 ORDER BY filepath_id DESC""".format(
            self._filepath_table, self._id_column)
        try:
            TRN.add(sql, [self.id])
            rows = TRN.execute_fetchindex()
        except Exception as e:
            # Record the failure against this object before re-raising.
            LogEntry.create('Runtime', str(e),
                            info={self.__class__.__name__: self.id})
            raise e
        # Stored paths are relative to the 'templates' mountpoint.
        _, base_dir = get_mountpoint('templates')[0]
        absolute = partial(join, base_dir)
        return [(fp_id, absolute(fp)) for fp_id, fp in rows]
def add_filepath(self, filepath, fp_id=None): r"""Populates the DB tables for storing the filepath and connects the `self` objects with this filepath""" # Check that this function has been called from a subclass self._check_subclass() # Check if the connection handler has been provided. Create a new # one if not. conn_handler = SQLConnectionHandler() fp_id = self._fp_id if fp_id is None else fp_id try: fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates", "filepath", conn_handler, move_files=False)[0] values = (self._id, fpp_id) conn_handler.execute( "INSERT INTO qiita.{0} ({1}, filepath_id) " "VALUES (%s, %s)".format( self._filepath_table, self._id_column), values) except Exception as e: LogEntry.create('Runtime', str(e), info={self.__class__.__name__: self.id}) raise e
def add_filepath(self, filepath, conn_handler=None): r"""Populates the DB tables for storing the filepath and connects the `self` objects with this filepath""" # Check that this function has been called from a subclass self._check_subclass() # Check if the connection handler has been provided. Create a new # one if not. conn_handler = conn_handler if conn_handler else SQLConnectionHandler() if self._table == 'required_sample_info': fp_id = convert_to_id("sample_template", "filepath_type", conn_handler) table = 'sample_template_filepath' column = 'study_id' elif self._table == 'common_prep_info': fp_id = convert_to_id("prep_template", "filepath_type", conn_handler) table = 'prep_template_filepath' column = 'prep_template_id' else: raise QiitaDBNotImplementedError( 'add_filepath for %s' % self._table) try: fpp_id = insert_filepaths([(filepath, fp_id)], None, "templates", "filepath", conn_handler, move_files=False)[0] values = (self._id, fpp_id) conn_handler.execute( "INSERT INTO qiita.{0} ({1}, filepath_id) " "VALUES (%s, %s)".format(table, column), values) except Exception as e: LogEntry.create('Runtime', str(e), info={self.__class__.__name__: self.id}) raise e
def test_add_info(self):
    """add_info appends a new info dict while keeping the original one."""
    first_info = {1: 2, 'test': 'yeah'}
    second_info = {'another': 'set', 'of': 'entries', 'test': 3}

    entry = LogEntry.create('Warning', 'warning test', info=first_info)
    entry.add_info(second_info)

    # keys are stored as strings, so the integer key 1 comes back as '1'
    expected = [{'1': 2, 'test': 'yeah'}, second_info]
    self.assertEqual(entry.info, expected)
def parse_EBI_reply(self, curl_result, test=False):
    """Parse and verify reply from EBI after sending XML files

    Parameters
    ----------
    curl_result : str
        The reply sent by EBI after sending XML files
    test : bool
        If true we will assume is a test and ignore some parsing errors

    Returns
    -------
    str
        The study accession number. None in case of failure
    dict of {str: str}
        The sample accession numbers, keyed by sample id. None in case of
        failure
    dict of {str: str}
        The biosample accession numbers, keyed by sample id. None in case
        of failure
    dict of {str: str}
        The experiment accession numbers, keyed by sample id. None in case
        of failure
    dict of {str: str}
        The run accession numbers, keyed by sample id. None in case of
        failure

    Raises
    ------
    EBISubmissionError
        If curl_result is not a valid XML file
        If the ebi submission has not been successful
        If multiple study tags are found in the curl result
    """
    try:
        root = ET.fromstring(curl_result)
    except ParseError:
        error_msg = ("The curl result from the EBI submission doesn't "
                     "look like an XML file:\n%s" % curl_result)
        le = LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(
            "The curl result from the EBI submission doesn't look like "
            "an XML file. Contact an admin for more information. "
            "Log id: %d" % le.id)

    success = root.get('success') == 'true'
    if not success:
        # here we want to parse out the errors so the failures are clearer
        errors = {elem.text for elem in root.iter("ERROR")}
        raise EBISubmissionError("The EBI submission failed:\n%s"
                                 % '\n'.join(errors))

    if test:
        # in test mode no real accessions exist; return a fixed study
        # accession and empty maps
        study_accession = 'MyStudyAccession'
        sample_accessions = {}
        biosample_accessions = {}
        experiment_accessions = {}
        run_accessions = {}

        return (study_accession, sample_accessions, biosample_accessions,
                experiment_accessions, run_accessions)

    study_elem = root.findall("STUDY")
    if study_elem:
        if len(study_elem) > 1:
            raise EBISubmissionError(
                "Multiple study tags found in EBI reply: %d"
                % len(study_elem))
        study_elem = study_elem[0]
        study_accession = study_elem.get('accession')
    else:
        study_accession = None

    sample_accessions = {}
    biosample_accessions = {}
    for elem in root.iter("SAMPLE"):
        alias = elem.get('alias')
        sample_id = self._sample_aliases[alias]
        sample_accessions[sample_id] = elem.get('accession')
        # the biosample accession lives in the EXT_ID child element
        ext_id = elem.find('EXT_ID')
        biosample_accessions[sample_id] = ext_id.get('accession')

    def data_retriever(key, trans_dict):
        # map EBI aliases back to our ids using the given translation dict
        res = {}
        for elem in root.iter(key):
            alias = elem.get('alias')
            res[trans_dict[alias]] = elem.get('accession')
        return res

    experiment_accessions = data_retriever("EXPERIMENT",
                                           self._experiment_aliases)
    run_accessions = data_retriever("RUN", self._run_aliases)

    return (study_accession, sample_accessions, biosample_accessions,
            experiment_accessions, run_accessions)
def __init__(self, artifact_id, action):
    """Initialize an EBI submission for the given artifact.

    Parameters
    ----------
    artifact_id : int
        The artifact to be submitted
    action : str
        The EBI submission action; must be one of self.valid_ebi_actions

    Raises
    ------
    EBISubmissionError
        If the action is not valid, the artifact cannot be submitted to
        EBI, the artifact was already submitted (and action is not
        'MODIFY'), or any validation of the sample/prep templates fails.
    """
    # validation errors are accumulated here and raised together below
    error_msgs = []

    if action not in self.valid_ebi_actions:
        error_msg = ("%s is not a valid EBI submission action, valid "
                     "actions are: %s" %
                     (action, ', '.join(self.valid_ebi_actions)))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    self.action = action
    self.artifact = Artifact(artifact_id)
    if not self.artifact.can_be_submitted_to_ebi:
        error_msg = ("Artifact %d cannot be submitted to EBI"
                     % self.artifact.id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    self.study = self.artifact.study
    self.sample_template = self.study.sample_template
    # If we reach this point, there should be only one prep template
    # attached to the artifact. By design, each artifact has at least one
    # prep template. Artifacts with more than one prep template cannot be
    # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
    # be set to false, which is checked in the previous if statement
    self.prep_template = self.artifact.prep_templates[0]

    if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
        error_msg = ("Cannot resubmit! Artifact %d has already "
                     "been submitted to EBI." % artifact_id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    self.artifact_id = artifact_id
    self.study_title = self.study.title
    self.study_abstract = self.study.info['study_abstract']

    # the investigation type must be either an official ENA term or a
    # user-defined term (submitted to EBI as 'Other')
    it = self.prep_template.investigation_type
    if it in ena_ontology.terms:
        self.investigation_type = it
        self.new_investigation_type = None
    elif it in ena_ontology.user_defined_terms:
        self.investigation_type = 'Other'
        self.new_investigation_type = it
    else:
        # This should never happen
        error_msgs.append("Unrecognized investigation type: '%s'. This "
                          "term is neither one of the official terms nor "
                          "one of the user-defined terms in the ENA "
                          "ontology." % it)

    # working directory and reply/XML file locations for this submission
    _, base_fp = get_mountpoint("preprocessed_data")[0]
    self.ebi_dir = '%d_ebi_submission' % artifact_id
    self.full_ebi_dir = join(base_fp, self.ebi_dir)
    self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
    self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
    self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
    self.study_xml_fp = None
    self.sample_xml_fp = None
    self.experiment_xml_fp = None
    self.run_xml_fp = None
    self.submission_xml_fp = None
    self.publications = self.study.publications

    # getting the restrictions
    st_restrictions = [self.sample_template.columns_restrictions['EBI']]
    pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
    if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
        # adding restictions on primer and barcode as these are
        # conditionally requiered for target gene
        pt_restrictions.append(
            PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
    st_missing = self.sample_template.check_restrictions(st_restrictions)
    pt_missing = self.prep_template.check_restrictions(pt_restrictions)
    # testing if there are any missing columns
    if st_missing:
        error_msgs.append("Missing column in the sample template: %s" %
                          ', '.join(list(st_missing)))
    if pt_missing:
        error_msgs.append("Missing column in the prep template: %s" %
                          ', '.join(list(pt_missing)))

    # generating all samples from sample template
    self.samples = {}
    self.samples_prep = {}
    self.sample_demux_fps = {}
    get_output_fp = partial(join, self.full_ebi_dir)
    nvp = []    # samples without a valid platform
    nvim = []   # samples without a valid instrument model
    for k, v in viewitems(self.sample_template):
        # only samples present in both templates are submitted
        if k not in self.prep_template:
            continue
        sample_prep = self.prep_template[k]

        # validating required fields
        if ('platform' not in sample_prep or
                sample_prep['platform'] is None):
            nvp.append(k)
        else:
            platform = sample_prep['platform'].upper()
            if platform not in self.valid_platforms:
                nvp.append(k)
            else:
                if ('instrument_model' not in sample_prep or
                        sample_prep['instrument_model'] is None):
                    nvim.append(k)
                else:
                    im = sample_prep['instrument_model'].upper()
                    if im not in self.valid_platforms[platform]:
                        nvim.append(k)

        self.samples[k] = v
        self.samples_prep[k] = sample_prep
        self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

    if nvp:
        error_msgs.append("These samples do not have a valid platform "
                          "(instrumet model wasn't checked): %s" % (
                              ', '.join(nvp)))
    if nvim:
        error_msgs.append("These samples do not have a valid instrument "
                          "model: %s" % (', '.join(nvim)))
    if error_msgs:
        error_msgs = ("Errors found during EBI submission for study #%d, "
                      "artifact #%d and prep template #%d:\n%s"
                      % (self.study.id, artifact_id,
                         self.prep_template.id, '\n'.join(error_msgs)))
        LogEntry.create('Runtime', error_msgs)
        raise EBISubmissionError(error_msgs)

    # alias -> id maps filled in while generating the XML files, later
    # used to translate EBI's reply back to our sample/experiment/run ids
    self._sample_aliases = {}
    self._experiment_aliases = {}
    self._run_aliases = {}

    self._ebi_sample_accessions = \
        self.sample_template.ebi_sample_accessions
    self._ebi_experiment_accessions = \
        self.prep_template.ebi_experiment_accessions
def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
    """Generates demultiplexed fastq

    Parameters
    ----------
    rewrite_fastq : bool, optional
        If true, it forces the rewrite of the fastq files
    mtime : float, optional
        The time to use when creating the gz files. If None, the current
        time will be used by gzip.GzipFile. This is useful for testing.

    Returns
    -------
    demux_samples
        List of successful demultiplexed samples

    Notes
    -----
    - As a performace feature, this method will check if self.full_ebi_dir
    already exists and, if it does, the script will assume that in a
    previous execution this step was performed correctly and will simply
    read the file names from self.full_ebi_dir
    - When the object is created (init), samples, samples_prep and
    sample_demux_fps hold values for all available samples in the database.
    Here some of those values will be deleted (del's, within the loops) for
    those cases where the fastq.gz files weren't written or exist. This is
    an indication that they had no sequences and this kind of files are not
    accepted in EBI

    Raises
    ------
    EBISubmissionError
        - The demux file couldn't be read
        - All samples are removed
    """
    ar = self.artifact

    dir_not_exists = not isdir(self.full_ebi_dir)
    if dir_not_exists or rewrite_fastq:
        makedirs(self.full_ebi_dir)

        # An artifact will hold only one file of type `preprocessed_demux`
        # Thus, we only use the first one (the only one present)
        demux = [path for _, path, ftype in ar.filepaths
                 if ftype == 'preprocessed_demux'][0]

        demux_samples = set()
        with open_file(demux) as demux_fh:
            if not isinstance(demux_fh, File):
                # open_file returned something that is not an HDF5 handle,
                # so this cannot be a valid demux file
                error_msg = "'%s' doesn't look like a demux file" % demux
                LogEntry.create('Runtime', error_msg)
                raise EBISubmissionError(error_msg)
            # write one gzipped per-sample fastq file per sample
            for s, i in to_per_sample_ascii(demux_fh,
                                            self.prep_template.keys()):
                sample_fp = self.sample_demux_fps[s]
                wrote_sequences = False
                with GzipFile(sample_fp, mode='w', mtime=mtime) as fh:
                    for record in i:
                        fh.write(record)
                        wrote_sequences = True

                if wrote_sequences:
                    demux_samples.add(s)
                else:
                    # empty sample: drop it from the submission and remove
                    # the empty gz file (EBI rejects empty files)
                    del (self.samples[s])
                    del (self.samples_prep[s])
                    del (self.sample_demux_fps[s])
                    remove(sample_fp)
    else:
        # directory already exists: trust the previous run and collect the
        # sample names from the existing fastq.gz files
        demux_samples = set()
        extension = '.fastq.gz'
        extension_len = len(extension)
        for f in listdir(self.full_ebi_dir):
            fpath = join(self.full_ebi_dir, f)
            if isfile(fpath) and f.endswith(extension):
                demux_samples.add(f[:-extension_len])

        # drop samples for which no fastq.gz file exists on disk
        missing_samples = set(self.samples.keys()).difference(
            set(demux_samples))
        for ms in missing_samples:
            del (self.samples[ms])
            del (self.samples_prep[ms])
            del (self.sample_demux_fps[ms])

    if not demux_samples:
        error_msg = ("All samples were removed from the submission "
                     "because the demux file is empty or the sample names "
                     "do not match.")
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    return demux_samples
def artifact_patch_request(user, artifact_id, req_op, req_path,
                           req_value=None, req_from=None):
    """Modifies an attribute of the artifact

    Parameters
    ----------
    user : qiita_db.user.User
        The user performing the patch operation
    artifact_id : int
        Id of the artifact in which the patch operation is being performed
    req_op : str
        The operation to perform on the artifact
    req_path : str
        The prep information and attribute to patch
    req_value : str, optional
        The value that needs to be modified
    req_from : str, optional
        The original path of the element

    Raises
    ------
    QiitaHTTPError
        If `req_op` != 'replace'
        If the path parameter is incorrect
        If missing req_value
        If the attribute to replace is not known
    """
    # only the JSON-patch 'replace' operation is supported
    if req_op == 'replace':
        # split the path and drop empty components (leading/trailing '/')
        req_path = [v for v in req_path.split('/') if v]
        if len(req_path) != 1:
            raise QiitaHTTPError(404, 'Incorrect path parameter')

        attribute = req_path[0]

        # Check if the user actually has access to the artifact
        artifact = Artifact(artifact_id)
        check_artifact_access(user, artifact)

        if not req_value:
            raise QiitaHTTPError(404, 'Missing value to replace')

        if attribute == 'name':
            artifact.name = req_value
            return
        elif attribute == 'visibility':
            if req_value not in get_visibilities():
                raise QiitaHTTPError(
                    400, 'Unknown visibility value: %s' % req_value)
            # only admins can make an artifact private when the deployment
            # requires approval
            if (req_value == 'private' and qiita_config.require_approval
                    and not user.level == 'admin'):
                raise QiitaHTTPError(403, 'User does not have permissions '
                                     'to approve change')

            try:
                artifact.visibility = req_value
            except Exception as e:
                # surface the underlying error as HTML-friendly text
                raise QiitaHTTPError(403, str(e).replace('\n', '<br/>'))

            sid = artifact.study.id
            if artifact.visibility == 'awaiting_approval':
                # notify the admins that an approval request is pending
                email_to = '*****@*****.**'
                subject = ('QIITA: Artifact %s awaiting_approval. Study %d, '
                           'Prep %d' % (artifact_id, sid,
                                        artifact.prep_templates[0].id))
                message = ('%s requested approval. <a '
                           'href="https://qiita.ucsd.edu/study/description/'
                           '%d">Study %d</a>.' % (user.email, sid, sid))
                try:
                    send_email(email_to, subject, message)
                except Exception:
                    msg = ("Couldn't send email to admins, please email us "
                           "directly to <a href='mailto:{0}'>{0}</a>.".format(
                               email_to))
                    raise QiitaHTTPError(400, msg)
            else:
                # record the visibility change in the log
                msg = '%s changed artifact %s (study %d) to %s' % (
                    user.email, artifact_id, sid, req_value)
                LogEntry.create('Warning', msg)
        else:
            # We don't understand the attribute so return an error
            raise QiitaHTTPError(404, 'Attribute "%s" not found. Please, '
                                 'check the path parameter' % attribute)
    else:
        raise QiitaHTTPError(400, 'Operation "%s" not supported. Current '
                             'supported operations: replace' % req_op)
def test_complete_job(self):
    """Exercise the 'complete_job' private task: success, reported job
    failure, and internal error while creating the output artifact."""
    # Complete success
    pt = npt.assert_warns(
        QiitaDBWarning, PrepTemplate.create,
        pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
    c_job = ProcessingJob.create(
        User('*****@*****.**'),
        Parameters.load(
            Command.get_validator('BIOM'),
            values_dict={'template': pt.id,
                         'files': dumps({'BIOM': ['file']}),
                         'artifact_type': 'BIOM'}), True)
    c_job._set_status('running')
    # minimal on-disk biom file for the artifact creation to succeed
    fd, fp = mkstemp(suffix='_table.biom')
    close(fd)
    with open(fp, 'w') as f:
        f.write('\n')
    self._clean_up_files.append(fp)
    exp_artifact_count = get_count('qiita.artifact') + 1
    payload = dumps(
        {'success': True, 'error': '',
         'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')],
                                     'artifact_type': 'BIOM'}}})
    job = self._create_job('complete_job', {'job_id': c_job.id,
                                            'payload': payload})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertEqual(c_job.status, 'success')
    # exactly one new artifact was created
    self.assertEqual(get_count('qiita.artifact'), exp_artifact_count)

    # Complete job error
    payload = dumps({'success': False, 'error': 'Job failure'})
    job = self._create_job(
        'complete_job', {'job_id': 'bcc7ebcd-39c1-43e4-af2d-822e3589f14d',
                         'payload': payload})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    c_job = ProcessingJob('bcc7ebcd-39c1-43e4-af2d-822e3589f14d')
    self.assertEqual(c_job.status, 'error')
    # the reported failure must be logged as the newest LogEntry
    self.assertEqual(c_job.log, LogEntry.newest_records(numrecords=1)[0])
    self.assertEqual(c_job.log.msg, 'Job failure')

    # Complete internal error
    pt = npt.assert_warns(
        QiitaDBWarning, PrepTemplate.create,
        pd.DataFrame({'new_col': {'1.SKD6.640190': 1}}), Study(1), '16S')
    c_job = ProcessingJob.create(
        User('*****@*****.**'),
        Parameters.load(
            Command.get_validator('BIOM'),
            values_dict={'template': pt.id,
                         'files': dumps({'BIOM': ['file']}),
                         'artifact_type': 'BIOM'}), True)
    c_job._set_status('running')
    # a path that does not exist triggers the internal error path
    fp = '/surprised/if/this/path/exists.biom'
    payload = dumps(
        {'success': True, 'error': '',
         'artifacts': {'OTU table': {'filepaths': [(fp, 'biom')],
                                     'artifact_type': 'BIOM'}}})
    job = self._create_job('complete_job', {'job_id': c_job.id,
                                            'payload': payload})
    private_task(job.id)
    self.assertEqual(job.status, 'success')
    self.assertEqual(c_job.status, 'error')
    self.assertIn('No such file or directory', c_job.log.msg)
def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
    """Generates demultiplexed fastq

    Parameters
    ----------
    rewrite_fastq : bool, optional
        If true, it forces the rewrite of the fastq files
    mtime : float, optional
        The time to use when creating the gz files. If None, the current
        time will be used by gzip.GzipFile. This is useful for testing.

    Returns
    -------
    demux_samples
        List of successful demultiplexed samples

    Notes
    -----
    - As a performace feature, this method will check if self.full_ebi_dir
    already exists and, if it does, the script will assume that in a
    previous execution this step was performed correctly and will simply
    read the file names from self.full_ebi_dir
    - When the object is created (init), samples, samples_prep and
    sample_demux_fps hold values for all available samples in the database.
    Here some of those values will be deleted (del's, within the loops) for
    those cases where the fastq.gz files weren't written or exist. This is
    an indication that they had no sequences and this kind of files are not
    accepted in EBI

    Raises
    ------
    EBISubmissionError
        - The demux file couldn't be read
        - All samples are removed
    """
    dir_not_exists = not isdir(self.full_ebi_dir)
    missing_samples = []
    if dir_not_exists or rewrite_fastq:
        makedirs(self.full_ebi_dir)

        # dispatch on artifact type: per-sample fastq files are linked,
        # everything else is demultiplexed from the demux file
        if self.artifact.artifact_type == 'per_sample_FASTQ':
            demux_samples, missing_samples = \
                self._generate_demultiplexed_fastq_per_sample_FASTQ()
        else:
            demux_samples = self._generate_demultiplexed_fastq_demux(mtime)
    else:
        # directory exists: reuse output of a previous run and collect
        # the sample names from the existing fastq.gz files
        demux_samples = set()
        extension = '.fastq.gz'
        extension_len = len(extension)
        for f in listdir(self.full_ebi_dir):
            fpath = join(self.full_ebi_dir, f)
            if isfile(fpath) and f.endswith(extension):
                demux_samples.add(f[:-extension_len])
        missing_samples = set(
            self.samples.keys()).difference(demux_samples)

    if missing_samples:
        # drop samples that produced no sequences/files (EBI rejects
        # empty submissions for a sample)
        for ms in missing_samples:
            del(self.samples[ms])
            del(self.samples_prep[ms])
            del(self.sample_demux_fps[ms])

    if not demux_samples:
        error_msg = ("All samples were removed from the submission "
                     "because the demux file is empty or the sample names "
                     "do not match.")
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    return demux_samples
def parse_EBI_reply(self, curl_result):
    """Parse and verify reply from EBI after sending XML files

    Parameters
    ----------
    curl_result : str
        The reply sent by EBI after sending XML files

    Returns
    -------
    str
        The study accession number. None in case of failure
    dict of {str: str}
        The sample accession numbers, keyed by sample id. None in case of
        failure
    dict of {str: str}
        The biosample accession numbers, keyed by sample id. None in case
        of failure
    dict of {str: str}
        The experiment accession numbers, keyed by sample id. None in case
        of failure
    dict of {str: str}
        The run accession numbers, keyed by sample id. None in case of
        failure

    Raises
    ------
    EBISubmissionError
        If curl_result is not a valid XML file
        If the ebi submission has not been successful
        If multiple study tags are found in the curl result
    """
    try:
        root = ET.fromstring(curl_result)
    except ParseError:
        error_msg = ("The curl result from the EBI submission doesn't "
                     "look like an XML file:\n%s" % curl_result)
        le = LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(
            "The curl result from the EBI submission doesn't look like "
            "an XML file. Contact an admin for more information. "
            "Log id: %d" % le.id)

    success = root.get('success') == 'true'
    if not success:
        raise EBISubmissionError("The EBI submission failed:\n%s"
                                 % curl_result)

    study_elem = root.findall("STUDY")
    if study_elem:
        if len(study_elem) > 1:
            raise EBISubmissionError(
                "Multiple study tags found in EBI reply: %d"
                % len(study_elem))
        study_elem = study_elem[0]
        study_accession = study_elem.get('accession')
    else:
        study_accession = None

    sample_accessions = {}
    biosample_accessions = {}
    for elem in root.iter("SAMPLE"):
        alias = elem.get('alias')
        sample_id = self._sample_aliases[alias]
        sample_accessions[sample_id] = elem.get('accession')
        # the biosample accession lives in the EXT_ID child element
        ext_id = elem.find('EXT_ID')
        biosample_accessions[sample_id] = ext_id.get('accession')

    def data_retriever(key, trans_dict):
        # map EBI aliases back to our ids using the given translation dict
        res = {}
        for elem in root.iter(key):
            alias = elem.get('alias')
            res[trans_dict[alias]] = elem.get('accession')
        return res

    experiment_accessions = data_retriever("EXPERIMENT",
                                           self._experiment_aliases)
    run_accessions = data_retriever("RUN", self._run_aliases)

    return (study_accession, sample_accessions, biosample_accessions,
            experiment_accessions, run_accessions)
def __init__(self, artifact_id, action):
    """Initialize an EBI submission for the given artifact.

    Parameters
    ----------
    artifact_id : int
        The artifact to be submitted
    action : str
        The EBI submission action; must be one of self.valid_ebi_actions

    Raises
    ------
    EBISubmissionError
        If the action is not valid, the artifact cannot be submitted to
        EBI, the artifact was already submitted (and action is not
        'MODIFY'), the study is already being submitted, or any
        validation of the sample/prep templates fails.
    """
    # validation errors are accumulated here and raised together below
    error_msgs = []

    if action not in self.valid_ebi_actions:
        error_msg = ("%s is not a valid EBI submission action, valid "
                     "actions are: %s" %
                     (action, ', '.join(self.valid_ebi_actions)))
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    ena_ontology = Ontology(convert_to_id('ENA', 'ontology'))
    self.action = action
    self.artifact = Artifact(artifact_id)
    if not self.artifact.can_be_submitted_to_ebi:
        error_msg = ("Artifact %d cannot be submitted to EBI"
                     % self.artifact.id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    self.study = self.artifact.study
    self.sample_template = self.study.sample_template
    # If we reach this point, there should be only one prep template
    # attached to the artifact. By design, each artifact has at least one
    # prep template. Artifacts with more than one prep template cannot be
    # submitted to EBI, so the attribute 'can_be_submitted_to_ebi' should
    # be set to false, which is checked in the previous if statement
    self.prep_template = self.artifact.prep_templates[0]

    if self.artifact.is_submitted_to_ebi and action != 'MODIFY':
        error_msg = ("Cannot resubmit! Artifact %d has already "
                     "been submitted to EBI." % artifact_id)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    # guard against two submissions for the same study running at once
    status = self.study.ebi_submission_status
    if status in self.valid_ebi_submission_states:
        error_msg = ("Cannot perform parallel EBI submission for the same "
                     "study. Current status of the study: %s" % status)
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)

    self.artifact_id = artifact_id
    self.study_title = self.study.title
    self.study_abstract = self.study.info['study_abstract']

    # the investigation type must be either an official ENA term or a
    # user-defined term (submitted to EBI as 'Other')
    it = self.prep_template.investigation_type
    if it in ena_ontology.terms:
        self.investigation_type = it
        self.new_investigation_type = None
    elif it in ena_ontology.user_defined_terms:
        self.investigation_type = 'Other'
        self.new_investigation_type = it
    else:
        # This should never happen
        error_msgs.append("Unrecognized investigation type: '%s'. This "
                          "term is neither one of the official terms nor "
                          "one of the user-defined terms in the ENA "
                          "ontology." % it)

    # working directory and reply/XML file locations for this submission
    _, base_fp = get_mountpoint("preprocessed_data")[0]
    self.ebi_dir = '%d_ebi_submission' % artifact_id
    self.full_ebi_dir = join(base_fp, self.ebi_dir)
    self.ascp_reply = join(self.full_ebi_dir, 'ascp_reply.txt')
    self.curl_reply = join(self.full_ebi_dir, 'curl_reply.xml')
    self.xml_dir = join(self.full_ebi_dir, 'xml_dir')
    self.study_xml_fp = None
    self.sample_xml_fp = None
    self.experiment_xml_fp = None
    self.run_xml_fp = None
    self.submission_xml_fp = None
    self.publications = self.study.publications

    # getting the restrictions
    st_restrictions = [self.sample_template.columns_restrictions['EBI']]
    pt_restrictions = [self.prep_template.columns_restrictions['EBI']]
    if self.artifact.data_type in TARGET_GENE_DATA_TYPES:
        # adding restictions on primer and barcode as these are
        # conditionally requiered for target gene
        pt_restrictions.append(
            PREP_TEMPLATE_COLUMNS_TARGET_GENE['demultiplex'])
    st_missing = self.sample_template.check_restrictions(st_restrictions)
    pt_missing = self.prep_template.check_restrictions(pt_restrictions)
    # testing if there are any missing columns
    if st_missing:
        error_msgs.append("Missing column in the sample template: %s" %
                          ', '.join(list(st_missing)))
    if pt_missing:
        error_msgs.append("Missing column in the prep template: %s" %
                          ', '.join(list(pt_missing)))

    # generating all samples from sample template
    self.samples = {}
    self.samples_prep = {}
    self.sample_demux_fps = {}
    get_output_fp = partial(join, self.full_ebi_dir)
    nvp = []    # samples without a valid platform
    nvim = []   # samples without a valid instrument model
    for k, v in viewitems(self.sample_template):
        # only samples present in both templates are submitted
        if k not in self.prep_template:
            continue
        sample_prep = self.prep_template[k]

        # validating required fields
        if ('platform' not in sample_prep or
                sample_prep['platform'] is None):
            nvp.append(k)
        else:
            platform = sample_prep['platform'].upper()
            if platform not in self.valid_platforms:
                nvp.append(k)
            else:
                if ('instrument_model' not in sample_prep or
                        sample_prep['instrument_model'] is None):
                    nvim.append(k)
                else:
                    im = sample_prep['instrument_model'].upper()
                    if im not in self.valid_platforms[platform]:
                        nvim.append(k)

        self.samples[k] = v
        self.samples_prep[k] = sample_prep
        self.sample_demux_fps[k] = get_output_fp("%s.fastq.gz" % k)

    if nvp:
        error_msgs.append("These samples do not have a valid platform "
                          "(instrumet model wasn't checked): %s" % (
                              ', '.join(nvp)))
    if nvim:
        error_msgs.append("These samples do not have a valid instrument "
                          "model: %s" % (', '.join(nvim)))
    if error_msgs:
        error_msgs = ("Errors found during EBI submission for study #%d, "
                      "artifact #%d and prep template #%d:\n%s"
                      % (self.study.id, artifact_id,
                         self.prep_template.id, '\n'.join(error_msgs)))
        LogEntry.create('Runtime', error_msgs)
        raise EBISubmissionError(error_msgs)

    # alias -> id maps filled in while generating the XML files, later
    # used to translate EBI's reply back to our sample/experiment/run ids
    self._sample_aliases = {}
    self._experiment_aliases = {}
    self._run_aliases = {}

    self._ebi_sample_accessions = \
        self.sample_template.ebi_sample_accessions
    self._ebi_experiment_accessions = \
        self.prep_template.ebi_experiment_accessions
def generate_demultiplexed_fastq(self, rewrite_fastq=False, mtime=None):
    """Generates demultiplexed fastq

    Parameters
    ----------
    rewrite_fastq : bool, optional
        If true, it forces the rewrite of the fastq files
    mtime : float, optional
        The time to use when creating the gz files. If None, the current
        time will be used by gzip.GzipFile. This is useful for testing.

    Returns
    -------
    demux_samples
        List of successful demultiplexed samples

    Notes
    -----
    - As a performace feature, this method will check if self.full_ebi_dir
    already exists and, if it does, the script will assume that in a
    previous execution this step was performed correctly and will simply
    read the file names from self.full_ebi_dir
    - When the object is created (init), samples, samples_prep and
    sample_demux_fps hold values for all available samples in the database.
    Here some of those values will be deleted (del's, within the loops) for
    those cases where the fastq.gz files weren't written or exist. This is
    an indication that they had no sequences and this kind of files are not
    accepted in EBI

    Raises
    ------
    EBISubmissionError
        - The demux file couldn't be read
        - All samples are removed
    """
    dir_not_exists = not isdir(self.full_ebi_dir)
    missing_samples = []
    if dir_not_exists or rewrite_fastq:
        # if it exists, remove folder and start from scratch
        if isdir(self.full_ebi_dir):
            rmtree(self.full_ebi_dir)
        makedirs(self.full_ebi_dir)

        # dispatch on artifact type: per-sample fastq files are linked,
        # everything else is demultiplexed from the demux file
        if self.artifact.artifact_type == 'per_sample_FASTQ':
            demux_samples, missing_samples = \
                self._generate_demultiplexed_fastq_per_sample_FASTQ()
        else:
            demux_samples = self._generate_demultiplexed_fastq_demux(mtime)
    else:
        # directory exists: reuse output of a previous run and collect
        # the sample names from the existing fastq.gz files
        demux_samples = set()
        extension = '.fastq.gz'
        extension_len = len(extension)
        for f in listdir(self.full_ebi_dir):
            fpath = join(self.full_ebi_dir, f)
            if isfile(fpath) and f.endswith(extension):
                demux_samples.add(f[:-extension_len])
        missing_samples = set(
            self.samples.keys()).difference(demux_samples)

    if missing_samples:
        # drop samples that produced no sequences/files (EBI rejects
        # empty submissions for a sample)
        for ms in missing_samples:
            del (self.samples[ms])
            del (self.samples_prep[ms])
            del (self.sample_demux_fps[ms])

    if not demux_samples:
        error_msg = ("All samples were removed from the submission "
                     "because the demux file is empty or the sample names "
                     "do not match.")
        LogEntry.create('Runtime', error_msg)
        raise EBISubmissionError(error_msg)
    return demux_samples
def submit_EBI(artifact_id, action, send, test=False):
    """Submit an artifact to EBI

    Parameters
    ----------
    artifact_id : int
        The artifact id
    action : %s
        The action to perform with this data
    send : bool
        True to actually send the files
    test : bool
        If True some restrictions will be ignored, only used in
        parse_EBI_reply

    Returns
    -------
    tuple of (str, dict, dict, dict, dict)
        The study, sample, biosample, experiment and run accessions
        parsed from EBI's reply; all None when `send` is False or the
        action is not 'ADD' (and not a test).

    Raises
    ------
    ComputeError
        If the ascp/curl calls fail or EBI's reply cannot be parsed
    """
    # step 1: init and validate
    ebi_submission = EBISubmission(artifact_id, action)

    # step 2: generate demux fastq files
    try:
        ebi_submission.generate_demultiplexed_fastq()
    except Exception:
        # log the full traceback, clean up the partial output dir, and
        # propagate the original error
        error_msg = format_exc()
        if isdir(ebi_submission.full_ebi_dir):
            rmtree(ebi_submission.full_ebi_dir)
        LogEntry.create('Runtime', error_msg,
                        info={'ebi_submission': artifact_id})
        raise

    # step 3: generate and write xml files
    ebi_submission.generate_xml_files()

    if send:
        # getting aspera's password
        old_ascp_pass = environ.get('ASPERA_SCP_PASS', '')
        if old_ascp_pass == '':
            environ['ASPERA_SCP_PASS'] = qiita_config.ebi_seq_xfer_pass
        ascp_passwd = environ['ASPERA_SCP_PASS']
        LogEntry.create('Runtime',
                        ('Submission of sequences of pre_processed_id: '
                         '%d completed successfully' % artifact_id))

        # step 4: sending sequences
        if action != 'MODIFY':
            LogEntry.create('Runtime',
                            ("Submitting sequences for pre_processed_id: "
                             "%d" % artifact_id))
            for cmd in ebi_submission.generate_send_sequences_cmd():
                stdout, stderr, rv = system_call(cmd)
                if rv != 0:
                    error_msg = ("ASCP Error:\nStd output:%s\nStd error:%s"
                                 % (stdout, stderr))
                    # restore the environment before bailing out
                    environ['ASPERA_SCP_PASS'] = old_ascp_pass
                    raise ComputeError(error_msg)
                # `with` guarantees the reply file is flushed and closed
                # (the previous open(...).write(...) leaked the handle)
                with open(ebi_submission.ascp_reply, 'a') as ascp_fh:
                    ascp_fh.write('stdout:\n%s\n\nstderr: %s'
                                  % (stdout, stderr))
            environ['ASPERA_SCP_PASS'] = old_ascp_pass

        # step 5: sending xml and parsing answer
        xmls_cmds = ebi_submission.generate_curl_command(
            ebi_seq_xfer_pass=ascp_passwd)
        LogEntry.create('Runtime',
                        ("Submitting XMLs for pre_processed_id: "
                         "%d" % artifact_id))
        xml_content, stderr, rv = system_call(xmls_cmds)
        if rv != 0:
            error_msg = ("Error:\nStd output:%s\nStd error:%s"
                         % (xml_content, stderr))
            raise ComputeError(error_msg)
        else:
            LogEntry.create('Runtime',
                            ('Submission of sequences of pre_processed_id: '
                             '%d completed successfully' % artifact_id))
        with open(ebi_submission.curl_reply, 'w') as curl_fh:
            curl_fh.write('stdout:\n%s\n\nstderr: %s'
                          % (xml_content, stderr))

        try:
            st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
                ebi_submission.parse_EBI_reply(xml_content, test=test)
        except EBISubmissionError as e:
            error = str(e)
            le = LogEntry.create(
                'Fatal', "Command: %s\nError: %s\n" % (xml_content, error),
                info={'ebi_submission': artifact_id})
            raise ComputeError(
                "EBI Submission failed! Log id: %d\n%s" % (le.id, error))

        # store the accessions only for new submissions (or tests)
        if action == 'ADD' or test:
            if st_acc:
                ebi_submission.study.ebi_study_accession = st_acc
            if sa_acc:
                ebi_submission.sample_template.ebi_sample_accessions = \
                    sa_acc
            if bio_acc:
                ebi_submission.sample_template.biosample_accessions = \
                    bio_acc
            if ex_acc:
                ebi_submission.prep_template.ebi_experiment_accessions = \
                    ex_acc
            ebi_submission.artifact.ebi_run_accessions = run_acc
    else:
        st_acc, sa_acc, bio_acc, ex_acc, run_acc = \
            None, None, None, None, None

    return st_acc, sa_acc, bio_acc, ex_acc, run_acc
def get(self):
    """Render the error log page with the most recent log records."""
    self.check_access()
    records = LogEntry.newest_records()
    self.render("error_log.html", logentries=records)