def upload(self, object_ids_f=None, feature_matrix_f=None):
    """Show the custom feature upload page and process a submitted upload.

    When both upload fields are submitted, the files are handed to
    ``project_manager.add_custom_features``. An empty string result is
    treated as success and triggers a redirect to the page at
    ``self.get_url(0)``; any other result is passed to the template as
    ``msg``.

    NOTE(review): the block nesting was reconstructed from a flattened
    source line -- confirm against the original layout.
    """
    smi = 2
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    manager = self.project_manager
    feature_extraction = manager.get_feature_extraction()

    template_args = self.get_template_args(smi)
    template_args["fe"] = feature_extraction

    # Only process an upload when both form fields were submitted.
    if object_ids_f and feature_matrix_f:
        if object_ids_f.file is None:
            outcome = "No protein ids file selected"
        elif feature_matrix_f.file is None:
            outcome = "No feature matrix file selected"
        else:
            outcome = manager.add_custom_features(
                self.project_id, object_ids_f, feature_matrix_f
            )

        # An empty message means the upload succeeded.
        if outcome == "":
            raise cherrypy.HTTPRedirect(self.get_url(0))
        template_args["msg"] = outcome

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def details(self, cl_id):
    """Render the details page of classifier ``cl_id``.

    Falls back to the "no project selected" or "no such classifier" page
    when the session has no project or the id is unknown. Result data
    (cross-validation results, settings, ROC url) is only added once the
    classifier is reported as finished.

    NOTE(review): the extent of the "finished" branch was reconstructed
    from a flattened source line -- confirm against the original layout.
    """
    smi = 2
    self.fetch_session_data()
    manager = self.project_manager

    if self.project_id is None:
        return self.no_project_selected()
    if cl_id not in manager.get_classifier_ids():
        return self.no_such_classifier()

    template_args = self.get_template_args(smi)
    template_args['cl_ids'] = self.project_manager.get_classifier_ids()
    template_args['cl_id'] = cl_id

    if self.project_manager.get_classifier_finished(cl_id):
        results = manager.get_classifier_result(cl_id)
        cv_results, avg_results = results
        template_args['cv_results'] = cv_results
        template_args['avg_results'] = avg_results
        template_args['cl_settings'] = manager.get_classifier_settings(cl_id)
        template_args['cl_names'] = self.CL_NAMES

        # Only link to the ROC plot when its file is actually on disk.
        roc_path = manager.get_roc_f(cl_id)
        roc_url = None
        if roc_path and os.path.exists(roc_path):
            roc_url = '%s%s%s/%s' % (self.root_url, self.mm_url, 'roc', cl_id)
        template_args['roc_url'] = roc_url

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def new(self, project_id=None, fasta_file=None, sequence_type=None,
        use_reference=None, taxon_domain=None, taxon=None):
    """Show the new-project form and create a project from submitted data.

    A project is only created when ``fasta_file``, ``sequence_type`` and
    ``project_id`` are all submitted. The input is validated first; any
    validation or creation error is passed to the template as ``msg``.
    On success (``start_new_project`` returns an empty string) the project
    id is stored in the session and the request is redirected to the page
    at ``self.get_url(0)``.

    ``taxon_domain`` is accepted for the form but not used here.
    ``taxon`` is converted to an integer reference taxon only when
    ``use_reference`` is submitted.

    NOTE(review): the block nesting was reconstructed from a flattened
    source line -- confirm against the original layout.
    """
    self.fetch_session_data()
    smi = 1

    kw_args = self.get_template_args(smi)

    # does this stay the same over time???
    # taxon domain and corresponding uniprot ancestor numbers
    kw_args['taxon_domains'] = self.TAXON_DOMAINS

    # start a new project
    if fasta_file and sequence_type and project_id:
        error_msg = None

        if fasta_file.file is None:
            error_msg = 'No fasta file provided'
        elif len(project_id) < 4:
            error_msg = 'Project id should be at least 4 characters long'
        elif ' ' in project_id:
            error_msg = 'Spaces are not allowed in the project id'
        # \Z instead of $: '$' would also accept an id ending in a newline
        elif not re.match(r'^[A-Za-z0-9_-]*\Z', project_id):
            error_msg = 'Only characters, digits, dashes, and ' +\
                'underscores are allowed in a project id'
        else:
            # taxon domain is not used...
            taxon_id = None
            if use_reference is not None:
                try:
                    taxon_id = int(taxon)
                except (TypeError, ValueError):
                    # missing or non-numeric taxon: report it instead of
                    # failing the whole request
                    error_msg = 'Invalid reference taxon provided'

            if error_msg is None:
                try:
                    # initiate new project
                    error_msg = self.project_manager.start_new_project(
                        project_id, fasta_file, sequence_type,
                        reference_taxon=taxon_id)
                except Exception:
                    print(traceback.format_exc())
                    error_msg = 'Error creating new project'

        if error_msg == '':
            # store project id in session
            cherrypy.session[self.SESSION_PROJECT_KEY] = project_id
            # redirect to project list page
            raise cherrypy.HTTPRedirect(self.get_url(0))

        kw_args['msg'] = error_msg

    template_f = self.get_template_f(smi)
    return spiceweb.get_template(template_f, **kw_args)
def list(self):
    """Render the page listing the projects known to the project manager."""
    self.fetch_session_data()
    smi = 0

    available_projects = self.project_manager.get_projects()

    template_args = self.get_template_args(smi)
    template_args['projects'] = available_projects

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def calculate(self, featcat_id=None):
    """Show the feature calculation page and queue a requested calculation.

    When ``featcat_id`` is given, its category (the part before the first
    underscore) is looked up in ``fe.PROTEIN_FEATURE_CATEGORIES``. The
    calculation is queued and the request redirected to ``self.get_url(0)``
    only if the category exists, its required data is available, and it
    has not been calculated already; otherwise an explanatory ``msg`` is
    passed to the template.

    NOTE(review): the block nesting was reconstructed from a flattened
    source line -- confirm against the original layout.
    """
    smi = 1
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    pm = self.project_manager
    fe = pm.get_feature_extraction()

    kw_args = self.get_template_args(smi)
    kw_args["fe"] = fe

    if featcat_id is not None:
        try:
            featcat = fe.PROTEIN_FEATURE_CATEGORIES[featcat_id.split("_")[0]]
        except KeyError:
            # the id comes from the request; an unknown category should
            # produce a message, not an unhandled error
            featcat = None

        if featcat is None:
            # static text: msg is rendered as HTML, so do not echo the id
            kw_args["msg"] = "Unknown feature category"
        else:
            proteins = fe.protein_data_set.proteins

            # check if data required for feature calculation is available
            missing_data = []
            for get_data_func, all_objects in featcat.required_data:
                name = " ".join(get_data_func.__name__.split("_")[1:])
                # data is required either for every protein or for at
                # least one, depending on the all_objects flag
                check = all if all_objects else any
                if not check(get_data_func(p) for p in proteins):
                    missing_data.append(name)

            if missing_data:
                kw_args["msg"] = "<p>Required data is missing: %s</p>" % (
                    ", ".join(missing_data))
            # check if this feature category is already in the feature matrix
            elif featcat_id in fe.available_protein_featcat_ids():
                kw_args["msg"] = ("This feature category has "
                                  "already been calculated")
            else:
                # put job in queue
                pm.run_feature_extraction([featcat_id])
                # redirect to feature list page
                raise cherrypy.HTTPRedirect(self.get_url(0))

    kw_args["aaindex_scale_ids"] = sequtil.aaindex_scale_ids
    kw_args["pseaac_scale_ids"] = sequtil.pseaac_scale_ids

    template_f = self.get_template_f(smi)
    return spiceweb.get_template(template_f, **kw_args)
def show_feature_data(self, smi):
    """Render the feature data page identified by sub menu index ``smi``.

    Returns the "no project selected" page when the session has no project.
    """
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    template_args = self.get_template_args(smi)
    template_args["fe"] = self.project_manager.get_feature_extraction()

    # The filter is always shown; a toggle based on
    # project_manager.get_feat_calc_status() is currently disabled.
    template_args["show_filter"] = True

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def list(self):
    """Render the page listing the classifier ids of the current project.

    Returns the "no project selected" page when the session has no project.
    """
    smi = 0
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    classifier_ids = self.project_manager.get_classifier_ids()

    template_args = self.get_template_args(smi)
    template_args['cl_ids'] = classifier_ids

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def list(self):
    """Render the feature list page of the current project.

    Passes the feature extraction object, its feature categories and the
    feature calculation status to the template. Returns the "no project
    selected" page when the session has no project.
    """
    smi = 0
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    manager = self.project_manager
    extraction = manager.get_feature_extraction()

    template_args = self.get_template_args(smi)
    template_args["fe"] = extraction
    template_args["featcats"] = extraction.PROTEIN_FEATURE_CATEGORIES
    template_args["feat_status"] = manager.get_feat_calc_status()

    page_template = self.get_template_f(smi)
    return spiceweb.get_template(page_template, **template_args)
def load_example(self, example_number):
    """Start one of the bundled example projects and redirect.

    ``example_number`` indexes ``self.EXAMPLES``; a value that is not an
    integer or is out of range renders ``no_such_example.html``. Otherwise
    the example project is started and the request is redirected to the
    page at ``self.get_url(0)``. The project id is stored in the session
    only when ``start_example_project`` reports no error (empty string).
    """
    self.fetch_session_data()
    smi = 1

    try:
        example_number = int(example_number)
    except (TypeError, ValueError):
        # TypeError covers non-scalar request values (e.g. a list)
        example_number = -1

    if not 0 <= example_number < len(self.EXAMPLES):
        kw_args = self.get_template_args(smi)
        template_f = 'no_such_example.html'
        return spiceweb.get_template(template_f, **kw_args)

    pm = self.project_manager

    pid, seq_f, seq_type, labeling_f = self.EXAMPLES[example_number]
    example_d = os.path.join(spiceweb.spiceweb_dir, self.EXAMPLE_DIR, pid)
    seq_f = os.path.join(example_d, seq_f)
    labeling_f = os.path.join(example_d, labeling_f)

    error_msg = pm.start_example_project(pid, seq_f, seq_type, labeling_f)

    if error_msg == '':
        # store project id in session
        cherrypy.session[self.SESSION_PROJECT_KEY] = pid
    else:
        # single-argument print calls behave the same on Python 2 and 3
        print('')
        print('This should not happen...')
        print(error_msg)

    # redirect to project list page in both cases
    raise cherrypy.HTTPRedirect(self.get_url(0))
def new(self, cl_type=None, n_fold_cv=None, labeling_name=None,
        class_ids=None, feat_ids=None):
    """Show the new-classifier form and start a submitted classification.

    A classification job is started when both ``cl_type`` and ``n_fold_cv``
    are submitted and at least two class ids and one feature id are given;
    the request is then redirected to the page at ``self.get_url(0)``.
    In every other case the form is rendered, with ``error_msg`` set to the
    validation error (or ``None`` when nothing was submitted).
    """
    smi = 1
    self.fetch_session_data()

    if self.project_id is None:
        return self.no_project_selected()

    pm = self.project_manager
    error_msg = None

    # start classification job if required arguments from form are there
    if cl_type is not None and n_fold_cv is not None:
        # NOTE(review): a form field submitted once may arrive as a plain
        # string rather than a list, in which case len() counts characters
        # -- confirm how the form posts class_ids / feat_ids.
        if class_ids is None or len(class_ids) < 2:
            error_msg = 'At least two classes should be provided.'
        elif feat_ids is None or len(feat_ids) < 1:
            error_msg = 'No features selected.'
        else:
            pm.run_classification(cl_type, n_fold_cv, labeling_name,
                                  class_ids, feat_ids)
            # redirect to classifier list page
            raise cherrypy.HTTPRedirect(self.get_url(0))

    # show the form: either nothing was submitted yet, or validation failed
    # and the error must be reported back to the user
    kw_args = self.get_template_args(smi)
    kw_args['fe'] = pm.get_feature_extraction()
    kw_args['show_filter'] = True
    kw_args['error_msg'] = error_msg
    kw_args['cl_ids'] = pm.get_classifier_ids()

    template_f = self.get_template_f(smi)
    return spiceweb.get_template(template_f, **kw_args)
def details(self, project_id, data_type=None, data_name=None, data_file=None):
    """Select a project, show its details page, and handle data uploads.

    Renders ``no_such_project.html`` when ``project_id`` is not among the
    project manager's projects. Otherwise the project id is stored in the
    session and, when ``data_type``, ``data_name`` and ``data_file`` are
    all submitted, the upload is processed:

    * ``data_type == 'labeling'``: the labeling is validated and added;
      the resulting message goes to the template as ``msg_lab``
      (truncated to 100 characters).
    * ``data_type == 'data_source'``: the sequence data is added; the
      resulting message goes to the template as ``msg_seq``.

    NOTE(review): the block nesting was reconstructed from a flattened
    source line -- confirm against the original layout.
    """
    self.fetch_session_data()
    smi = 2

    # first check if the provided project_id exists
    existing_projects = [p[0] for p in self.project_manager.get_projects()]
    if project_id not in existing_projects:
        # return message that this project does not exist
        kw_args = self.get_template_args(smi)
        template_f = 'no_such_project.html'
        return spiceweb.get_template(template_f, **kw_args)

    # store project id in session
    cherrypy.session[self.SESSION_PROJECT_KEY] = project_id

    # reset the session data, using the new project id
    self.fetch_session_data()

    msg_lab = ''
    msg_seq = ''

    # in case of a data file upload
    if data_type and data_name and data_file:
        pm = self.project_manager

        # the upload labeling case
        if data_type == 'labeling':
            # check labeling input data
            if data_file.file is None:
                msg_lab = 'No labeling file provided'
            elif ' ' in data_name:
                msg_lab = 'Spaces are not allowed in the labeling name'
            # \Z instead of $: '$' would also accept a trailing newline
            elif not re.match(r'^[A-Za-z0-9_-]*\Z', data_name):
                msg_lab = ('Only characters, digits, dashes, and '
                           'underscores are allowed in a labeling name')
            else:
                # try to add the labeling, storing errors in msg_lab
                try:
                    msg_lab = pm.add_labeling(data_name, data_file.file)
                except Exception:
                    print(traceback.format_exc())
                    msg_lab = 'Error adding labeling'

            # chop labeling message to reasonable size
            if len(msg_lab) > 100:
                msg_lab = msg_lab[:100] + '...'

        # the upload sequence data case
        elif data_type == 'data_source':
            # check sequence input data
            if data_file.file is None:
                msg_seq = 'No file provided.'
            else:
                # try to add sequence data
                try:
                    msg_seq = pm.add_data_source(data_name, data_file.file)
                except Exception:
                    # log the cause, same as the labeling branch
                    print(traceback.format_exc())
                    msg_seq = 'Error adding sequence data.'

            # append format hints to data errors
            if msg_seq[:13] == 'Error in data':
                msg_seq = msg_seq + '<br /><br />NOTE:<ul><li>Secundary structure sequences should consist of the letters C, H, and E (same as output psipred)</li><li>Solvent accessibility sequences should consist of the letters B (buried), and E (exposed)</li></ul>'

    fe = self.project_manager.get_feature_extraction()

    kw_args = self.get_template_args(smi)
    kw_args['fe'] = fe
    kw_args['data_sources'] = ['prot_seq', 'orf_seq', 'ss_seq', 'sa_seq']
    kw_args['msg_lab'] = msg_lab
    kw_args['msg_seq'] = msg_seq

    template_f = self.get_template_f(smi)
    return spiceweb.get_template(template_f, **kw_args)
def no_project_selected(self):
    """Render the page shown when the session has no project selected."""
    template_args = self.get_template_args(0)
    return spiceweb.get_template("no_project_selected.html", **template_args)
def no_such_classifier(self):
    """Render the page shown when a requested classifier id is unknown."""
    template_args = self.get_template_args(0)
    return spiceweb.get_template('no_such_classifier.html', **template_args)
def run(self, cl_id, data_set=None):
    """Show the run page of classifier ``cl_id`` and start a requested run.

    When ``data_set`` (a project id) is given and the classifier has
    finished, the sequence data required by the classifier's feature
    categories is checked against that project's proteins. If everything
    is available the classification is started and the request is
    redirected to ``self.get_url(3) + '/' + cl_id``; otherwise ``msg`` is
    passed to the template listing what is missing.

    The template always receives the classifier file, the per-status
    data sets from ``parse_classify_job_files`` and the data sets that do
    not appear in any of those status lists.

    NOTE(review): the block nesting was reconstructed from flattened
    source lines -- confirm against the original layout.
    """
    smi = 3
    self.fetch_session_data()
    pm = self.project_manager

    # guard before touching any project data
    if self.project_id is None:
        return self.no_project_selected()
    if cl_id not in pm.get_classifier_ids():
        return self.no_such_classifier()

    kw_args = self.get_template_args(smi)
    kw_args['cl_id'] = cl_id
    kw_args['cl_ids'] = pm.get_classifier_ids()

    all_data_sets = [p[0] for p in pm.get_projects()]

    if data_set is not None and pm.get_classifier_finished(cl_id):
        if data_set not in all_data_sets:
            # data_set comes from the request: never switch the project
            # manager to, or echo into HTML, an id that is not a project
            kw_args['msg'] = '<p>The selected data set does not exist.</p>'
        else:
            fe = pm.get_feature_extraction()

            # required feature categories for running classifier cl_id
            settings_dict = pm.get_classifier_settings(cl_id)
            feat_ids = settings_dict['feature_names']
            feat_cats = set(f.split('_')[0] for f in feat_ids)

            # required sequence data for calculating the feature categories
            required_seq_data = set()
            for fc in feat_cats:
                required_seq_data.update(
                    fe.PROTEIN_FEATURE_CATEGORIES[fc].required_data)

            # temporarily switch to the other project to read its proteins;
            # always switch back, even if loading that project fails
            prev_proj = pm.project_id
            try:
                pm.set_project(data_set)
                data_set_fe = pm.get_feature_extraction()
                data_set_proteins = data_set_fe.protein_data_set.proteins
            finally:
                pm.set_project(prev_proj)

            # check if required data is available for data set
            missing_data = set()
            for get_data_func, all_objects in required_seq_data:
                name = ' '.join(get_data_func.__name__.split('_')[1:])
                check = all if all_objects else any
                if not check(get_data_func(p) for p in data_set_proteins):
                    missing_data.add(name)

            if missing_data:
                # send error msg to template
                items = ''.join('<li>%ss</li>' % (item,)
                                for item in sorted(missing_data))
                kw_args['msg'] = '<p>The features that are required for running this classifier can not be calculated for the %s project, because the following sequence data is not available in this project:</p><ul>' % (data_set,) + items + '</ul>'
            else:
                # run classifier on data set
                pm.run_classify(cl_id, data_set)
                # redirect to classifier run page
                raise cherrypy.HTTPRedirect(self.get_url(3) + '/' + cl_id)

    # fetch all classification data for this classifier
    classification_status = pm.parse_classify_job_files(cl_id)
    classification_busy = set()
    for status_data_sets in classification_status.values():
        classification_busy.update(status_data_sets)
    classification_unavailable = sorted(
        set(all_data_sets) - classification_busy)

    # forward this data to the template:
    # to inform the template if classifier construction has finished
    kw_args['classifier_f'] = pm.get_classifier_f(cl_id)
    # for the drop-down list of data sets without classification
    kw_args['data_sets'] = classification_unavailable
    # for the results and status tables
    kw_args['classification_status'] = classification_status

    template_f = self.get_template_f(smi)
    return spiceweb.get_template(template_f, **kw_args)