Example #1
0
    def upload(self, object_ids_f=None, feature_matrix_f=None):
        """Show the custom-feature upload page and process a submitted upload.

        Without an active project the "no project selected" page is
        returned. When both uploads are supplied they are passed to the
        project manager's ``add_custom_features``; an empty-string result
        is treated as success and leads to a redirect to
        ``self.get_url(0)``. Any other result is handed to the template as
        ``msg``.
        """
        smi = 2

        self.fetch_session_data()
        if self.project_id is None:
            return self.no_project_selected()

        manager = self.project_manager
        feature_extraction = manager.get_feature_extraction()

        template_args = self.get_template_args(smi)
        template_args["fe"] = feature_extraction

        # only act when both form fields were submitted
        if object_ids_f and feature_matrix_f:

            if object_ids_f.file is None:
                problem = "No protein ids file selected"
            elif feature_matrix_f.file is None:
                problem = "No feature matrix file selected"
            else:
                problem = manager.add_custom_features(
                    self.project_id, object_ids_f, feature_matrix_f
                )

            # an empty message means success: go back to the feature list
            if problem == "":
                raise cherrypy.HTTPRedirect(self.get_url(0))

            template_args["msg"] = problem

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #2
0
    def details(self, cl_id):
        """Render the detail page of classifier ``cl_id``.

        Returns the "no project selected" page when no project is active
        and the "no such classifier" page when ``cl_id`` is not among the
        project manager's classifier ids. For a finished classifier the
        results, settings and (when the ROC file exists on disk) a ROC url
        are added to the template arguments.
        """
        smi = 2
        self.fetch_session_data()

        manager = self.project_manager

        if self.project_id is None:
            return self.no_project_selected()
        if cl_id not in manager.get_classifier_ids():
            return self.no_such_classifier()

        template_args = self.get_template_args(smi)
        template_args['cl_ids'] = self.project_manager.get_classifier_ids()
        template_args['cl_id'] = cl_id

        if self.project_manager.get_classifier_finished(cl_id):

            per_fold, averaged = manager.get_classifier_result(cl_id)
            template_args['cv_results'] = per_fold
            template_args['avg_results'] = averaged
            template_args['cl_settings'] = manager.get_classifier_settings(cl_id)
            template_args['cl_names'] = self.CL_NAMES

            # only link to the ROC plot if its file is actually there
            roc_path = manager.get_roc_f(cl_id)
            roc_url = None
            if roc_path and os.path.exists(roc_path):
                roc_url = '%s%s%s/%s' % (self.root_url, self.mm_url,
                                         'roc', cl_id)
            template_args['roc_url'] = roc_url

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #3
0
    def new(self, project_id=None, fasta_file=None, sequence_type=None,
            use_reference=None, taxon_domain=None, taxon=None):
        """Show the new-project form and create a project on submission.

        A project is only started when ``fasta_file``, ``sequence_type``
        and ``project_id`` are all supplied. The submitted values are
        validated first; if validation fails, or the project manager
        reports a non-empty error message, the form is rendered again with
        that message in ``msg``.

        Args:
            project_id: requested id; at least 4 characters, consisting of
                letters, digits, dashes and underscores only.
            fasta_file: uploaded file object (its ``file`` attribute is
                checked for ``None``).
            sequence_type: passed through to ``start_new_project``.
            use_reference: when not ``None``, ``taxon`` is converted to an
                int and passed on as the reference taxon.
            taxon_domain: accepted but not used.
            taxon: reference taxon id as submitted by the form.

        Raises:
            cherrypy.HTTPRedirect: to ``self.get_url(0)`` after the project
                was created successfully.
        """
        self.fetch_session_data()
        smi = 1

        kw_args = self.get_template_args(smi)

        # taxon domains and corresponding uniprot ancestor numbers
        # NOTE: the original author was unsure whether these stay the same
        # over time
        kw_args['taxon_domains'] = self.TAXON_DOMAINS

        # start a new project
        if fasta_file and sequence_type and project_id:

            # taxon domain is not used...
            taxon_id = None
            taxon_valid = True
            if use_reference is not None:
                # a missing or non-numeric taxon must become a form error
                # instead of an unhandled exception
                try:
                    taxon_id = int(taxon)
                except (TypeError, ValueError):
                    taxon_valid = False

            if fasta_file.file is None:
                error_msg = 'No fasta file provided'
            elif len(project_id) < 4:
                error_msg = 'Project id should be at least 4 characters long'
            elif ' ' in project_id:
                error_msg = 'Spaces are not allowed in the project id'
            # \Z instead of $: '$' would also accept a trailing newline
            elif not re.match(r'^[A-Za-z0-9_-]*\Z', project_id):
                error_msg = 'Only characters, digits, dashes, and ' +\
                            'underscores are allowed in a project id'
            elif not taxon_valid:
                error_msg = 'Invalid reference taxon'
            else:
                try:
                    # initiate new project
                    error_msg = self.project_manager.start_new_project(
                            project_id, fasta_file, sequence_type,
                            reference_taxon=taxon_id)
                except Exception:
                    # Exception rather than a bare except, so that
                    # SystemExit / KeyboardInterrupt are not swallowed
                    print(traceback.format_exc())
                    error_msg = 'Error creating new project'

            if error_msg == '':

                # store project id in session
                cherrypy.session[self.SESSION_PROJECT_KEY] = project_id

                # redirect to project list page
                raise cherrypy.HTTPRedirect(self.get_url(0))

            kw_args['msg'] = error_msg

        template_f = self.get_template_f(smi)

        return spiceweb.get_template(template_f, **kw_args)
Example #4
0
    def list(self):
        """Render the page listing the projects returned by the project manager."""
        self.fetch_session_data()
        smi = 0

        known_projects = self.project_manager.get_projects()

        template_args = self.get_template_args(smi)
        template_args['projects'] = known_projects

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #5
0
    def calculate(self, featcat_id=None):
        """Show the feature calculation page and queue a calculation job.

        Without an active project the "no project selected" page is
        returned. When ``featcat_id`` is given, the data required by its
        feature category (looked up by the part of the id before the first
        underscore) is checked against the project's proteins. If data is
        missing, or the category is already available, a ``msg`` is passed
        to the template; otherwise the job is started and the request is
        redirected to ``self.get_url(0)``.
        """
        smi = 1

        self.fetch_session_data()
        if self.project_id is None:
            return self.no_project_selected()

        manager = self.project_manager
        extraction = manager.get_feature_extraction()

        template_args = self.get_template_args(smi)
        template_args["fe"] = extraction

        if featcat_id is not None:

            proteins = extraction.protein_data_set.proteins
            category_key = featcat_id.split("_")[0]
            category = extraction.PROTEIN_FEATURE_CATEGORIES[category_key]

            # collect the names of required data that is not available
            unavailable = []
            for getter, needed_for_all in category.required_data:
                # readable name: getter name without its first '_' part
                label = " ".join(getter.__name__.split("_")[1:])
                present = [getter(p) for p in proteins]
                satisfied = all(present) if needed_for_all else any(present)
                if not satisfied:
                    unavailable.append(label)

            if unavailable:
                template_args["msg"] = "<p>Required data is missing: %s</p>" % (
                    ", ".join(unavailable)
                )

            # nothing to do if the category is already in the feature matrix
            elif featcat_id in extraction.available_protein_featcat_ids():
                template_args["msg"] = (
                    "This feature category has already been calculated"
                )

            else:
                # queue the job, then go to the feature list page
                manager.run_feature_extraction([featcat_id])
                raise cherrypy.HTTPRedirect(self.get_url(0))

        template_args["aaindex_scale_ids"] = sequtil.aaindex_scale_ids
        template_args["pseaac_scale_ids"] = sequtil.pseaac_scale_ids

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #6
0
    def show_feature_data(self, smi):
        """Render the feature-data page for sub-menu index ``smi``.

        Returns the "no project selected" page when no project is active.
        Otherwise the feature extraction object is passed to the template
        as ``fe`` and ``show_filter`` is always set to ``True``.
        """
        self.fetch_session_data()
        if self.project_id is None:
            return self.no_project_selected()

        template_args = self.get_template_args(smi)
        template_args["fe"] = self.project_manager.get_feature_extraction()
        # the filter is shown unconditionally; it is not derived from the
        # feature calculation status
        template_args["show_filter"] = True

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #7
0
    def list(self):
        """Render the classifier list page of the active project.

        Returns the "no project selected" page when no project is active;
        otherwise the project manager's classifier ids are passed to the
        template as ``cl_ids``.
        """
        smi = 0
        self.fetch_session_data()

        if self.project_id is None:
            return self.no_project_selected()

        classifier_ids = self.project_manager.get_classifier_ids()

        template_args = self.get_template_args(smi)
        template_args['cl_ids'] = classifier_ids

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #8
0
    def list(self):
        """Render the feature list page of the active project.

        Returns the "no project selected" page when no project is active.
        Otherwise the feature extraction object, its protein feature
        categories and the feature calculation status are passed to the
        template.
        """
        smi = 0

        self.fetch_session_data()
        if self.project_id is None:
            return self.no_project_selected()

        manager = self.project_manager
        extraction = manager.get_feature_extraction()

        template_args = self.get_template_args(smi)
        template_args["fe"] = extraction
        template_args["featcats"] = extraction.PROTEIN_FEATURE_CATEGORIES
        template_args["feat_status"] = manager.get_feat_calc_status()

        page = self.get_template_f(smi)
        return spiceweb.get_template(page, **template_args)
Example #9
0
    def load_example(self, example_number):
        """Start the example project ``self.EXAMPLES[example_number]``.

        Args:
            example_number: index into ``self.EXAMPLES``. A value that
                cannot be converted to an int is treated as out of range.

        Returns:
            The rendered ``no_such_example.html`` page when the index is
            out of range.

        Raises:
            cherrypy.HTTPRedirect: to ``self.get_url(0)`` once the project
                manager was asked to start the example project. On success
                the project id is stored in the session first; on failure
                the error message is only printed.
        """
        self.fetch_session_data()
        smi = 1

        # int() raises TypeError (not ValueError) for e.g. None or a list
        try:
            example_number = int(example_number)
        except (TypeError, ValueError):
            example_number = -1

        if example_number < 0 or example_number >= len(self.EXAMPLES):
            kw_args = self.get_template_args(smi)
            template_f = 'no_such_example.html'
            return spiceweb.get_template(template_f, **kw_args)

        pm = self.project_manager
        (pid, seq_f, seq_type, labeling_f) = self.EXAMPLES[example_number]

        # example files live in <spiceweb dir>/<EXAMPLE_DIR>/<project id>/
        root_d = spiceweb.spiceweb_dir
        seq_f = os.path.join(root_d, self.EXAMPLE_DIR, pid, seq_f)
        labeling_f = os.path.join(root_d, self.EXAMPLE_DIR, pid, labeling_f)
        error_msg = pm.start_example_project(pid, seq_f, seq_type, labeling_f)

        if error_msg == '':
            # store project id in session
            cherrypy.session[self.SESSION_PROJECT_KEY] = pid
        else:
            # single-argument print calls behave the same on Python 2 and 3
            print('')
            print('This should not happen...')
            print(error_msg)

        # redirect to project list page in either case
        raise cherrypy.HTTPRedirect(self.get_url(0))
Example #10
0
    def new(self, cl_type=None, n_fold_cv=None, labeling_name=None,
            class_ids=None, feat_ids=None):
        """Show the new-classifier form and start a classification job.

        Returns the "no project selected" page when no project is active.
        The form counts as submitted when both ``cl_type`` and
        ``n_fold_cv`` are given. A valid submission starts the job through
        the project manager and redirects; an invalid one re-renders the
        form with the validation message in ``error_msg``.

        Args:
            cl_type: classifier type, passed to ``run_classification``.
            n_fold_cv: number of cross-validation folds, passed through.
            labeling_name: labeling to use, passed through.
            class_ids: selected class ids; at least two are required.
            feat_ids: selected feature ids; at least one is required.

        Raises:
            cherrypy.HTTPRedirect: to ``self.get_url(0)`` after the job has
                been started.
        """
        smi = 1
        self.fetch_session_data()

        if self.project_id is None:
            return self.no_project_selected()

        error_msg = None

        # start classification job if required arguments from form are there
        if cl_type is not None and n_fold_cv is not None:

            # NOTE(review): if the framework delivers a single selected
            # value as a plain string, len() counts characters here --
            # confirm how class_ids / feat_ids arrive from the form.
            # 'not x' also covers None, i.e. nothing selected at all.
            if not class_ids or len(class_ids) < 2:
                error_msg = 'At least two classes should be provided.'
            elif not feat_ids or len(feat_ids) < 1:
                error_msg = 'No features selected.'
            else:
                self.project_manager.run_classification(
                    cl_type, n_fold_cv, labeling_name, class_ids, feat_ids)

                # redirect to classifier list page; only after the job was
                # really started, so validation errors are shown below
                raise cherrypy.HTTPRedirect(self.get_url(0))

        # show the form (first visit, or submission with errors)
        kw_args = self.get_template_args(smi)
        kw_args['fe'] = self.project_manager.get_feature_extraction()
        kw_args['show_filter'] = True
        kw_args['error_msg'] = error_msg
        kw_args['cl_ids'] = self.project_manager.get_classifier_ids()

        template_f = self.get_template_f(smi)

        return spiceweb.get_template(template_f, **kw_args)
Example #11
0
    def details(self, project_id, data_type=None, data_name=None,
                data_file=None):
        """Select a project, show its details and handle data uploads.

        When ``project_id`` is not among the existing projects the
        ``no_such_project.html`` page is returned. Otherwise the id is
        stored in the session and the session data is fetched again. If
        ``data_type``, ``data_name`` and ``data_file`` are all supplied,
        the upload is handled as a labeling (``data_type == 'labeling'``)
        or as sequence data (``data_type == 'data_source'``); resulting
        messages are passed to the template as ``msg_lab`` / ``msg_seq``.

        Args:
            project_id: id of the project to show.
            data_type: ``'labeling'`` or ``'data_source'``.
            data_name: name of the labeling or data source.
            data_file: uploaded file object (its ``file`` attribute is
                passed to the project manager).
        """
        self.fetch_session_data()
        smi = 2

        # first check if the provided project_id exists
        existing_projects = [p[0] for p in self.project_manager.get_projects()]

        if project_id not in existing_projects:

            # return message that this project does not exist
            kw_args = self.get_template_args(smi)
            template_f = 'no_such_project.html'
            return spiceweb.get_template(template_f, **kw_args)

        # store project id in session
        cherrypy.session[self.SESSION_PROJECT_KEY] = project_id

        # reset the session data, using the new project id
        self.fetch_session_data()

        msg_lab = ''
        msg_seq = ''

        # in case of a data file upload
        if data_type and data_name and data_file:

            pm = self.project_manager

            # the upload labeling case
            if data_type == 'labeling':

                # check labeling input data; the messages name the labeling
                # (data_name), which is what is being validated here
                if data_file.file is None:
                    msg_lab = 'No labeling file provided'
                elif ' ' in data_name:
                    msg_lab = 'Spaces are not allowed in the labeling name'
                # \Z instead of $: '$' would also accept a trailing newline
                elif not re.match(r'^[A-Za-z0-9_-]*\Z', data_name):
                    msg_lab = 'Only characters, digits, dashes, and ' +\
                              'underscores are allowed in a labeling name'

                # if no incorrect input data
                else:
                    # try to add the labeling, storing errors in msg_lab
                    try:
                        msg_lab = pm.add_labeling(data_name, data_file.file)
                    except Exception:
                        print(traceback.format_exc())
                        msg_lab = 'Error adding labeling'

                # chop labeling message to reasonable size
                if len(msg_lab) > 100:
                    msg_lab = msg_lab[:100] + '...'

            # the upload sequence data case
            elif data_type == 'data_source':

                # check sequence input data
                if data_file.file is None:
                    msg_seq = 'No file provided.'

                # if no incorrect input data
                else:
                    # try to add sequence data
                    try:
                        msg_seq = pm.add_data_source(data_name, data_file.file)
                    except Exception:
                        # log the cause, as the labeling branch does
                        print(traceback.format_exc())
                        msg_seq = 'Error adding sequence data.'

            # append a format hint to data errors reported for sequence data
            if msg_seq.startswith('Error in data'):
                msg_seq = msg_seq + '<br /><br />NOTE:<ul><li>Secundary structure sequences should consist of the letters C, H, and E (same as output psipred)</li><li>Solvent accessibility sequences should consist of the letters B (buried), and E (exposed)</li></ul>'

        fe = self.project_manager.get_feature_extraction()

        kw_args = self.get_template_args(smi)
        kw_args['fe'] = fe
        kw_args['data_sources'] = ['prot_seq', 'orf_seq', 'ss_seq', 'sa_seq']
        kw_args['msg_lab'] = msg_lab
        kw_args['msg_seq'] = msg_seq

        template_f = self.get_template_f(smi)

        return spiceweb.get_template(template_f, **kw_args)
Example #12
0
 def no_project_selected(self):
     """Render ``no_project_selected.html`` with the template arguments for index 0."""
     template_args = self.get_template_args(0)
     return spiceweb.get_template("no_project_selected.html", **template_args)
Example #13
0
 def no_such_classifier(self):
     """Render ``no_such_classifier.html`` with the template arguments for index 0."""
     template_args = self.get_template_args(0)
     return spiceweb.get_template('no_such_classifier.html', **template_args)
Example #14
0
    def run(self, cl_id, data_set=None):
        """Show the run page of classifier ``cl_id`` and start a run.

        Returns the "no project selected" page when no project is active
        and the "no such classifier" page for an unknown ``cl_id``. For a
        finished classifier and a given ``data_set`` (another project's
        id), the sequence data required by the classifier's feature
        categories is checked against that project's proteins. If data is
        missing, a message is passed to the template as ``msg``; otherwise
        the classification is started and the request is redirected.

        Args:
            cl_id: id of the classifier to run.
            data_set: id of the project to classify, or ``None`` to only
                show the page.

        Raises:
            cherrypy.HTTPRedirect: to ``self.get_url(3) + '/' + cl_id``
                after a classification run was started.
        """
        smi = 3
        self.fetch_session_data()

        # check for an active project before touching project data
        if self.project_id is None:
            return self.no_project_selected()

        pm = self.project_manager

        if cl_id not in pm.get_classifier_ids():
            return self.no_such_classifier()

        kw_args = self.get_template_args(smi)
        kw_args['cl_id'] = cl_id
        kw_args['cl_ids'] = pm.get_classifier_ids()

        if pm.get_classifier_finished(cl_id):

            if data_set is not None:

                fe = pm.get_feature_extraction()

                # required feature categories for running classifier cl_id
                settings_dict = pm.get_classifier_settings(cl_id)
                feat_ids = settings_dict['feature_names']
                feat_cats = set(f.split('_')[0] for f in feat_ids)

                # required sequence data for calculating the feature categories
                required_seq_data = set()
                for fc in feat_cats:
                    required_seq_data.update(
                        fe.PROTEIN_FEATURE_CATEGORIES[fc].required_data)

                # switch to the other project to read its proteins, and
                # always switch back, even if reading them fails
                prev_proj = pm.project_id
                try:
                    pm.set_project(data_set)
                    data_set_fe = pm.get_feature_extraction()
                    data_set_proteins = data_set_fe.protein_data_set.proteins
                finally:
                    pm.set_project(prev_proj)

                # check if required data is available for data set
                missing_data = set()
                for get_data_func, all_objects in required_seq_data:
                    # readable name: getter name without its first '_' part
                    name = ' '.join(get_data_func.__name__.split('_')[1:])
                    values = [get_data_func(p) for p in data_set_proteins]
                    available = all(values) if all_objects else any(values)
                    if not available:
                        missing_data.add(name)

                if missing_data:
                    # send error msg to template
                    # NOTE(review): data_set comes from the request and is
                    # interpolated unescaped; escape it if the template
                    # renders msg as raw HTML.
                    items = ''.join('<li>%ss</li>' % (item)
                                    for item in sorted(missing_data))
                    kw_args['msg'] = '<p>The features that are required for running this classifier can not be calculated for the %s project, because the following sequence data is not available in this project:</p><ul>' % (data_set) + items + '</ul>'
                else:

                    # run classifier on data set
                    pm.run_classify(cl_id, data_set)

                    # redirect to classifier run page
                    raise cherrypy.HTTPRedirect(self.get_url(3) + '/' + cl_id)

            # fetch all classification data for this classifier
            all_data_sets = [p[0] for p in pm.get_projects()]
            classification_status = pm.parse_classify_job_files(cl_id)
            classification_busy = []
            for data_sets in classification_status.values():
                classification_busy.extend(data_sets)
            classification_unavailable = sorted(set(all_data_sets) -
                                                set(classification_busy))

            # forward this data to the template:
            # to inform the template if classifier construction has finished
            kw_args['classifier_f'] = pm.get_classifier_f(cl_id)
            # for the drop-down list of data sets without classification
            kw_args['data_sets'] = classification_unavailable
            # for the results and status tables
            kw_args['classification_status'] = classification_status

        template_f = self.get_template_f(smi)
        return spiceweb.get_template(template_f, **kw_args)