def load_from_dir(cls, d):
    '''
    Build a feature matrix object from the files in a feature matrix
    directory.

    Every part is optional: object ids, labelings, feature ids, feature
    names and the feature matrix itself are only read when the corresponding
    file (or labeling directory) exists inside d.

    Args:
        | **d** *(str)*: The path to the feature matrix directory.

    Returns:
        A new object created with cls() and filled with whatever was found
        in d.
    '''
    fm = cls()

    # object ids are loaded before anything else is added to the object
    object_ids_f = os.path.join(d, cls.OBJECT_IDS_F)
    if os.path.exists(object_ids_f):
        fm.load_object_ids(object_ids_f)

    # each *.txt file in the labeling directory is one labeling, named after
    # the file (without extension); the one-class labeling is not read
    lab_d = os.path.join(d, cls.LABELING_D)
    if os.path.exists(lab_d):
        for lab_f in glob.glob(os.path.join(lab_d, '*.txt')):
            lname = os.path.splitext(os.path.basename(lab_f))[0]
            if lname != cls.ONE_CLASS_LABELING:
                label_dict, class_names = file_io.read_labeling(lab_f)
                fm.add_labeling(lname, label_dict, class_names)

    # feature ids and names stay None when their files are absent
    fids = None
    fids_f = os.path.join(d, cls.FEATURE_IDS_F)
    if os.path.exists(fids_f):
        with open(fids_f, 'r') as fin:
            fids = list(file_io.read_ids(fin))

    fnames = None
    fnames_f = os.path.join(d, cls.FEATURE_NAMES_F)
    if os.path.exists(fnames_f):
        with open(fnames_f, 'r') as fin:
            fnames = list(file_io.read_names(fin))

    featmat_f = os.path.join(d, cls.FEATURE_MATRIX_F)
    if os.path.exists(featmat_f):
        featmat = numpy.loadtxt(featmat_f)

        # loadtxt squeezes its result: a single column comes back 1-D and a
        # single value 0-D. Both are turned into a one-column 2-D matrix.
        # NOTE(review): a file holding one row of several values is also
        # returned 1-D and therefore ends up as a column, as before.
        if featmat.ndim < 2:
            featmat = featmat.reshape((-1, 1))

        fm.add_features(fids, featmat, fnames)

    return fm
def load_from_dir(cls, d):
    '''
    Build a feature matrix object from the files in a feature matrix
    directory.

    Every part is optional: object ids, labelings, feature ids, feature
    names and the feature matrix itself are only read when the corresponding
    file (or labeling directory) exists inside d.

    Args:
        | **d** *(str)*: The path to the feature matrix directory.

    Returns:
        A new object created with cls() and filled with whatever was found
        in d.
    '''
    fm = cls()

    # object ids are loaded before anything else is added to the object
    object_ids_f = os.path.join(d, cls.OBJECT_IDS_F)
    if os.path.exists(object_ids_f):
        fm.load_object_ids(object_ids_f)

    # each *.txt file in the labeling directory is one labeling, named after
    # the file (without extension); the one-class labeling is not read
    lab_d = os.path.join(d, cls.LABELING_D)
    if os.path.exists(lab_d):
        for lab_f in glob.glob(os.path.join(lab_d, '*.txt')):
            lname = os.path.splitext(os.path.basename(lab_f))[0]
            if lname != cls.ONE_CLASS_LABELING:
                label_dict, class_names = file_io.read_labeling(lab_f)
                fm.add_labeling(lname, label_dict, class_names)

    # feature ids and names stay None when their files are absent
    fids = None
    fids_f = os.path.join(d, cls.FEATURE_IDS_F)
    if os.path.exists(fids_f):
        with open(fids_f, 'r') as fin:
            fids = list(file_io.read_ids(fin))

    fnames = None
    fnames_f = os.path.join(d, cls.FEATURE_NAMES_F)
    if os.path.exists(fnames_f):
        with open(fnames_f, 'r') as fin:
            fnames = list(file_io.read_names(fin))

    featmat_f = os.path.join(d, cls.FEATURE_MATRIX_F)
    if os.path.exists(featmat_f):
        featmat = numpy.loadtxt(featmat_f)

        # loadtxt squeezes its result: a single column comes back 1-D and a
        # single value 0-D. Both are turned into a one-column 2-D matrix.
        # NOTE(review): a file holding one row of several values is also
        # returned 1-D and therefore ends up as a column, as before.
        if featmat.ndim < 2:
            featmat = featmat.reshape((-1, 1))

        fm.add_features(fids, featmat, fnames)

    return fm
def load_object_ids(self, object_ids_f):
    '''
    Read ids from a file and store them as the object ids.

    Args:
        object_ids_f (str): Path of the ids file. It is opened for reading
            and its content is parsed with file_io.read_ids.

    After the call, self.object_ids holds the list of ids that were read.
    '''
    with open(object_ids_f, 'r') as handle:
        loaded_ids = list(file_io.read_ids(handle))

    self.object_ids = loaded_ids
def add_custom_features(self, project_id, object_ids_f, feature_matrix_f):
    '''
    Add user supplied features to the feature matrix of a project.

    Args:
        project_id: Passed to self.set_project() to select the project.
        object_ids_f: Object whose ``file`` attribute is an open handle
            with the object ids, read with file_io.read_ids. Presumably an
            uploaded form part; verify against the caller.
        feature_matrix_f: Object whose ``file`` attribute is an open handle
            with the feature matrix, read with numpy.loadtxt.

    Returns:
        str: An empty string on success, otherwise a message that describes
        what went wrong.

    Both ``file`` handles are closed before returning, also when reading one
    of them fails.
    '''
    self.set_project(project_id)

    try:
        try:
            object_ids = list(file_io.read_ids(object_ids_f.file))
        except Exception as e:
            print('\n%s\n%s\n%s\n' % (e, type(e), e.args))
            return 'Error in object ids file'

        try:
            featmat = numpy.loadtxt(feature_matrix_f.file)
        except Exception as e:
            print('\n%s\n%s\n%s\n' % (e, type(e), e.args))
            return 'Error in feature matrix file'
    finally:
        # Close on every path. getattr guards the case where an argument has
        # no usable ``file`` attribute, which the handlers above already
        # report as a read error; the cleanup must not mask that message.
        for upload in (object_ids_f, feature_matrix_f):
            handle = getattr(upload, 'file', None)
            if handle is not None:
                handle.close()

    fm = self.get_feature_matrix()

    if sorted(object_ids) != sorted(fm.object_ids):
        return ('The protein ids do not correspond to the proteins '
                'in this project')

    if featmat.shape[0] != len(fm.object_ids):
        return ('The number of rows in the feature matrix does not '
                'correspond to the number of proteins in this project.')

    # reorder feature matrix rows using the indices that fm reports for the
    # supplied ids (presumably into the project's own object order; confirm
    # against object_indices)
    featmat = featmat[fm.object_indices(object_ids)]

    try:
        fm.add_custom_features(featmat)
    except ValueError as e:
        # ValueError messages are returned to the caller as is
        return str(e)
    except Exception as e:
        print(e)
        return 'Something went wrong while adding custom features'

    fm.save_to_dir(self.fm_dir)

    return ''
def add_custom_features(self, project_id, object_ids_f, feature_matrix_f):
    '''
    Add user supplied features to the feature matrix of a project.

    Args:
        project_id: Passed to self.set_project() to select the project.
        object_ids_f: Object whose ``file`` attribute is an open handle
            with the object ids, read with file_io.read_ids. Presumably an
            uploaded form part; verify against the caller.
        feature_matrix_f: Object whose ``file`` attribute is an open handle
            with the feature matrix, read with numpy.loadtxt.

    Returns:
        str: An empty string on success, otherwise a message that describes
        what went wrong.

    Both ``file`` handles are closed before returning, also when reading one
    of them fails.
    '''
    self.set_project(project_id)

    try:
        try:
            object_ids = list(file_io.read_ids(object_ids_f.file))
        except Exception as e:
            print('\n%s\n%s\n%s\n' % (e, type(e), e.args))
            return 'Error in object ids file'

        try:
            featmat = numpy.loadtxt(feature_matrix_f.file)
        except Exception as e:
            print('\n%s\n%s\n%s\n' % (e, type(e), e.args))
            return 'Error in feature matrix file'
    finally:
        # Close on every path. getattr guards the case where an argument has
        # no usable ``file`` attribute, which the handlers above already
        # report as a read error; the cleanup must not mask that message.
        for upload in (object_ids_f, feature_matrix_f):
            handle = getattr(upload, 'file', None)
            if handle is not None:
                handle.close()

    fm = self.get_feature_matrix()

    if sorted(object_ids) != sorted(fm.object_ids):
        return ('The protein ids do not correspond to the proteins '
                'in this project')

    if featmat.shape[0] != len(fm.object_ids):
        return ('The number of rows in the feature matrix does not '
                'correspond to the number of proteins in this project.')

    # reorder feature matrix rows using the indices that fm reports for the
    # supplied ids (presumably into the project's own object order; confirm
    # against object_indices)
    featmat = featmat[fm.object_indices(object_ids)]

    try:
        fm.add_custom_features(featmat)
    except ValueError as e:
        # ValueError messages are returned to the caller as is
        return str(e)
    except Exception as e:
        print(e)
        return 'Something went wrong while adding custom features'

    fm.save_to_dir(self.fm_dir)

    return ''
def taxon_list(self, taxon_domain=None):
    '''
    Return the taxonomies of a domain as HTML select options, wrapped in
    JSON.

    The taxonomies are read from '<taxon id>.txt' in the project manager's
    reference data directory; each non-blank line holds an integer taxon id
    followed by the taxon name. If '<taxon id>_favo.txt' exists and lists
    ids, these are offered first in a separate "Short list" group.

    Args:
        taxon_domain: The taxon id of the domain; anything int() accepts.

    Returns:
        str: A JSON object with one key, 'taxon_list', holding the
        <optgroup>/<option> markup. The response Content-Type header is set
        to 'application/json'.

    Raises:
        TypeError, ValueError: If taxon_domain cannot be converted to an
            int; this includes the default value None.
        KeyError: If the short list names an id that is missing from the
            full list.
    '''
    self.fetch_session_data()
    pm = self.project_manager

    taxon_id = int(taxon_domain)

    f = os.path.join(pm.ref_data_dir, '%i.txt' % (taxon_id))
    f_favo = os.path.join(pm.ref_data_dir, '%i_favo.txt' % (taxon_id))

    taxon_tuples = []
    if os.path.exists(f):
        with open(f, 'r') as fin:
            for line in fin:
                tokens = line.split()
                # skip blank lines instead of failing on tokens[0]
                if not tokens:
                    continue
                taxon_tuples.append((int(tokens[0]), ' '.join(tokens[1:])))

    ids_favo = []
    if os.path.exists(f_favo):
        ids_favo = [int(i) for i in file_io.read_ids(f_favo)]

    # turn it into select list, would be nicer to let javascript do this
    option = '<option value="%i">%s (taxon id: %i)</option>\n'
    parts = []

    if ids_favo:
        taxon_dict = dict(taxon_tuples)
        parts.append('<optgroup label="Short list">\n')
        parts.extend(option % (i, taxon_dict[i], i) for i in ids_favo)
        parts.append('</optgroup>\n')

    parts.append(
        '<optgroup label="All uniprot complete proteome taxonomies">\n')
    parts.extend(option % (i, name, i) for i, name in taxon_tuples)
    parts.append('</optgroup>\n')

    select_str = ''.join(parts)

    cherrypy.response.headers['Content-Type'] = 'application/json'
    return simplejson.dumps(dict(taxon_list=select_str))
def load_proteins(self, protein_ids_f):
    '''
    Read protein ids from a file and hand them to set_proteins.

    Args:
        protein_ids_f (str): Path of the ids file. It is opened for reading
            and its content is parsed with file_io.read_ids.
    '''
    with open(protein_ids_f, 'r') as handle:
        loaded_ids = list(file_io.read_ids(handle))

    self.set_proteins(loaded_ids)