def convertBiomFileToStampProfile(self, file_name, output_name, metadata_name):
    """
    Convert a BIOM table into a tab-separated STAMP profile file.

    Function taken from PICRUSt by Morgan Langill.
    https://github.com/mlangill/get_mgrast_data/blob/master/biom_to_stamp.py

    Parameters:
      file_name     - path of the BIOM file; a '.gz' extension means the file
                      is read through gzip
      output_name   - path of the tab-separated profile file to write
      metadata_name - observation metadata field used for the hierarchy
                      columns; anything from the first '(' onwards is
                      discarded. None or '<observation ids>' writes the
                      observation ids only.

    Returns None. If the metadata field is not present in the table, a
    message box is shown and no output file is written.
    """
    # allow file to be optionally gzipped (must use extension '.gz')
    ext = splitext(file_name)[1]
    if ext == '.gz':
        biom_handle = gzip.open(file_name, 'rb')
    else:
        biom_handle = open(file_name, 'U')

    # NOTE(review): closing the handle here assumes parse_biom_table has
    # finished reading the file by the time it returns -- confirm.
    try:
        table = parse_biom_table(biom_handle)
    finally:
        biom_handle.close()

    # Trim the label only when one was supplied; calling split() on None
    # would raise before the None check below could ever run.
    if metadata_name is not None:
        metadata_name = metadata_name.split('(')[0].rstrip()

    if metadata_name is None or metadata_name == '<observation ids>':
        max_len_metadata = 0
    elif table.observation_metadata and metadata_name in table.observation_metadata[0]:
        # figure out the longest list within the given metadata
        max_len_metadata = max(len(p[metadata_name]) for p in table.observation_metadata)
    else:
        QtGui.QMessageBox.information(self, 'Unrecognized metadata file', "'" + metadata_name + "' was not found in the BIOM table.", QtGui.QMessageBox.Ok)
        return

    # make the header line: simple labels for each level in the metadata
    # (e.g. 'Level_1', 'Level_2', etc.), then the observation id column,
    # then the sample ids
    header = ['Level_' + str(level + 1) for level in range(max_len_metadata)]
    header.append('Observation Ids')
    header.extend(table.sample_ids)

    # the with-block closes the output file even if a row fails to serialize
    with open(output_name, 'w') as fout:
        fout.write("\t".join(header) + '\n')

        # now process each observation (row in the table)
        for obs_vals, obs_id, obs_metadata in table.iter(axis='observation'):
            row = []
            if max_len_metadata > 0:
                # copy, so the padding and counts added below do not end up
                # inside the table's own metadata list
                row = list(obs_metadata[metadata_name])

            # add blanks if the metadata doesn't fill each level
            row.extend(['unclassified'] * (max_len_metadata - len(row)))

            # Add the observation id as the last "Level"
            if isNumber(obs_id):
                row.append('ID' + obs_id)
            else:
                row.append(obs_id)

            # Add count data to the row
            row.extend(map(str, obs_vals))
            fout.write("\t".join(row) + '\n')
def determineColumns(self, data, profileTree):
    """
    Split the header columns into hierarchy headings and sample names.

    Parameters:
      data        - sequence of tab-separated lines; data[0] is the header
                    line and data[1] is the first data row
      profileTree - object that receives the result in its
                    hierarchyHeadings and sampleNames attributes

    Returns None. Raises IndexError if data has fewer than two lines.
    """
    firstDataRow = data[1].split('\t')

    # first column entry that is numeric (as judged by isNumber) is assumed
    # to be from first sample; if none is, every column counts as hierarchy
    firstSampleIndex = next(
        (index for index, entry in enumerate(firstDataRow) if isNumber(entry)),
        len(firstDataRow))

    # get hierarchical and sample names; a real list is built so that the
    # slices below do not depend on map() returning a list
    headings = [heading.strip() for heading in data[0].split('\t')]

    profileTree.hierarchyHeadings = headings[:firstSampleIndex]
    profileTree.sampleNames = headings[firstSampleIndex:]
def determineColumns(self, data, profileTree):
    """
    Split the header columns into hierarchy headings and sample names.

    Parameters:
      data        - sequence of tab-separated lines; data[0] is the header
                    line and data[1] is the first data row
      profileTree - object that receives the result in its
                    hierarchyHeadings and sampleNames attributes

    Returns None. Raises IndexError if data has fewer than two lines.
    """
    firstDataRow = data[1].split('\t')

    # first column entry that is numeric (as judged by isNumber) is assumed
    # to be from first sample; if none is, every column counts as hierarchy
    firstSampleIndex = next(
        (index for index, entry in enumerate(firstDataRow) if isNumber(entry)),
        len(firstDataRow))

    # get hierarchical and sample names; a real list is built so that the
    # slices below do not depend on map() returning a list
    headings = [heading.strip() for heading in data[0].split('\t')]

    profileTree.hierarchyHeadings = headings[:firstSampleIndex]
    profileTree.sampleNames = headings[firstSampleIndex:]
def sort(self, Ncol, order):
    '''
    Reorder the rows of self.arraydata by the column at index Ncol.

    Does nothing when the table has no rows. Otherwise the change is
    bracketed by the layoutAboutToBeChanged() / layoutChanged() signals.
    The rows are reversed after sorting when order is
    QtCore.Qt.DescendingOrder.
    '''
    if len(self.arraydata) == 0:
        return

    self.emit(QtCore.SIGNAL("layoutAboutToBeChanged()"))

    # the entry in the first row alone decides which sorter is used
    rows = self.arraydata
    if isNumber(rows[0][Ncol]):
        sorter = SortTableNumericStrCol
    else:
        sorter = SortTableStrCol
    self.arraydata = sorter(rows, Ncol)

    if order == QtCore.Qt.DescendingOrder:
        self.arraydata.reverse()

    self.emit(QtCore.SIGNAL("layoutChanged()"))
def sort(self, Ncol, order):
    '''
    Reorder the rows of self.arraydata by the column at index Ncol.

    Does nothing when the table has no rows. Otherwise the change is
    bracketed by the layoutAboutToBeChanged() / layoutChanged() signals.
    The rows are reversed after sorting when order is
    QtCore.Qt.DescendingOrder.
    '''
    if len(self.arraydata) == 0:
        return

    self.emit(QtCore.SIGNAL("layoutAboutToBeChanged()"))

    # the entry in the first row alone decides which sorter is used
    rows = self.arraydata
    if isNumber(rows[0][Ncol]):
        sorter = SortTableNumericStrCol
    else:
        sorter = SortTableStrCol
    self.arraydata = sorter(rows, Ncol)

    if order == QtCore.Qt.DescendingOrder:
        self.arraydata.reverse()

    self.emit(QtCore.SIGNAL("layoutChanged()"))
def convertBiomFileToStampProfile(self, file_name, output_name, metadata_name):
    """
    Convert a BIOM table into a tab-separated STAMP profile file.

    Function taken from PICRUSt by Morgan Langill.
    https://github.com/mlangill/get_mgrast_data/blob/master/biom_to_stamp.py

    Parameters:
      file_name     - path of the BIOM file; a '.gz' extension means the file
                      is read through gzip
      output_name   - path of the tab-separated profile file to write
      metadata_name - observation metadata field used for the hierarchy
                      columns; anything from the first '(' onwards is
                      discarded. None or '<observation ids>' writes the
                      observation ids only.

    Returns None. If the metadata field is not present in the table, a
    message box is shown and no output file is written.
    """
    # allow file to be optionally gzipped (must use extension '.gz')
    ext = splitext(file_name)[1]
    if ext == '.gz':
        biom_handle = gzip.open(file_name, 'rb')
    else:
        biom_handle = open(file_name, 'U')

    # NOTE(review): closing the handle here assumes parse_biom_table has
    # finished reading the file by the time it returns -- confirm.
    try:
        table = parse_biom_table(biom_handle)
    finally:
        biom_handle.close()

    # Trim the label only when one was supplied; calling split() on None
    # would raise before the None check below could ever run.
    if metadata_name is not None:
        metadata_name = metadata_name.split('(')[0].rstrip()

    if metadata_name is None or metadata_name == '<observation ids>':
        max_len_metadata = 0
    elif table.observation_metadata and metadata_name in table.observation_metadata[0]:
        # figure out the longest list within the given metadata
        max_len_metadata = max(
            len(p[metadata_name]) for p in table.observation_metadata)
    else:
        QtGui.QMessageBox.information(
            self, 'Unrecognized metadata file',
            "'" + metadata_name + "' was not found in the BIOM table.",
            QtGui.QMessageBox.Ok)
        return

    # make the header line: simple labels for each level in the metadata
    # (e.g. 'Level_1', 'Level_2', etc.), then the observation id column,
    # then the sample ids
    header = ['Level_' + str(level + 1) for level in range(max_len_metadata)]
    header.append('Observation Ids')
    header.extend(table.sample_ids)

    # the with-block closes the output file even if a row fails to serialize
    with open(output_name, 'w') as fout:
        fout.write("\t".join(header) + '\n')

        # now process each observation (row in the table)
        for obs_vals, obs_id, obs_metadata in table.iter(axis='observation'):
            row = []
            if max_len_metadata > 0:
                # copy, so the padding and counts added below do not end up
                # inside the table's own metadata list
                row = list(obs_metadata[metadata_name])

            # add blanks if the metadata doesn't fill each level
            row.extend(['unclassified'] * (max_len_metadata - len(row)))

            # Add the observation id as the last "Level"
            if isNumber(obs_id):
                row.append('ID' + obs_id)
            else:
                row.append(obs_id)

            # Add count data to the row
            row.extend(map(str, obs_vals))
            fout.write("\t".join(row) + '\n')