def _matrix_input_from_dict2d(matrix):
    """Build clearcut matrix input from a dict2D object.

    Arguments:
        - matrix: 2D mapping of name -> name -> value. The row-building
          step only works for a square matrix.

    Returns a tuple (input_matrix, int_keys):
        - input_matrix: the phylipMatrix output for the renamed matrix, with
          a trailing newline appended.
        - int_keys: dict mapping each generated "env_<index>" name back to
          the original name it replaced.
    """
    # clearcut truncates names to 10 characters, so every name is swapped for
    # a short "env_<index>" alias here and can be restored by the caller
    # through the returned alias -> full name dict.
    alias_to_full = {}
    full_to_alias = {}
    for position, full_name in enumerate(sorted(matrix.keys())):
        alias = "env_" + str(position)
        alias_to_full[alias] = full_name
        full_to_alias[full_name] = alias

    # Same distances as the input, keyed by alias instead of original name.
    renamed_dists = [
        (full_to_alias[row_name],
         full_to_alias[col_name],
         matrix[row_name][col_name])
        for row_name in matrix
        for col_name in matrix[row_name]
    ]
    alias_dists = Dict2D(renamed_dists)

    # The aliases double as the names handed to phylipMatrix; rows follow the
    # same sorted order (valid for a square matrix only).
    names = sorted(alias_dists.keys())
    rows = [
        [str(alias_dists[row_alias][col_alias]) for col_alias in names]
        for row_alias in names
    ]

    input_matrix = phylipMatrix(rows, names)
    # input needs a trailing whitespace or it will fail!
    input_matrix += "\n"

    return input_matrix, alias_to_full
def tostring(self, borders=True, sep=None, format='', **kwargs):
    """Return the table as a formatted string.

    Arguments:
        - borders: passed to the simple text formatter; only used when no
          other format applies.
        - sep: A string separator for delineating columns, e.g. ',' or a
          tab. Overrides format (except for bedgraph).
        - format: possible formats are 'rest', 'latex', 'html', 'phylip',
          'bedgraph', or simple text (default). Matched case-insensitively.
        - kwargs: forwarded to the latex and bedgraph formatters.

    NOTE: If format is bedgraph, assumes that column headers are
    chrom, start, end, value. In that order!
    """
    # Normalise the format name once. Previously only the missing-data check
    # was case-insensitive, so e.g. 'PHYLIP' got phylip-style missing values
    # but was then rendered by the simple text formatter.
    fmt = format.lower()

    if fmt == 'phylip':
        # phylip matrices are numeric, so missing cells become "0.0000"
        missing_data = "%.4f" % 0.0
    else:
        missing_data = self._missing_data

    # convert self to a 2D list
    formatted_table = self.array.tolist()
    if fmt != 'bedgraph':
        # bedgraph works from the raw values; every other format works from
        # cells already rendered as strings
        header, formatted_table = table_format.formattedCells(
            formatted_table, self.Header,
            digits=self._digits,
            column_templates=self._column_templates,
            missing_data=missing_data)
        args = (header, formatted_table, self.Title, self.Legend)

    if sep and fmt != 'bedgraph':
        return table_format.separatorFormat(*args + (sep,))
    elif fmt == 'rest':
        return table_format.gridTableFormat(*args)
    elif fmt.endswith('tex'):
        caption = None
        if self.Title or self.Legend:
            caption = " ".join([self.Title or "", self.Legend or ""])
        return table_format.latex(formatted_table, header,
                                  caption=caption, **kwargs)
    elif fmt == 'html':
        # html output is derived from the grid table rendering
        rest = table_format.gridTableFormat(*args)
        return table_format.html(rest)
    elif fmt == 'phylip':
        # need to eliminate row identifiers
        formatted_table = [row[self._row_ids:] for row in formatted_table]
        header = header[self._row_ids:]
        return table_format.phylipMatrix(formatted_table, header)
    elif fmt == 'bedgraph':
        assert self.Shape[1] == 4, 'bedgraph format is for 4 column tables'
        # assuming that header order is chrom, start, end, val
        formatted_table = bedgraph.bedgraph(self.sorted().array.tolist(),
                                            **kwargs)
        return formatted_table
    else:
        return table_format.simpleFormat(
            *args + (self._max_width, self._row_ids, borders, self.Space))
def dotur_from_alignment(aln, moltype, distance_function, params=None):
    """Returns dotur results given an alignment and distance function.

        - aln: An Alignment object or something that behaves like one.
            Sequences must be aligned.
        - moltype: cogent.core.moltype object.
        - distance_function: function that can be passed to distanceMatrix()
            method of SequenceCollection.  Must be able to find distance
            between two sequences.
        - params: passed unchanged to the Dotur application as its params.

        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.

    The temporary working directory is removed before returning, including
    when running dotur or parsing its output raises.
    """
    # Wrap the input in an Alignment object so its methods can be used.
    aln = Alignment(aln, MolType=moltype)

    # Dotur is run on the int-mapped names; int_keys is kept so the original
    # sequence names can be restored in the result.
    int_map, int_keys = aln.getIntMap()
    # construct Alignment object from int map to use object functionality
    int_map = Alignment(int_map, MolType=moltype)
    order = sorted(int_map.Names)

    # Build distance matrix with rows and columns in the same sorted order.
    d_matrix_dict = int_map.distanceMatrix(f=distance_function)
    d_matrix_dict.RowOrder = order
    d_matrix_dict.ColOrder = order

    # phylipMatrix needs rows of strings. Build real lists: map() returns a
    # lazy iterator on Python 3, which is not a usable row.
    d_matrix_list = [[str(value) for value in line]
                     for line in d_matrix_dict.toLists()]

    # Get phylip formatted string.
    phylip_matrix_string = phylipMatrix(rows=d_matrix_list, names=order)

    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)

    try:
        res = app(phylip_matrix_string)
        otu_list = OtuListParser(res['List'].readlines())

        # remap sequence names
        for i, otu in enumerate(otu_list):
            otu_list[i][2] = remap_seq_names(otu[2], int_keys)
    finally:
        # Always clean up the working directory; ignore_errors keeps a
        # cleanup problem from masking the real exception (or the result).
        shutil.rmtree(app.WorkingDir, ignore_errors=True)

    return otu_list
def dotur_from_alignment(aln, moltype, distance_function, params=None):
    """Returns dotur results given an alignment and distance function.

        - aln: An Alignment object or something that behaves like one.
            Sequences must be aligned.
        - moltype: cogent.core.moltype object.
        - distance_function: function that can be passed to distanceMatrix()
            method of SequenceCollection.  Must be able to find distance
            between two sequences.
        - params: handed to the Dotur application as its params argument.

        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.

    The working directory created for the run is deleted before this
    function returns or raises.
    """
    # Build an Alignment from the input to get access to its methods.
    aln = Alignment(aln, MolType=moltype)

    # need to make int map; int_keys is used at the end to restore the
    # original sequence names.
    int_map, int_keys = aln.getIntMap()
    # construct Alignment object from int map to use object functionality
    int_map = Alignment(int_map, MolType=moltype)
    order = sorted(int_map.Names)

    # Build distance matrix, fixing row and column order to the sorted names.
    d_matrix_dict = int_map.distanceMatrix(f=distance_function)
    d_matrix_dict.RowOrder = order
    d_matrix_dict.ColOrder = order

    # Get distance matrix in list form, with every cell as a string for
    # phylipMatrix. A list comprehension is used instead of map(), which
    # only yields a one-shot iterator on Python 3.
    d_matrix_list = [[str(cell) for cell in row]
                     for row in d_matrix_dict.toLists()]

    # Get phylip formatted string.
    phylip_matrix_string = phylipMatrix(rows=d_matrix_list, names=order)

    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)

    try:
        res = app(phylip_matrix_string)
        otu_list = OtuListParser(res['List'].readlines())

        # remap sequence names
        for i, otu in enumerate(otu_list):
            otu_list[i][2] = remap_seq_names(otu[2], int_keys)
    finally:
        # Remove the working directory on failure as well as success;
        # ignore_errors stops a cleanup failure hiding the original error.
        shutil.rmtree(app.WorkingDir, ignore_errors=True)

    return otu_list
def tostring(self, borders=True, sep=None, format='', **kwargs):
    """Return the table as a formatted string.

    Arguments:
        - borders: handed to the simple text formatter, which is used when
          no other format is selected.
        - sep: A string separator for delineating columns, e.g. ',' or a
          tab. Overrides format (bedgraph excepted).
        - format: possible formats are 'rest', 'latex', 'html', 'phylip',
          'bedgraph', or simple text (default). Case is ignored.
        - kwargs: passed on to the latex and bedgraph formatters.

    NOTE: If format is bedgraph, assumes that column headers are
    chrom, start, end, value. In that order!
    """
    # Lower-case the requested format once so that all checks agree. Before,
    # only the phylip missing-data check ignored case, which let 'Phylip'
    # pick phylip missing values and then fall through to simple text.
    fmt = format.lower()

    # phylip output must be numeric, so missing cells are written as 0.0000
    missing_data = "%.4f" % 0.0 if fmt == 'phylip' else self._missing_data

    # convert self to a 2D list
    formatted_table = self.array.tolist()
    if fmt != 'bedgraph':
        # all formats other than bedgraph operate on string-formatted cells
        header, formatted_table = table_format.formattedCells(
            formatted_table, self.Header,
            digits=self._digits,
            column_templates=self._column_templates,
            missing_data=missing_data)
        args = (header, formatted_table, self.Title, self.Legend)

    if sep and fmt != 'bedgraph':
        return table_format.separatorFormat(*args + (sep,))

    if fmt == 'rest':
        return table_format.gridTableFormat(*args)

    if fmt.endswith('tex'):
        caption = None
        if self.Title or self.Legend:
            caption = " ".join([self.Title or "", self.Legend or ""])
        return table_format.latex(formatted_table, header,
                                  caption=caption, **kwargs)

    if fmt == 'html':
        # the html writer consumes the grid table rendering
        rest = table_format.gridTableFormat(*args)
        return table_format.html(rest)

    if fmt == 'phylip':
        # need to eliminate row identifiers
        formatted_table = [row[self._row_ids:] for row in formatted_table]
        header = header[self._row_ids:]
        return table_format.phylipMatrix(formatted_table, header)

    if fmt == 'bedgraph':
        assert self.Shape[1] == 4, 'bedgraph format is for 4 column tables'
        # assuming that header order is chrom, start, end, val
        formatted_table = bedgraph.bedgraph(self.sorted().array.tolist(),
                                            **kwargs)
        return formatted_table

    return table_format.simpleFormat(
        *args + (self._max_width, self._row_ids, borders, self.Space))
def _matrix_input_from_dict2d(matrix):
    """Make the input for running clearcut on a matrix from a dict2D object.

    Arguments:
        - matrix: 2D mapping of name -> name -> value; must be square for
          the row construction below to work.

    Returns (input_matrix, int_keys), where input_matrix is the phylipMatrix
    output with a trailing newline added, and int_keys maps each generated
    'env_<index>' name to the original name it stands for.
    """
    # clearcut truncates names to 10 char, so names are replaced by short
    # generated ones before the run and reassigned afterwards via int_keys.
    original_names = sorted(matrix.keys())
    short_names = ['env_' + str(n) for n in range(len(original_names))]

    # short name -> original name (returned), and the inverse for renaming
    int_keys = dict(zip(short_names, original_names))
    int_map = dict(zip(original_names, short_names))

    # Re-key every distance by the short names.
    new_dists = []
    for first in matrix:
        inner = matrix[first]
        for second in inner:
            new_dists.append(
                (int_map[first], int_map[second], matrix[first][second]))
    int_map_dists = Dict2D(new_dists)

    # The short names are what phylipMatrix receives as names.
    names = sorted(int_map_dists.keys())

    # One row per name, cells in the same order as names
    # (this works for a square matrix only).
    rows = []
    for row_key in names:
        rows.append([str(int_map_dists[row_key][col_key])
                     for col_key in names])

    input_matrix = phylipMatrix(rows, names)
    # input needs a trailing whitespace or it will fail!
    input_matrix += '\n'

    return input_matrix, int_keys