Esempio n. 1
0
def _matrix_input_from_dict2d(matrix):
    """Build the clearcut matrix input string from a dict2D object.

    Returns a 2-tuple ``(input_matrix, int_keys)``: the newline-terminated
    string produced by phylipMatrix, and a dict mapping each generated
    "env_<index>" name back to the original key of ``matrix``.
    """
    # clearcut truncates names to 10 characters, so short placeholder names
    # are substituted here; the returned mapping lets callers restore them.
    int_keys = {}
    for position, original in enumerate(sorted(matrix.keys())):
        int_keys["env_" + str(position)] = original

    # reverse lookup: original name -> placeholder name
    placeholder_of = {original: alias for alias, original in int_keys.items()}

    # re-key every (row, column, value) entry with the placeholder names
    relabelled = [
        (placeholder_of[row_name], placeholder_of[col_name],
         matrix[row_name][col_name])
        for row_name in matrix
        for col_name in matrix[row_name]
    ]
    int_map_dists = Dict2D(relabelled)

    # the placeholder names, in sorted order, label both rows and columns;
    # this lookup only works for a square matrix
    names = sorted(int_map_dists.keys())
    rows = [
        [str(int_map_dists[row_key][col_key]) for col_key in names]
        for row_key in names
    ]

    # the input needs trailing whitespace or clearcut will fail
    input_matrix = phylipMatrix(rows, names) + "\n"

    return input_matrix, int_keys
Esempio n. 2
0
    def tostring(self, borders=True, sep=None, format='', **kwargs):
        """Return the table as a formatted string.
        
        Arguments:
            - borders: passed to the simple text formatter; only used when
              no other format applies.
            - sep: A string separator for delineating columns, e.g. ',' or '\t'.
              Overrides format (except 'bedgraph').
            - format: possible formats are 'rest', 'latex', 'html', 'phylip',
              'bedgraph', or simple text (default). Matched
              case-insensitively; any name ending in 'tex' selects latex.
            - kwargs: forwarded to the latex and bedgraph formatters only.
        
        NOTE: If format is bedgraph, assumes that column headers are chrom,
        start, end, value. In that order! The table must have 4 columns.
        """
        # Normalise once so every comparison below agrees on case; previously
        # only the missing-data choice was case-insensitive.
        fmt = format.lower()

        # bedgraph works on the raw sorted rows and needs no cell formatting
        if fmt == 'bedgraph':
            assert self.Shape[1] == 4, 'bedgraph format is for 4 column tables'
            # assuming that header order is chrom, start, end, val
            return bedgraph.bedgraph(self.sorted().array.tolist(), **kwargs)

        # phylip output substitutes a formatted zero for missing values
        if fmt == 'phylip':
            missing_data = "%.4f" % 0.0
        else:
            missing_data = self._missing_data

        # convert self to a 2D list and format every cell as a string
        header, formatted_table = table_format.formattedCells(
            self.array.tolist(),
            self.Header,
            digits=self._digits,
            column_templates=self._column_templates,
            missing_data=missing_data)
        args = (header, formatted_table, self.Title, self.Legend)

        # an explicit separator takes precedence over the named formats
        if sep:
            return table_format.separatorFormat(*args + (sep, ))
        if fmt == 'rest':
            return table_format.gridTableFormat(*args)
        if fmt.endswith('tex'):
            caption = None
            if self.Title or self.Legend:
                caption = " ".join([self.Title or "", self.Legend or ""])
            return table_format.latex(formatted_table,
                                      header,
                                      caption=caption,
                                      **kwargs)
        if fmt == 'html':
            # html is rendered from the grid-table (rest) representation
            rest = table_format.gridTableFormat(*args)
            return table_format.html(rest)
        if fmt == 'phylip':
            # need to eliminate row identifiers
            formatted_table = [row[self._row_ids:] for row in formatted_table]
            header = header[self._row_ids:]
            return table_format.phylipMatrix(formatted_table, header)
        return table_format.simpleFormat(
            *args + (self._max_width, self._row_ids, borders, self.Space))
Esempio n. 3
0
def dotur_from_alignment(aln, moltype, distance_function, params=None):
    """Returns dotur results given an alignment and distance function.
    
        - aln: An Alignment object or something that behaves like one.
            Sequences must be aligned.
        - moltype: cogent.core.moltype object.
        - distance_function: function that can be passed to distanceMatrix()
            method of SequenceCollection.  Must be able to find distance
            between two sequences.
        - params: passed unchanged to the Dotur application controller.
        
        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.
        
        The temporary working directory is removed before returning, and
        also when the dotur run or the parsing raises.
    """
    #construct Alignment object.  This will handle unaligned sequences.
    aln = Alignment(aln, MolType=moltype)
    
    #need to make int map.
    int_map, int_keys = aln.getIntMap()
    #construct Alignment object from int map to use object functionality
    int_map = Alignment(int_map, MolType=moltype)
    order = sorted(int_map.Names)
    
    #Build distance matrix with rows and columns in the same sorted order.
    d_matrix_dict = int_map.distanceMatrix(f=distance_function)
    d_matrix_dict.RowOrder = order
    d_matrix_dict.ColOrder = order
    
    #Get distance matrix in list form.  Values must be strings to use
    #phylipMatrix; real lists are built (rather than map objects) so the
    #rows are proper sequences on Python 3 as well as Python 2.
    d_matrix_list = [[str(dist) for dist in line]
                     for line in d_matrix_dict.toLists()]
    
    #Get phylip formatted string.
    phylip_matrix_string = phylipMatrix(rows=d_matrix_list, names=order)
        
    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)
    
    try:
        res = app(phylip_matrix_string)
        
        otu_list = OtuListParser(res['List'].readlines())
        
        #remap sequence names from the int map back to the originals
        for i, otu in enumerate(otu_list):
            otu_list[i][2] = remap_seq_names(otu[2], int_keys)
    finally:
        #always clean up the working directory, even if dotur failed
        shutil.rmtree(app.WorkingDir)
    
    return otu_list
Esempio n. 4
0
def dotur_from_alignment(aln, moltype, distance_function, params=None):
    """Returns dotur results given an alignment and distance function.
    
        - aln: An Alignment object or something that behaves like one.
            Sequences must be aligned.
        - moltype: cogent.core.moltype object.
        - distance_function: function that can be passed to distanceMatrix()
            method of SequenceCollection.  Must be able to find distance
            between two sequences.
        - params: handed as-is to the Dotur application controller.
        
        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.
        
        The temporary working directory is deleted on the way out, whether
        the run succeeded or an exception was raised.
    """
    #construct Alignment object.  This will handle unaligned sequences.
    aln = Alignment(aln, MolType=moltype)

    #need to make int map.
    int_map, int_keys = aln.getIntMap()
    #construct Alignment object from int map to use object functionality
    int_map = Alignment(int_map, MolType=moltype)
    order = sorted(int_map.Names)

    #Build distance matrix, using one sorted order for rows and columns.
    d_matrix_dict = int_map.distanceMatrix(f=distance_function)
    d_matrix_dict.RowOrder = order
    d_matrix_dict.ColOrder = order

    #Get distance matrix in list form, with every value as a string, which
    #phylipMatrix requires.  A list comprehension is used instead of map()
    #so each row is a concrete list on Python 3 too.
    d_matrix_list = [[str(value) for value in row]
                     for row in d_matrix_dict.toLists()]

    #Get phylip formatted string.
    phylip_matrix_string = phylipMatrix(rows=d_matrix_list, names=order)

    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)

    try:
        res = app(phylip_matrix_string)

        otu_list = OtuListParser(res['List'].readlines())

        #remap sequence names back from the int map names
        for i, otu in enumerate(otu_list):
            otu_list[i][2] = remap_seq_names(otu[2], int_keys)
    finally:
        #do not leave the working directory behind when something fails
        shutil.rmtree(app.WorkingDir)

    return otu_list
Esempio n. 5
0
 def tostring(self, borders=True, sep=None, format='', **kwargs):
     """Return the table as a formatted string.
     
     Arguments:
         - borders: handed to the simple text formatter; has no effect on
           the other formats.
         - sep: A string separator for delineating columns, e.g. ',' or '\t'.
           Overrides format (except 'bedgraph').
         - format: possible formats are 'rest', 'latex', 'html', 'phylip',
           'bedgraph', or simple text (default). Case is ignored; any name
           ending in 'tex' selects latex.
         - kwargs: forwarded to the latex and bedgraph formatters only.
     
     NOTE: If format is bedgraph, assumes that column headers are chrom,
     start, end, value. In that order! The table must have 4 columns.
     """
     # Lower-case once so all format comparisons are consistent; before,
     # only the missing-data choice ignored case.
     fmt = format.lower()
     
     # bedgraph uses the raw sorted rows, so it skips cell formatting
     if fmt == 'bedgraph':
         assert self.Shape[1] == 4, 'bedgraph format is for 4 column tables'
         # assuming that header order is chrom, start, end, val
         return bedgraph.bedgraph(self.sorted().array.tolist(), **kwargs)
     
     # phylip output replaces missing values with a formatted zero
     if fmt == 'phylip':
         missing_data = "%.4f" % 0.0
     else:
         missing_data = self._missing_data
     
     # convert self to a 2D list and format each cell as a string
     header, formatted_table = table_format.formattedCells(
                             self.array.tolist(),
                             self.Header,
                             digits = self._digits,
                             column_templates = self._column_templates,
                             missing_data = missing_data)
     args = (header, formatted_table, self.Title, self.Legend)
     
     # an explicit separator wins over any named format
     if sep:
         return table_format.separatorFormat(*args + (sep,))
     if fmt == 'rest':
         return table_format.gridTableFormat(*args)
     if fmt.endswith('tex'):
         caption = None
         if self.Title or self.Legend:
             caption = " ".join([self.Title or "", self.Legend or ""])
         return table_format.latex(formatted_table, header,
                             caption = caption, **kwargs)
     if fmt == 'html':
         # html is produced from the grid-table (rest) rendering
         rest = table_format.gridTableFormat(*args)
         return table_format.html(rest)
     if fmt == 'phylip':
         # need to eliminate row identifiers
         formatted_table = [row[self._row_ids:] for row in formatted_table]
         header = header[self._row_ids:]
         return table_format.phylipMatrix(formatted_table, header)
     return table_format.simpleFormat(*args + (self._max_width,
                         self._row_ids, borders, self.Space))
Esempio n. 6
0
def _matrix_input_from_dict2d(matrix):
    """Create the input for running clearcut on a matrix held in a dict2D.

    Returns (input_matrix, int_keys), where input_matrix is the string
    built by phylipMatrix with a newline appended, and int_keys maps each
    generated 'env_<index>' name to the original key it stands for.
    """
    #clearcut truncates names to 10 char, so rename to short 'env_<index>'
    #names before running; int_keys allows reassigning the names afterwards
    ordered_originals = sorted(matrix.keys())
    int_keys = dict(
        ('env_' + str(idx), full_name)
        for idx, full_name in enumerate(ordered_originals))

    #inverse mapping: full name -> short name
    int_map = dict((full_name, short) for short, full_name in int_keys.items())

    #collect (row, col, value) triples keyed by the short names instead of
    #the original names, then build a new dict2D object from them
    triples = []
    for outer in matrix:
        inner_values = matrix[outer]
        for inner in inner_values:
            triples.append((int_map[outer], int_map[inner], matrix[outer][inner]))
    int_map_dists = Dict2D(triples)

    #the short names label both axes, in sorted order
    names = sorted(int_map_dists.keys())

    #fill each row following the order of names;
    #this will work for a square matrix only
    rows = [[str(int_map_dists[r][c]) for c in names] for r in names]

    #input needs a trailing whitespace or it will fail!
    input_matrix = phylipMatrix(rows, names) + '\n'

    return input_matrix, int_keys