Beispiel #1
0
def dotur_from_file(distance_matrix_file_path, params=None):
    """Returns dotur results given a distance matrix file.

        - distance_matrix_file_path:  Path to distance matrix file.  This file
             must be a PHYLIP formatted square distance matrix.  This format
             is available in cogent.format.table.
             - IMPORTANT NOTE:  This distance matrix format allows only 10
                characters for the row labels in the distance matrix.  Also,
                the IDs must be unique and ungapped to be useful when using
                dotur.
        - params: passed through unchanged to the Dotur application
            controller.
        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.

        The temporary working directory is removed before returning, whether
        or not dotur and the parser succeed.
    """
    # Read out the data from the distance_matrix_file_path.
    # This is important so we can run dotur in a temp directory and avoid
    # having to handle all 23 output files.
    matrix_file = open(distance_matrix_file_path, 'U')
    try:
        d_matrix_string = matrix_file.read()
    finally:
        # Close explicitly rather than relying on garbage collection.
        matrix_file.close()

    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)

    succeeded = False
    try:
        res = app(d_matrix_string)
        # The *.list file lives in the working directory, so it must be
        # parsed before that directory is removed.
        otu_list = OtuListParser(res['List'].readlines())
        succeeded = True
    finally:
        # On failure the cleanup is best-effort so that it can never mask
        # the original error; on success a cleanup failure is still raised.
        shutil.rmtree(app.WorkingDir, ignore_errors=not succeeded)

    return otu_list
Beispiel #2
0
def dotur_from_alignment(aln, moltype, distance_function, params=None):
    """Returns dotur results given an alignment and distance function.

        - aln: An Alignment object or something that behaves like one.
            Sequences must be aligned.
        - moltype: cogent.core.moltype object.
        - distance_function: function that can be passed to distanceMatrix()
            method of SequenceCollection.  Must be able to find distance
            between two sequences.
        - params: passed through unchanged to the Dotur application
            controller.

        - NOTE:  This function will only return the parsed *.list file, as
            it contains the OTU identities.
            Dotur generates 23 output files, so if this is not the one you
            are looking for, check out the documentation and add the others
            to the result path.

        The temporary working directory is removed before returning, whether
        or not dotur and the parser succeed.
    """
    # Wrap the input in an Alignment object so its API is available below.
    aln = Alignment(aln, MolType=moltype)

    # Replace the sequence names with the int-map names for the dotur run;
    # int_keys is kept to restore the original names afterwards.
    int_map, int_keys = aln.getIntMap()
    # Construct an Alignment object from the int map to use object
    # functionality.
    int_map = Alignment(int_map, MolType=moltype)
    order = sorted(int_map.Names)

    # Build distance matrix, with rows and columns in the same sorted order.
    d_matrix_dict = int_map.distanceMatrix(f=distance_function)
    d_matrix_dict.RowOrder = order
    d_matrix_dict.ColOrder = order

    # Get distance matrix in list form.  Every cell must be a string to use
    # phylipMatrix; a comprehension yields real lists on any Python version.
    d_matrix_list = [[str(cell) for cell in row]
                     for row in d_matrix_dict.toLists()]

    # Get phylip formatted string.
    phylip_matrix_string = phylipMatrix(rows=d_matrix_list, names=order)

    working_dir = get_tmp_filename(suffix='')
    app = Dotur(InputHandler='_input_as_multiline_string',
                WorkingDir=working_dir, params=params)

    succeeded = False
    try:
        res = app(phylip_matrix_string)
        # The *.list file lives in the working directory, so it must be
        # parsed before that directory is removed.
        otu_list = OtuListParser(res['List'].readlines())

        # Remap sequence names: element 2 of each record holds the names
        # that are translated back through int_keys.
        for otu in otu_list:
            otu[2] = remap_seq_names(otu[2], int_keys)
        succeeded = True
    finally:
        # On failure the cleanup is best-effort so that it can never mask
        # the original error; on success a cleanup failure is still raised.
        shutil.rmtree(app.WorkingDir, ignore_errors=not succeeded)

    return otu_list
Beispiel #3
0
 def test_otulistparser_parser(self):
     """OtuListParser output for the basic fixture matches the expected list.
     """
     # Feed the parser the fixture text one line at a time.
     fixture_lines = self.otu_list_string.split('\n')
     parsed = OtuListParser(fixture_lines)
     self.assertEqual(parsed, self.otu_res_list)
Beispiel #4
0
 def test_otulistparser_no_data(self):
     """OtuListParser given an empty list of lines produces no records.
     """
     # list() normalises the result so the comparison is against a list.
     parsed_records = list(OtuListParser([]))
     self.assertEqual(parsed_records, [])