def combine_asr_tables(output_files,verbose=False):
    """ Combine all tables coming from asr output into a single table.

    The first column of the first file supplies the row ids. The second column
    of every file (the first column holding predictions) is then appended to
    the right, in the order the files are listed.

    Assumes all output files have same row identifiers and that these are in
    the same order; this is not checked here.

    output_files -- sequence of paths to tab-delimited tables that have a
                    header line; must contain at least one path.
    verbose -- when True, print one progress line per file.

    Returns a list of lists. The first inner list is the header row; every
    following inner list is a row id followed by one prediction per file.
    """

    #load in the first column (containing row ids). File doesn't matter since they should all have identical first columns.
    table=LoadTable(filename=output_files[0],header=True,sep='\t')
    id_header=table.Header[0]
    row_ids = table.getRawData(columns=[id_header])

    #Array of arrays: the header row first, then one row per row id
    combined_table=[[id_header]]
    combined_table.extend([row_id] for row_id in row_ids)

    #Now add the rest of the files to the table
    num_files=len(output_files)
    #Count from 1 so the progress message reads "file 1 of N" instead of "file 0 of N"
    for file_num,output_file in enumerate(output_files,1):
        if verbose:
            #single parenthesised argument: prints the same text as the print statement did
            print("Combining file {0} of {1}: {2}".format(file_num,num_files,output_file))
        #pull out the second column (first column with actual predictions)
        table=LoadTable(filename=output_file,header=True,sep='\t')
        prediction_header=table.Header[1]
        predictions = table.getRawData(columns=[prediction_header])

        #Add the header for our column to the list of headers
        combined_table[0].append(prediction_header)

        #Add rest of values in the column (row 0 is the header, so data rows start at 1)
        for j,prediction in enumerate(predictions,1):
            combined_table[j].append(prediction)

    return combined_table
def combine_asr_tables(output_files,verbose=False):
    """ Combine all tables coming from asr output. Cuts 2nd column out of each
    file and joins them together into single table.

    Row ids are taken from the first column of the first file. Assumes all
    output files have same row identifiers and that these are in the same
    order (not verified).

    output_files -- sequence of paths to tab-delimited tables with a header
                    line; at least one path is required.
    verbose -- if True, report each file on stdout as it is combined.

    Returns a list of rows (lists): the header row, then one row per row id
    holding the id followed by the prediction taken from each file.
    """

    #load in the first column (containing row ids). File doesn't matter since they should all have identical first columns.
    first_table=LoadTable(filename=output_files[0],header=True,sep='\t')
    row_ids = first_table.getRawData(columns=[first_table.Header[0]])

    #Going to store an array of arrays here: header row, then one row per id
    combined_table=[[first_table.Header[0]]]
    for row_id in row_ids:
        combined_table.append([row_id])

    #Now add the rest of the files to the table
    total=len(output_files)
    for i,output_file in enumerate(output_files):
        if verbose:
            #i is zero-based; report i+1 so the last file reads "file N of N"
            print("Combining file {0} of {1}: {2}".format(i+1,total,output_file))
        #pull out the second column (first column with actual predictions)
        table=LoadTable(filename=output_file,header=True,sep='\t')
        column_name=table.Header[1]
        predictions = table.getRawData(columns=[column_name])

        #Add the header for our column to the list of headers
        combined_table[0].append(column_name)

        #Add rest of values in the column; data rows start after the header row
        for row_index,prediction in enumerate(predictions,1):
            combined_table[row_index].append(prediction)

    return combined_table
Beispiel #3
0
    def test_export_table(self):
        """correctly generates table file

        Writes a collection to 'testdata' as a table, loads it back with
        LoadTable and checks the raw data matches coll.toTable().
        """
        orig_data = dict(counts=[[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]],
                         ranks=[0, 1, 2, 3, 4],
                         labels=['a', 'b', 'c', 'd', 'e'])
        coll = RegionCollection(**orig_data)

        expect = coll.toTable().getRawData()
        try:
            coll.writeToFile('testdata', as_table=True)
            got = LoadTable('testdata', sep='\t')
            self.assertEqual(got.getRawData(), expect)
        finally:
            # clean up even when the write, load or assertion fails, so a
            # failing run does not leave 'testdata' behind for later tests
            remove_files(['testdata'], error_on_missing=False)
Beispiel #4
0
def wagner_for_picrust(tree_path,
                       trait_table_path,
                       gain=None,
                       max_paralogs=None,
                       HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path -- path to the tree file; passed to Count and also parsed with
                 DndParser to count the tips.
    trait_table_path -- path to a tab-delimited trait table with a header line.
    gain -- if truthy, switched on as Count's '-gain' parameter.
    max_paralogs -- if truthy, switched on as Count's '-max_paralogs' parameter.
    HALT_EXEC -- forwarded to the Count controller constructor.

    Returns the table produced by parse_wagner_parsimony_output, transposed
    with new_column_name='nodes'.
    '''
    #initialize Count app controller
    count = Count(HALT_EXEC=HALT_EXEC)

    #set the parameters (falsy values such as None or 0 leave the option untouched)
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path, header=True, sep='\t')

    #get the first column (containing row ids)
    genome_ids = table.getRawData(table.Header[0])
    #remove single quotes from the id if they exist
    genome_ids = [str(genome_id).strip('\'') for genome_id in genome_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table = table.withNewHeader(table.Header[1:], genome_ids)
    #write the modified table to a tmp file
    tmp_table_path = get_tmp_filename()
    table.writeToFile(tmp_table_path, sep='\t')

    #Run Count here
    result = count(data=(tree_path, tmp_table_path))

    #Remove tmp file
    remove(tmp_table_path)

    #The tree is only needed for its tip count; close the handle once parsed
    with open(tree_path) as tree_file:
        tree = DndParser(tree_file)

    #parse the results into a Cogent Table
    asr_table = parse_wagner_parsimony_output(result["StdOut"].readlines(),
                                              remove_num_tips=len(tree.tips()))

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
Beispiel #5
0
def wagner_for_picrust(tree_path,trait_table_path,gain=None,max_paralogs=None,HALT_EXEC=False):
    '''Runs count application controller given path of tree and trait table and returns a Table

    tree_path -- path to the tree file handed to Count; it is also parsed with
                 DndParser so the number of tips can be passed to the output parser.
    trait_table_path -- path to a tab-delimited trait table with a header line.
    gain -- turned on as the '-gain' parameter when truthy.
    max_paralogs -- turned on as the '-max_paralogs' parameter when truthy.
    HALT_EXEC -- passed through to the Count constructor.

    Returns the parsed Count output after transposing it with
    new_column_name='nodes'.
    '''
    #initialize Count app controller
    count=Count(HALT_EXEC=HALT_EXEC)

    #set the parameters (only when a truthy value was supplied)
    if gain:
        count.Parameters['-gain'].on(gain)
    if max_paralogs:
        count.Parameters['-max_paralogs'].on(max_paralogs)

    ###Have to manipulate the trait table some. Need to transpose it and strip ids surrounded in quotes.
    table = LoadTable(filename=trait_table_path,header=True,sep='\t')

    #get the first column (containing row ids) and remove single quotes from the ids if they exist
    raw_ids = table.getRawData(table.Header[0])
    genome_ids=[str(raw_id).strip('\'') for raw_id in raw_ids]
    #transpose the matrix
    table = table.transposed(new_column_name=table.Header[0])
    #Change the headers
    table=table.withNewHeader(table.Header[1:],genome_ids)
    #write the modified table to a tmp file
    tmp_table_path =get_tmp_filename()
    table.writeToFile(tmp_table_path,sep='\t')

    #Run Count here
    result = count(data=(tree_path,tmp_table_path))

    #Remove tmp file
    remove(tmp_table_path)

    #Parse the tree inside a with-block so the file handle is closed afterwards
    with open(tree_path) as tree_handle:
        tree=DndParser(tree_handle)
    num_tips=len(tree.tips())

    #parse the results into a Cogent Table
    asr_table= parse_wagner_parsimony_output(result["StdOut"].readlines(),remove_num_tips=num_tips)

    #transpose the table
    asr_table = asr_table.transposed(new_column_name='nodes')

    return asr_table
Beispiel #6
0
    def _load(self, filename):
        """loads attributes from a gzipped, .npy data structure or a tab delimited
        cogent table

        The gzipped .npy route is tried first: every key of the stored dict
        becomes an instance attribute, with 'ranks' and 'counts' cast to float
        and 'labels' cast to str when they are not None. If anything in that
        route raises, the file is read as a tab-delimited table instead, where
        each row is taken as: label, rank, then the count values.

        Sets self.N to the number of rows of self.counts.
        """
        try:
            infile = gzip.GzipFile(filename, 'r')
            try:
                data = numpy.load(infile)
            finally:
                # release the handle even when the content is not a gzipped
                # .npy file, which is the case that triggers the fallback below
                infile.close()

            # remember numpy.load() returns an array object
            # numpy.load().tolist() returns a dict
            data = data.tolist()
            for name in data:
                value = data[name]
                self.__dict__[name] = value
                if (name == 'ranks' or name == 'counts') and value is not None:
                    self.__dict__[name] = value.astype(float)
                if name == 'labels' and value is not None:
                    self.__dict__[name] = value.astype(str)

        except Exception:
            # deliberate best-effort: any failure on the .npy route falls
            # back to reading the file as a tab-delimited table
            print("Trying to load from table")
            data = LoadTable(filename, sep='\t')

            # convert table to collection here.
            # NOTE(review): numpy.unicode / numpy.float appear to be aliases
            # that newer NumPy releases no longer provide -- confirm against
            # the NumPy version this project targets
            labels = []
            ranks = []
            counts = []
            for row in data.getRawData():
                labels.append(numpy.unicode(row[0]))
                ranks.append(numpy.float(row[1]))
                counts.append(numpy.array(row[2:len(row)], dtype=numpy.float32))

            self.labels = numpy.array(labels)
            self.ranks = numpy.array(ranks)
            self.counts = numpy.array(counts)

        self.N = self.counts.shape[0]