Exemple #1
0
    def label(self):
        """Name the clusters and propagate the names to the BIOM and FASTA files.

        Steps, in order:

        1. Load the cluster FASTA file (``cluster-fasta`` option) into
           ``self.seqdb``.
        2. If the ``label-missing`` option is set, collect the ids of the BIOM
           rows whose label is empty or one of the placeholder values and
           hand them to ``self.__fasta`` together with the path
           ``<outdir>/missing.fasta``; the value it returns is used as the
           query file instead of the full cluster FASTA file.
        3. Ask ``BlastN.get_names`` for OTU names for the query file.
        4. Pass those names to ``BiomFile.change_otu_names`` for the
           ``cluster-biom`` file.
        5. Read the complete label mapping back from the BIOM file and call
           ``self.__fasta`` on the cluster FASTA path with those names.

        Returns:
            0 on completion.

        Calls ``exit(1)`` when ``label-missing`` is set but no row is missing
        a label.
        """
        self.seqdb = self.__read_fasta(self.options['cluster-fasta'])
        blast_fname = self.options['cluster-fasta']

        # If only the unlabelled clusters are to be processed, find their ids
        # and build a query file restricted to those sequences.
        if self.options['label-missing']:
            unlabelled = ("", "unknown", "error", "cannot label (matches multiple domains!)")

            # Use a context manager so the BIOM file handle is closed promptly
            # instead of being left to the garbage collector.
            with open(self.options['cluster-biom']) as biom_file:
                biom_json = json.load(biom_file)

            missing = [
                row['id']
                for row in biom_json['rows']
                if row['metadata']['label'] in unlabelled
            ]

            if not missing:
                self.log.error("there are no missing labels")
                # NOTE(review): relies on `exit` being in scope, either the
                # interactive builtin or an import outside this block.
                exit(1)

            self.log.info("%d clusters missing labels" % len(missing))
            # Presumably writes the listed sequences and returns the file
            # name; verify against __fasta.
            blast_fname = self.__fasta(join(self.options['outdir'], 'missing.fasta'), missing)

        # A parenthesised single-argument print behaves the same as the
        # Python 2 print statement.
        print("getting OTU names (this may take a while)...")
        # NOTE(review): 'labels_db' uses an underscore while the sibling
        # option keys use hyphens; confirm this matches the option parser.
        otu_names = BlastN(self.options['verbose']).get_names(
            blast_fname,
            self.options['labels'],
            self.options['labels-similarity'],
            self.options['labels_db'])

        # Rework the BIOM file with the new names.
        biom = BiomFile()
        biom.change_otu_names(self.options['cluster-biom'], otu_names)
        self.log.info("written %s" % self.options['cluster-biom'])

        # Fetch the full mapping (not just the names found above) and
        # rewrite the cluster FASTA file with it.
        otu_names = biom.get_label_mapping(self.options['cluster-biom'])
        self.__fasta(self.options['cluster-fasta'], self.seqdb.keys(), names=otu_names)

        return 0
Exemple #2
0
    def showlabels(self):
        """Print each entry of the cluster BIOM label mapping on its own line.

        The mapping comes from ``BiomFile.get_label_mapping`` called with the
        ``cluster-biom`` option. Every (key, value) pair it yields is joined
        with the ``delimiter`` option and printed.

        Returns:
            0, always.
        """
        separator = self.options['delimiter']
        mapping = BiomFile().get_label_mapping(self.options['cluster-biom'])

        for pair in mapping.iteritems():
            # A parenthesised single-argument print behaves the same as the
            # Python 2 print statement.
            print(separator.join(pair))

        return 0