Exemple #1
0
    def merge(self):
        """Merge the partial profile libraries written by parallel runs.

        All files matching ``self.mFilenameProfile + "*"`` (except the
        target itself) are treated as parts. Their profiles are copied
        into a freshly opened library at ``self.mFilenameProfile``; nids
        present in ``self.mFasta`` but absent from every part are filled
        in through ``self.applyMethod`` with an empty neighbour list.
        The part files are deleted afterwards.

        Returns True if no nid was missing, duplicated or unknown, and
        False otherwise. If ``self.isComplete()`` is true, nothing is
        done and None is returned.
        """
        if self.isComplete():
            return

        # Each part is stored under several file names; cutting the last
        # four characters and de-duplicating yields one entry per part.
        # NOTE(review): this assumes every suffix is exactly four
        # characters long (e.g. ".xyz") -- confirm against
        # ProfileLibrary.getFileNames.
        infiles = sorted(
            set(x[:-4] for x in glob.glob("%s*" % self.mFilenameProfile)
                if x != self.mFilenameProfile))

        found = set()
        ninput, noutput, nfound, nunknown, nduplicate = 0, 0, 0, 0, 0
        tokens = set(self.mFasta.keys())

        self.mProfileLibrary = ProfileLibrary.ProfileLibrary(
            self.mFilenameProfile, "w")

        for filename in infiles:
            infile = ProfileLibrary.ProfileLibrary(filename, "r")

            for nid, profile in infile.iteritems_sorted():
                ninput += 1

                # Duplicates and unknown nids are reported and counted,
                # but the profile is still copied to the output.
                if nid in found:
                    nduplicate += 1
                    self.warn("duplicate nid: %i in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %i in file %s" % (nid, filename))
                found.add(nid)
                nfound += 1
                self.mProfileLibrary.add(nid, profile)
                noutput += 1

        missing = tokens.difference(found)
        if missing:
            self.warn("the following nids were missing: %s" % str(missing))

        self.info("adding %i missing nids" % len(missing))

        # Fill the gaps by processing each missing nid without neighbours.
        for nid in missing:
            self.applyMethod(AddaIO.NeighboursRecord(nid, []))

        self.info( "merging: parts=%i, ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %\
                       (len(infiles), ninput, noutput, nfound, len(missing), nduplicate, nunknown ) )

        self.info("deleting %i parts" % len(infiles))
        for part in infiles:
            fn, fi = ProfileLibrary.getFileNames(part)
            os.remove(fn)
            os.remove(fi)

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0
Exemple #2
0
    def finish(self):
        """Finalise processing and close the profile library.

        Unless this run covers only a subset, every nid known to the
        fasta index that has no entry in the profile library yet is
        processed with an empty neighbour list, so that sequences which
        only ever appear as sbjct still get a profile.
        """
        if not self.isSubset():
            all_nids = self.mFasta.getContigSizes().keys()
            added_count = 0

            for current in sorted(all_nids):
                # Membership is re-checked on every iteration because
                # applyMethod may itself modify the library.
                if current in self.mProfileLibrary:
                    continue
                record = AddaIO.NeighboursRecord(current, [])
                self.applyMethod(record)
                added_count += 1

            self.mOutput += added_count
            message = "added %i profiles for sequences without neighbours" % added_count
            self.info(message)

        self.mProfileLibrary.close()

        AddaModuleRecord.finish(self)
Exemple #3
0
    # Load the configuration; "~" in the path is expanded first.
    config = AddaIO.ConfigParser()
    config.read(os.path.expanduser(options.filename_config))

    # NOTE(review): the third argument looks like a fallback value used
    # when the option is absent -- confirm against AddaIO.ConfigParser.
    filename_graph = config.get("files", "output_graph", "adda.graph")
    filename_index = config.get("files", "output_index", "adda.graph.index")
    filename_fasta = config.get("files", "output_fasta", "adda")

    fasta = IndexedFasta.IndexedFasta(filename_fasta)

    index = cadda.IndexedNeighbours(filename_graph, filename_index)

    # Redirect the segment output to a scratch file for this test run.
    config.set("files", "output_segments", "test.segments")

    module = AddaSegment(
        config=config,
        fasta=fasta,
    )

    module.startUp()

    # Positional command line arguments are the nids to process.
    args = map(int, args)

    for nid in args:
        neighbours = index.getNeighbours(nid)
        module.applyMethod(AddaIO.NeighboursRecord(nid, neighbours))

    # The original wrote "module.finish" without parentheses, which only
    # evaluates the bound method and never runs it.
    module.finish()

    E.Stop()