Example #1
0
    def validate(self):
        """Check the segments file against the nids known to ``self.mFasta``.

        Reads ``self.mFilenameSegments`` line by line and takes the text
        before the first tab as the nid. Consecutive lines sharing an nid
        count as a single entry. A warning is emitted for

        * every nid that re-appears after a different nid (duplicate),
        * every nid not returned by ``self.mFasta.getTokens()`` (unknown),
        * the set of tokens that never appear in the file (missing).

        Returns:
            True if no nid is missing, duplicated or unknown.

        Raises:
            ValueError: if a line contains no tab character.
        """
        filename = self.mFilenameSegments
        infile = SegmentedFile.fileopen(filename)

        # NOTE(review): assumes getTokens() yields nids in the same (string)
        # form as the first column of the file - confirm against the index.
        tokens = set(self.mFasta.getTokens())

        last_nid = None
        found = set()
        # every counter has to exist before the loop, otherwise the first
        # ``+=`` raises UnboundLocalError.
        ninput, noutput = 0, 0
        nfound, nunknown, nduplicate = 0, 0, 0
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid != last_nid:
                # Only the first line of a run of identical nids is checked;
                # seeing the nid again later means its entries are split up.
                if nid in found:
                    nduplicate += 1
                    # nid is text taken from the line, hence %s and not %i.
                    self.warn("duplicate nid: %s in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %s in file %s" % (nid, filename))
                found.add(nid)
                nfound += 1
                last_nid = nid
            noutput += 1

        missing = tokens.difference(found)
        if missing:
            self.warn("the following nids were missing: %s" % str(missing))

        self.info("merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %
                  (ninput, noutput, nfound, len(missing), nduplicate, nunknown))

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0
Example #2
0
    def validate(self):
        """Check the graph file against the nids known to ``self.mFasta``.

        Reads ``self.mFilenameGraph`` line by line and takes the text before
        the first tab as the nid. Consecutive lines sharing an nid count as a
        single entry. A warning is emitted for

        * every nid that re-appears after a different nid (duplicate),
        * every nid not returned by ``self.mFasta.getTokens()`` (unknown),
        * the set of tokens that never appear in the file (missing).

        Note: duplicated code with AddaSegments - can be merged.

        Returns:
            True if no nid is missing, duplicated or unknown.

        Raises:
            ValueError: if a line contains no tab character.
        """
        # NOTE(review): the result is not used by the validation below; the
        # call is retained only in case it has side effects - confirm and
        # remove if it does not.
        self.getPartialResults()

        filename = self.mFilenameGraph
        infile = SegmentedFile.fileopen(filename)

        # NOTE(review): assumes getTokens() yields nids in the same (string)
        # form as the first column of the file - confirm against the index.
        tokens = set(self.mFasta.getTokens())

        last_nid = None
        found = set()
        # every counter has to exist before the loop, otherwise the first
        # ``+=`` raises UnboundLocalError.
        ninput, noutput = 0, 0
        nfound, nunknown, nduplicate = 0, 0, 0
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid != last_nid:
                # Only the first line of a run of identical nids is checked;
                # seeing the nid again later means its entries are split up.
                if nid in found:
                    nduplicate += 1
                    # nid is text taken from the line, hence %s and not %i.
                    self.warn("duplicate nid: %s in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %s in file %s" % (nid, filename))
                found.add(nid)
                nfound += 1
                last_nid = nid
            noutput += 1

        missing = tokens.difference(found)
        if missing:
            self.warn("the following nids were missing: %s" % str(missing))

        self.info("merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %
                  (ninput, noutput, nfound, len(missing), nduplicate, nunknown))

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0
Example #3
0
    def validate(self):
        """Validate the segments file against the tokens of ``self.mFasta``.

        Each line of ``self.mFilenameSegments`` is expected to start with an
        nid followed by a tab. Runs of consecutive lines with the same nid
        are treated as one entry. Warnings are issued for nids that occur in
        more than one run (duplicate), nids that are not among
        ``self.mFasta.getTokens()`` (unknown), and tokens that do not occur
        in the file at all (missing).

        Returns:
            True if there are no missing, duplicate or unknown nids.

        Raises:
            ValueError: if a line contains no tab character.
        """
        filename = self.mFilenameSegments
        infile = SegmentedFile.fileopen(filename)

        # NOTE(review): assumes getTokens() yields nids in the same (string)
        # form as the first column of the file - confirm against the index.
        tokens = set(self.mFasta.getTokens())

        last_nid = None
        found = set()
        # Initialise all counters up front; incrementing an unbound local
        # raises UnboundLocalError.
        ninput, noutput = 0, 0
        nfound, nunknown, nduplicate = 0, 0, 0
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid != last_nid:
                # A new run starts here. If the nid was already seen in an
                # earlier run, its entries are not contiguous.
                if nid in found:
                    nduplicate += 1
                    # nid is a string sliced from the line, so format with %s.
                    self.warn("duplicate nid: %s in file %s" % (nid, filename))
                if nid not in tokens:
                    nunknown += 1
                    self.warn("unknown nid: %s in file %s" % (nid, filename))
                found.add(nid)
                nfound += 1
                last_nid = nid
            noutput += 1

        missing = tokens.difference(found)
        if missing:
            self.warn("the following nids were missing: %s" % str(missing))

        self.info("merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i" %
                  (ninput, noutput, nfound, len(missing), nduplicate, nunknown))

        return len(missing) == 0 and nduplicate == 0 and nunknown == 0