def validate(self):
    """Check the segments file against the nids known to the fasta index.

    Reads ``self.mFilenameSegments`` through ``SegmentedFile.fileopen`` and
    takes the text before the first tab of every line as that line's nid.
    Each run of consecutive lines sharing a nid is counted once.  A nid whose
    run starts again later in the file is reported as a duplicate, and a nid
    that is not among ``self.mFasta.getTokens()`` is reported as unknown.
    Tokens that never appear in the file are reported as missing.

    Problems are reported through ``self.warn``; a one-line summary of the
    counters is sent to ``self.info``.

    Returns:
        True if no nid is missing, duplicated or unknown, otherwise False.

    Raises:
        ValueError: if a line of the file contains no tab character.
    """
    filename = self.mFilenameSegments

    # nids are compared as the raw strings read from the file.
    # NOTE(review): assumes getTokens() yields strings as well - confirm.
    tokens = set(self.mFasta.getTokens())

    found = set()
    last_nid = None
    ninput, nfound, nunknown, nduplicate = 0, 0, 0, 0

    infile = SegmentedFile.fileopen(filename)
    try:
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid == last_nid:
                # still inside the same run of lines for this nid
                continue

            if nid in found:
                nduplicate += 1
                self.warn("duplicate nid: %s in file %s" % (nid, filename))
            if nid not in tokens:
                nunknown += 1
                self.warn("unknown nid: %s in file %s" % (nid, filename))

            found.add(nid)
            nfound += 1
            last_nid = nid
    finally:
        # Best-effort close: the type returned by fileopen() is not visible
        # here, so only call close() when the handle actually provides it.
        close = getattr(infile, "close", None)
        if callable(close):
            close()

    # Validation does not filter any lines, so every line read counts as output.
    noutput = ninput

    missing = tokens.difference(found)
    if missing:
        self.warn("the following nids were missing: %s" % str(missing))

    self.info(
        "merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i"
        % (ninput, noutput, nfound, len(missing), nduplicate, nunknown)
    )

    return not missing and nduplicate == 0 and nunknown == 0
def validate(self):
    """Check the graph file against the nids known to the fasta index.

    Reads ``self.mFilenameGraph`` through ``SegmentedFile.fileopen`` and
    takes the text before the first tab of every line as that line's nid.
    Each run of consecutive lines sharing a nid is counted once.  A nid whose
    run starts again later in the file is reported as a duplicate, and a nid
    that is not among ``self.mFasta.getTokens()`` is reported as unknown.
    Tokens that never appear in the file are reported as missing.

    Problems are reported through ``self.warn``; a one-line summary of the
    counters is sent to ``self.info``.

    Returns:
        True if no nid is missing, duplicated or unknown, otherwise False.

    Raises:
        ValueError: if a line of the file contains no tab character.
    """
    filename = self.mFilenameGraph

    # nids are compared as the raw strings read from the file.
    # NOTE(review): assumes getTokens() yields strings as well - confirm.
    tokens = set(self.mFasta.getTokens())

    found = set()
    last_nid = None
    ninput, nfound, nunknown, nduplicate = 0, 0, 0, 0

    infile = SegmentedFile.fileopen(filename)
    try:
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid == last_nid:
                # still inside the same run of lines for this nid
                continue

            if nid in found:
                nduplicate += 1
                self.warn("duplicate nid: %s in file %s" % (nid, filename))
            if nid not in tokens:
                nunknown += 1
                self.warn("unknown nid: %s in file %s" % (nid, filename))

            found.add(nid)
            nfound += 1
            last_nid = nid
    finally:
        # Best-effort close: the type returned by fileopen() is not visible
        # here, so only call close() when the handle actually provides it.
        close = getattr(infile, "close", None)
        if callable(close):
            close()

    # Validation does not filter any lines, so every line read counts as output.
    noutput = ninput

    missing = tokens.difference(found)
    if missing:
        self.warn("the following nids were missing: %s" % str(missing))

    self.info(
        "merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i"
        % (ninput, noutput, nfound, len(missing), nduplicate, nunknown)
    )

    return not missing and nduplicate == 0 and nunknown == 0
def validate(self):
    """Check the segments file against the nids known to the fasta index.

    Reads ``self.mFilenameSegments`` through ``SegmentedFile.fileopen`` and
    takes the text before the first tab of every line as that line's nid.
    Each run of consecutive lines sharing a nid is counted once.  A nid whose
    run starts again later in the file is reported as a duplicate, and a nid
    that is not among ``self.mFasta.getTokens()`` is reported as unknown.
    Tokens that never appear in the file are reported as missing.

    Problems are reported through ``self.warn``; a one-line summary of the
    counters is sent to ``self.info``.

    Returns:
        True if no nid is missing, duplicated or unknown, otherwise False.

    Raises:
        ValueError: if a line of the file contains no tab character.
    """
    filename = self.mFilenameSegments

    # nids are compared as the raw strings read from the file.
    # NOTE(review): assumes getTokens() yields strings as well - confirm.
    tokens = set(self.mFasta.getTokens())

    found = set()
    last_nid = None
    ninput, nfound, nunknown, nduplicate = 0, 0, 0, 0

    infile = SegmentedFile.fileopen(filename)
    try:
        for line in infile:
            ninput += 1
            nid = line[:line.index("\t")]
            if nid == last_nid:
                # still inside the same run of lines for this nid
                continue

            if nid in found:
                nduplicate += 1
                self.warn("duplicate nid: %s in file %s" % (nid, filename))
            if nid not in tokens:
                nunknown += 1
                self.warn("unknown nid: %s in file %s" % (nid, filename))

            found.add(nid)
            nfound += 1
            last_nid = nid
    finally:
        # Best-effort close: the type returned by fileopen() is not visible
        # here, so only call close() when the handle actually provides it.
        close = getattr(infile, "close", None)
        if callable(close):
            close()

    # Validation does not filter any lines, so every line read counts as output.
    noutput = ninput

    missing = tokens.difference(found)
    if missing:
        self.warn("the following nids were missing: %s" % str(missing))

    self.info(
        "merging: ninput=%i, noutput=%i, nfound=%i, nmissing=%i, nduplicate=%i, nunknown=%i"
        % (ninput, noutput, nfound, len(missing), nduplicate, nunknown)
    )

    return not missing and nduplicate == 0 and nunknown == 0