def testRun(self):
    # Runs RevisionsDiffPipeline on 'Hello' -> 'Hello there' with an empty
    # revision dict and checks the reported counts: one word added, none
    # deleted, two words in total.
    diffPipeline = RevisionsDiffPipeline('Hello', 'Hello there', {})
    diffPipeline.start_test()

    wanted = Revision()
    wanted.wordsAdded = 1
    wanted.wordsDeleted = 0
    wanted.wordCount = 2

    self.assertEqual(diffPipeline.outputs.default.value, wanted.to_dict())
def testRun(self, mockUrlOpenMethod):
    # Feeds three mock revisions through RevisionsAnalysisPipeline, with the
    # URL-open mock serving canned text per export link, and checks the
    # per-revision word statistics in the pipeline's default output.
    exportLinks = [
        'docs.google.com/123',
        'docs.google.com/456',
        'docs.google.com/789',
    ]
    mockRevisions = [
        Revision(exportLink=link).to_dict() for link in exportLinks
    ]
    revisionTexts = ['Hello', 'Hello world.', 'Hello.']
    exportedText = {
        link: StringIO(text)
        for link, text in zip(exportLinks, revisionTexts)
    }

    def fakeUrlOpen(url):
        # Serve the canned export body registered for this link.
        return exportedText[url]

    mockUrlOpenMethod.side_effect = fakeUrlOpen

    # TODO(michaelcupino): Fix tests
    analysisPipeline = RevisionsAnalysisPipeline(mockRevisions, None)
    analysisPipeline.start_test()
    result = analysisPipeline.outputs.default.value

    # (wordsAdded, wordsDeleted, wordCount, exportLink) for each revision.
    expectedStats = (
        (1, 0, 1, 'docs.google.com/123'),
        (1, 0, 2, 'docs.google.com/456'),
        (0, 1, 1, 'docs.google.com/789'),
    )
    expectedRevisions = []
    for added, deleted, total, link in expectedStats:
        expectedRevision = Revision()
        expectedRevision.wordsAdded = added
        expectedRevision.wordsDeleted = deleted
        expectedRevision.wordCount = total
        expectedRevision.exportLink = link
        expectedRevisions.append(expectedRevision)

    self.assertEqual(
        [revision.to_dict() for revision in expectedRevisions], result)
def run(self, revisionTextA, revisionTextB, revisionDict):
    """Diff two revision texts and record word statistics on a Revision.

    Args:
      revisionTextA: Text passed to diff_main as the first (old) text.
      revisionTextB: Text passed to diff_main as the second (new) text; its
        word count (via self.getWordCount) is stored as wordCount.
      revisionDict: Keyword arguments used to construct the Revision that
        carries the results.

    Returns:
      The to_dict() form of the Revision, with wordsAdded, wordsDeleted and
      wordCount filled in.
    """
    gdiff = diff_match_patch()
    revisionDiffs = gdiff.diff_main(revisionTextA, revisionTextB, False)
    # Cleans up revisionDiffs in place; the return value is not used.
    gdiff.diff_cleanupSemantic(revisionDiffs)

    # Build real lists rather than relying on filter()/map(): on Python 3
    # those return one-shot iterators, and diffWordCount is read twice below,
    # so the second reader would otherwise see an exhausted iterator.
    # NOTE(review): isRemoveOrAdd presumably keeps only insert/delete
    # entries -- confirm against its definition.
    changedDiffs = [
        diff for diff in revisionDiffs if self.isRemoveOrAdd(diff)
    ]
    diffWordCount = [self.countWords(diff) for diff in changedDiffs]

    addedWordCount = self.getAddWordCount(diffWordCount)
    deletedWordCount = self.getDeletedWordCount(diffWordCount)

    revision = Revision(**revisionDict)
    revision.wordsAdded = addedWordCount
    revision.wordsDeleted = deletedWordCount
    revision.wordCount = self.getWordCount(revisionTextB)
    return revision.to_dict()