def showDiffRandom(fileName):
    """Show a text diff between an article's original and converted text.

    The article is whatever getRandomArticle(fileName) returns.  If a
    converted cache exists for fileName, the converted text is looked up
    there by title; when no cached conversion is available a message is
    printed and the article is converted on the fly with
    articleconvert.convertArticle.  Both texts are passed through
    arsutils.normalizeNewlines before being handed to arsutils.showTxtDiff.

    NOTE(review): an older comment here said this shows the diff of the
    *first* article -- confirm what getRandomArticle actually selects.
    """
    srcArticle = getRandomArticle(fileName)
    title = srcArticle.getTitle()

    cachedArticle = None
    if wikipediasql.fConvertedCacheExists(fileName):
        cachedArticle = findConvertedArticle(fileName, title)
    if not cachedArticle:
        # Also reached when the converted cache does not exist at all.
        print("didn't find article '%s' in the converted cache" % title)

    origTxt = arsutils.normalizeNewlines(srcArticle.getText())

    if cachedArticle:
        rawConverted = cachedArticle.getText()
    else:
        rawConverted = articleconvert.convertArticle(
            srcArticle.getTitle(), srcArticle.getText())

    arsutils.showTxtDiff(origTxt, arsutils.normalizeNewlines(rawConverted))
def showDiffRandom(fileName):
    """Diff the original text of an article against its converted form.

    Steps, in order:
      1. Fetch an article with getRandomArticle(fileName).
      2. If wikipediasql reports a converted cache for fileName, look the
         article up there by title.
      3. If no cached conversion was obtained, print a notice and convert
         the article with articleconvert.convertArticle instead.
      4. Normalize newlines in both texts and show the diff via
         arsutils.showTxtDiff.
    """
    picked = getRandomArticle(fileName)
    title = picked.getTitle()

    haveCache = wikipediasql.fConvertedCacheExists(fileName)
    fromCache = None
    if haveCache:
        fromCache = findConvertedArticle(fileName, title)

    if not fromCache:
        print("didn't find article '%s' in the converted cache" % title)

    origTxt = picked.getText()
    origTxt = arsutils.normalizeNewlines(origTxt)

    if not fromCache:
        # No cached conversion: generate it from the original article.
        converted = articleconvert.convertArticle(
            picked.getTitle(), picked.getText())
    else:
        converted = fromCache.getText()
    converted = arsutils.normalizeNewlines(converted)

    arsutils.showTxtDiff(origTxt, converted)
def showDiffTitle(fileName, title, fSave=False, fForceConvert=False):
    """Show a text diff between the original and converted text of one article.

    Parameters:
      fileName      -- passed to findOrigArticle, findConvertedArticle and
                       wikipediasql.fConvertedCacheExists to locate the data.
      title         -- title of the article to look up.
      fSave         -- when true, also write the normalized original and
                       converted text to "<title>_orig.txt" and
                       "<title>_conv.txt" (spaces in the title replaced by
                       underscores), using relative paths.
      fForceConvert -- when true, skip the converted cache and always
                       convert with articleconvert.convertArticle.

    Returns None.  Prints a message and returns early if the original
    article is not found.  Calls sys.exit(0) if fForceConvert is false and
    no converted cache exists for fileName.
    """
    article = findOrigArticle(fileName, title)
    if not article:
        print("couldn't find article with the title %s" % title)
        return

    origTxt = arsutils.normalizeNewlines(article.getText())

    if fForceConvert:
        convertedTxt = articleconvert.convertArticle(
            article.getTitle(), article.getText())
    else:
        # re-get the title in case this was a redirect
        title = article.getTitle()
        if not wikipediasql.fConvertedCacheExists(fileName):
            print("Converted cache for '%s' doesn't exist" % fileName)
            # NOTE(review): exit status 0 on an error path; kept as-is
            # because callers/scripts may rely on it.
            sys.exit(0)
        convertedArticle = findConvertedArticle(fileName, title)
        # Identity test: avoids invoking any custom __eq__ on the article.
        if convertedArticle is None:
            print("didn't find converted article, generating it myself")
            convertedTxt = articleconvert.convertArticle(
                article.getTitle(), article.getText())
        else:
            convertedTxt = convertedArticle.getText()

    convertedTxt = arsutils.normalizeNewlines(convertedTxt)

    if fSave:
        baseName = article.getTitle().replace(" ", "_")
        _saveTxtFile("%s_orig.txt" % baseName, origTxt)
        _saveTxtFile("%s_conv.txt" % baseName, convertedTxt)

    arsutils.showTxtDiff(origTxt, convertedTxt)


def _saveTxtFile(path, txt):
    """Write txt to path in binary mode, closing the file even if write fails."""
    fo = open(path, "wb")
    try:
        fo.write(txt)
    finally:
        fo.close()
def showDiffTitle(fileName, title, fSave=False, fForceConvert=False):
    """Diff the original text of the article named title against its conversion.

    The original article is found with findOrigArticle(fileName, title); if
    it is missing a message is printed and the function returns.  The
    converted text comes from one of:
      * articleconvert.convertArticle, when fForceConvert is true;
      * the converted cache (findConvertedArticle), when it has the article;
      * articleconvert.convertArticle again, when the cache exists but does
        not contain the article.
    If fForceConvert is false and no converted cache exists for fileName,
    a message is printed and the process exits via sys.exit(0).

    When fSave is true the two normalized texts are additionally written to
    "<title>_orig.txt" and "<title>_conv.txt", with spaces in the title
    replaced by underscores.  Finally the diff is shown with
    arsutils.showTxtDiff.
    """
    article = findOrigArticle(fileName, title)
    if not article:
        print("couldn't find article with the title %s" % title)
        return

    origTxt = article.getText()
    origTxt = arsutils.normalizeNewlines(origTxt)

    convertedArticle = None
    if not fForceConvert:
        title = article.getTitle()  # re-get the title in case this was a redirect
        if wikipediasql.fConvertedCacheExists(fileName):
            convertedArticle = findConvertedArticle(fileName, title)
        else:
            print("Converted cache for '%s' doesn't exist" % fileName)
            # NOTE(review): this is a failure path but exits with status 0;
            # left unchanged so existing callers see the same behavior.
            sys.exit(0)
        if convertedArticle is None:
            print("didn't find converted article, generating it myself")

    if convertedArticle is None:
        # Either conversion was forced or the cache had no entry.
        convertedTxt = articleconvert.convertArticle(
            article.getTitle(), article.getText())
    else:
        convertedTxt = convertedArticle.getText()
    convertedTxt = arsutils.normalizeNewlines(convertedTxt)

    if fSave:
        saveTitle = article.getTitle().replace(" ", "_")
        outputs = (
            ("%s_orig.txt" % saveTitle, origTxt),
            ("%s_conv.txt" % saveTitle, convertedTxt),
        )
        for outPath, outTxt in outputs:
            fo = open(outPath, "wb")
            try:
                fo.write(outTxt)
            finally:
                # Close the handle even when the write raises.
                fo.close()

    arsutils.showTxtDiff(origTxt, convertedTxt)