def writeZipFile(archiveType, fileNameRoot, shouldMakeSnapshot, wikiAddress):
    """Copy the recently changed wiki pages into a folder and archive the folder.

    archiveType is the archive extension; 'zip' is written through
    almoner.writeZipFileByFolder, anything else through writeTarFileByFolder.
    fileNameRoot is both the working folder and the archive name without its
    extension.  When shouldMakeSnapshot is true the finished archive is also
    copied to a file whose name carries the current date and hour.
    """
    print('Copying:')
    print(wikiAddress)
    print('')
    almoner.makeDirectory(fileNameRoot)
    # The timestamp is taken before the titles are fetched, as in the original
    # statement order.
    timestampText = datetime.datetime.today().strftime(globalDateTimeFormat)
    titles = getRecentTitles(archiveType, fileNameRoot, wikiAddress)
    print('Number of titles: %s' % len(titles))
    almoner.writeFileText(os.path.join(fileNameRoot, 'last_modified.txt'), timestampText)
    currentLetter = '0'
    for title in titles:
        initial = title[0]
        if initial != currentLetter:
            currentLetter = initial
            print('Copying articles starting with %s.' % initial.upper())
        pageAddress = wikiAddress + '/doku.php?id=%s&do=edit' % title
        pageText = almoner.getSourceText(pageAddress)
        time.sleep(2)
        almoner.writeFileText(os.path.join(fileNameRoot, title), pageText)
    print('There were %s files in the wiki.\n' % len(titles))
    archiveFileName = fileNameRoot + '.' + archiveType
    if archiveType != 'zip':
        writeTarFileByFolder(archiveType, archiveFileName, fileNameRoot)
    else:
        almoner.writeZipFileByFolder(fileNameRoot)
    if not shouldMakeSnapshot:
        return
    dateSuffix = datetime.datetime.today().strftime('_%y-%m-%d_%H')
    destination = fileNameRoot + dateSuffix + '.' + archiveType
    shutil.copyfile(archiveFileName, destination)
    print('The snapshot zip file has been written to:\n%s\n' % destination)
def getSourceTextIfByAuthor(author, linkName):
    """Return the wiki source text of the article named linkName.

    An empty linkName yields '' immediately, without sleeping or fetching.

    author is accepted for interface compatibility but is not consulted: the
    check that the last edit was made by the author (getIsLastEditByAuthor)
    was unfinished and was taken out because it was too slow, so the text of
    every non-empty link is returned.
    """
    if linkName == '':
        return ''
    # Pause before each fetch, presumably to go easy on the wiki server.
    time.sleep(0.5)
    return almoner.getSourceText('http://devtome.com/doku.php?id=%s&do=edit' % linkName)
def getIsWriterNewProductive(name, paidNameSet):
    """Tell whether an unpaid writer has written at least a thousand words.

    Returns False straight away when the lowercased name is in paidNameSet.
    Otherwise the writer's articles are fetched one at a time, each article
    name is printed, and True is returned as soon as the running word total
    reaches 1000.  False is returned when the articles run out first.
    """
    if name.lower() in paidNameSet:
        return False
    wordsSoFar = 0
    for articleName in rater.getArticles(name):
        print(articleName)
        articleAddress = 'http://devtome.com/doku.php?id=%s&do=edit' % articleName
        wordsSoFar += devtome.getWordCount(almoner.getSourceText(articleAddress))
        if wordsSoFar >= 1000:
            return True
    return False
def __init__(self, coinAddress, isShareName, name):
    """Load the user's wiki page and work out the payout in fifths.

    The page is read line by line.  A line containing '==' is treated as a
    heading that switches the link, post and signature sections on or off
    according to whether the lowercased heading contains 'link', 'post' or
    'signature'.  Every line, the heading included, is handed to the payout
    method of each section that is currently on.
    """
    self.coinAddress = coinAddress
    self.name = name
    self.sourceAddress = 'http://devtome.com/doku.php?id=wiki:user:%s&do=edit' % self.name
    self.domainPayoutSet = set()
    self.postPayoutSet = set()
    self.signaturePageSet = set()
    self.payoutFifth = 0
    self.postWords = 0
    self.subdomainPayout = 0
    print('\nLoading pages from %s' % self.name.capitalize())
    pageText = almoner.getSourceText(self.sourceAddress)
    inLinks = False
    inPosts = False
    inSignatures = False
    for line in almoner.getTextLines(pageText):
        lowered = line.strip().lower()
        if '==' in lowered:
            # A heading replaces all three section flags at once.
            inLinks = 'link' in lowered
            inPosts = 'post' in lowered
            inSignatures = 'signature' in lowered
        if inLinks:
            self.addLinkPayout(lowered)
        if inPosts:
            self.addPostPayout(lowered)
        if inSignatures:
            self.addSignaturePayout(lowered)
    # The subdomain fifth is only granted when no domain payout was found.
    if len(self.domainPayoutSet) == 0 and self.subdomainPayout == 1:
        self.payoutFifth += 1
        print('Subdomain payout: 1')
    if self.postWords > 1000:
        self.payoutFifth += 2
        print('Big post payout: 2')
    elif self.postWords > 100:
        self.payoutFifth += 1
        print('Small post payout: 1')
    if self.payoutFifth > 0:
        if isShareName:
            print('%s is on a share list, so the payout is doubled.' % self.name)
            self.payoutFifth *= 2
        print('Total payout fifths: %s' % self.payoutFifth)
def getIsWriterNewProductive(name, paidNameSet):
    """Report whether a not-yet-paid writer has produced 1000 words or more.

    A writer whose lowercased name appears in paidNameSet is never new, so
    False is returned without looking anything up.  For anyone else the
    articles are downloaded in order, printing each article name, and the
    search stops with True once the accumulated word count is at least 1000.
    """
    alreadyPaid = name.lower() in paidNameSet
    if alreadyPaid:
        return False
    runningTotal = 0
    for article in rater.getArticles(name):
        print(article)
        sourceText = almoner.getSourceText('http://devtome.com/doku.php?id=%s&do=edit' % article)
        runningTotal = runningTotal + devtome.getWordCount(sourceText)
        if runningTotal < 1000:
            continue
        return True
    return False
def getArticleLinkString(articles):
    """Pick an article that has text and return its display name.

    The articles are scanned circularly from a random starting index.  Each
    name is turned into its display form (underscores replaced by spaces,
    then capitalized) and its source text is fetched.  The first article
    whose text is longer than 200 characters is returned at once; if none
    qualifies, the article with the longest non-empty text is returned.
    Returns '' when articles is empty or every text is empty.
    """
    articleCount = len(articles)
    startIndex = int(float(articleCount) * random.random())
    longestLinkString = ''
    longestLength = 0
    # range instead of xrange: same iteration, and it also runs on Python 3,
    # where xrange no longer exists.
    for offset in range(articleCount):
        article = articles[(startIndex + offset) % articleCount]
        articleLinkString = article.replace('_', ' ').capitalize()
        sourceText = almoner.getSourceText('http://devtome.com/doku.php?id=%s&do=edit' % articleLinkString)
        textLength = len(sourceText)
        if textLength > 200:
            return articleLinkString
        if textLength > longestLength:
            longestLength = textLength
            longestLinkString = articleLinkString
    return longestLinkString
def getRatingsByAddress(address):
    """Collect the ratings found on the page at address.

    The rater name is the lowercased part of the address between its first
    and last underscore.  When the address has no underscore, or only one,
    a warning is printed and an empty list is returned.  Otherwise one Rating
    is built per line of the page and those with a non-empty article are
    returned in page order.
    """
    underscoreStart = address.find('_')
    if underscoreStart == -1:
        print('Warning, no underscore in address.')
        return []
    underscoreEnd = address.rfind('_')
    if underscoreEnd == underscoreStart:
        print('Warning, firstUnderscore same as lastUnderscore.')
        return []
    raterName = address[underscoreStart + 1 : underscoreEnd].lower()
    pageLines = almoner.getTextLines(almoner.getSourceText(address))
    candidates = [Rating(address, pageLine, raterName) for pageLine in pageLines]
    return [candidate for candidate in candidates if candidate.article != '']
def getArticles(name):
    """Return the lowercased link names listed on a user's wiki page.

    Links are collected from the lines that follow a heading (a line
    containing '==') whose lowercased text contains 'collated' or 'original'.
    Collection stops at the next heading that does not contain '===', and a
    heading that does contain '===' leaves it running.
    """
    userPageAddress = 'http://devtome.com/doku.php?id=wiki:user:%s&do=edit' % name
    print('Loading user page from %s' % name)
    userPageText = almoner.getSourceText(userPageAddress)
    invoicedArticles = []
    inArticleSection = False
    for line in almoner.getTextLines(userPageText):
        lowered = line.strip().lower()
        isHeading = '==' in lowered
        if isHeading and '===' not in lowered:
            inArticleSection = False
        if inArticleSection:
            linkName = devtome.getLinkName(line, name).lower()
            if linkName != '':
                invoicedArticles.append(linkName)
        # The section is switched on only after the heading line itself has
        # been handled, so the heading is never read as a link.
        if isHeading and ('collated' in lowered or 'original' in lowered):
            inArticleSection = True
    return invoicedArticles
# Initialize an author record from one row of account data and the author's wiki page.
#
# Parameters, as used in the body:
#   averageRating       default for tomecount.ratingMedian when the author has no entry in ratingDictionary
#   backupFolder        folder passed to os.path.join for the backup of the user page
#   backupFileSet       stored on self unchanged
#   categoryDictionary  passed through to self.saveArticle
#   ratingDictionary    looked up by the lowercased author name
#   titles, words       parallel sequences; parameterDictionary[titles[i]] = words[i]
#   viewDictionary      page views keyed by lowercased, underscored link name
#
# What the visible code does:
#   * 'Cumulative Payout', when present, becomes tomecount.previousPayout; 'Name' becomes self.name.
#   * The user page is fetched with almoner.getSourceText and handed to almoner.writeFileText.
#   * Lines under a 'collated' or 'original' heading are treated as article links: the text is fetched with
#     getSourceTextIfByAuthor, a warning is recorded when a named link yields no text, and each distinct text
#     contributes its image count, word count and page views; self.saveArticle is called when it has words.
#   * Lines under a 'tip' heading are split at the first colon; when the left part contains 'dvc', 'devcoin'
#     or 'coin address' the right part becomes the tip address.
#   * weightedWordCount = collatedWordCount * 3 / 10 + originalWordCount + 10 * imageCount.
#   * From 1000 weighted words up, cumulativePayout = int(round(weightedWordCount * 0.001)).
#   * payout = max(cumulativePayout - previousPayout, 0), capped at 50, or at 0 when the tip address differs
#     from the 'Coin Address' parameter (the name 'Mosinnagant' is exempt from that check).
#
# NOTE(review): the text between 'wiki:user:' and "'==' in lineStrippedLower" is masked by '******'; the rest of
#   the writeFileText call, the initialisation of isCollated / isOriginal / isTip / linkTexts and the loop header
#   are not visible here, and the statement nesting cannot be read from this collapsed form.
# NOTE(review): 'collatedWeightedWordCount -= identicalCollatedCount' is followed by a plain assignment to
#   collatedWeightedWordCount, so the subtraction looks like it has no effect; confirm whether collatedWordCount
#   was meant.
# NOTE(review): categorization divides by tomecount.articleCount; confirm it cannot be zero at that point.
def __init__(self, averageRating, backupFolder, backupFileSet, categoryDictionary, ratingDictionary, titles, viewDictionary, words): 'Initialize.' self.backupFolder = backupFolder self.backupFileSet = backupFileSet identicalCollatedCount = 0 identicalOriginalCount = 0 self.newArticles = [] self.tomecount = Tomecount() self.parameterDictionary = {} self.sentenceSet = set([]) self.warnings = [] for wordIndex, word in enumerate(words): self.parameterDictionary[titles[wordIndex]] = word if 'Cumulative Payout' in self.parameterDictionary: self.tomecount.previousPayout = int( self.parameterDictionary['Cumulative Payout']) self.name = self.parameterDictionary['Name'] self.sourceAddress = 'http://devtome.com/doku.php?id=wiki:user:%s&do=edit' % self.name tipAddress = '' print('Loading articles from %s' % self.name) sourceText = almoner.getSourceText(self.sourceAddress) almoner.writeFileText( os.path.join(backupFolder, 'wiki:user:'******'==' in lineStrippedLower: if '===' not in lineStrippedLower: isCollated = False isOriginal = False isTip = False if isCollated: linkName = getLinkName(line, self.name) underscoredLinkName = linkName.lower().replace(' ', '_') linkText = getSourceTextIfByAuthor(self, linkName) if linkName != '' and linkText == '': self.printWarning( 'Warning, could not invoice article link: %s' % linkName) if linkText not in linkTexts: linkTexts.add(linkText) self.tomecount.imageCount += getImageCount(linkText) wordCount = getWordCount(linkText) if underscoredLinkName in viewDictionary: self.tomecount.pageViews += viewDictionary[ underscoredLinkName] if wordCount > 0: print('Collated article: %s, Word Count: %s' % (lineStrippedLower, almoner.getCommaNumberString(wordCount))) self.saveArticle(categoryDictionary, linkName, linkText, underscoredLinkName) identicalCollatedCount += self.getIdenticalWordCount( linkText) self.tomecount.collatedWordCount += wordCount if isOriginal: linkName = getLinkName(line, self.name) underscoredLinkName = 
linkName.lower().replace(' ', '_') linkText = getSourceTextIfByAuthor(self, linkName) if linkName != '' and linkText == '': self.printWarning( 'Warning, could not invoice article link: %s' % linkName) if linkText not in linkTexts: linkTexts.add(linkText) self.tomecount.imageCount += getImageCount(linkText) wordCount = getWordCount(linkText) if underscoredLinkName in viewDictionary: self.tomecount.pageViews += viewDictionary[ underscoredLinkName] if wordCount > 0: print('Original article: %s, Word Count: %s' % (lineStrippedLower, almoner.getCommaNumberString(wordCount))) self.saveArticle(categoryDictionary, linkName, linkText, underscoredLinkName) identicalOriginalCount += self.getIdenticalWordCount( linkText) self.tomecount.originalWordCount += wordCount if isTip: tipLine = line.strip().replace("'", '') colonIndex = tipLine.find(':') if colonIndex >= 0: addressName = tipLine[:colonIndex].strip().lower() if 'dvc' in addressName or 'devcoin' in addressName or 'coin address' in addressName: tipAddress = tipLine[colonIndex + 1:].strip() if '==' in lineStrippedLower: if 'collated' in lineStrippedLower: isCollated = True elif 'original' in lineStrippedLower: isOriginal = True elif 'tip' in lineStrippedLower: isTip = True if identicalCollatedCount > 0: self.tomecount.collatedWeightedWordCount -= identicalCollatedCount print('Identical Collated Word Count: %s' % almoner.getCommaNumberString(identicalCollatedCount)) if identicalOriginalCount > 0: self.tomecount.originalWordCount -= identicalOriginalCount print('Identical Original Word Count: %s' % almoner.getCommaNumberString(identicalOriginalCount)) self.tomecount.collatedWeightedWordCount = self.tomecount.collatedWordCount * 3 / 10 self.tomecount.wordCount = self.tomecount.collatedWordCount + self.tomecount.originalWordCount self.tomecount.weightedWordCount = self.tomecount.collatedWeightedWordCount + self.tomecount.originalWordCount self.tomecount.weightedWordCount += 10 * self.tomecount.imageCount if 
self.tomecount.weightedWordCount >= 1000: self.tomecount.cumulativePayout = int( round(float(self.tomecount.weightedWordCount) * 0.001)) print('Weighted Word Count: %s' % almoner.getCommaNumberString(self.tomecount.weightedWordCount)) self.tomecount.payout = max( self.tomecount.cumulativePayout - self.tomecount.previousPayout, 0) maximumPayout = 50 if tipAddress != self.parameterDictionary[ 'Coin Address'] and self.name != 'Mosinnagant': self.printWarning( 'Warning, the coin address is not the same as the tip address, so nothing will be paid.' ) maximumPayout = 0 if self.tomecount.payout > maximumPayout: self.tomecount.payout = maximumPayout self.tomecount.cumulativePayout = self.tomecount.previousPayout + maximumPayout if self.tomecount.cumulativePayout > 0: self.tomecount.categorization = float( self.tomecount.categorizedArticleCount) / float( self.tomecount.articleCount) self.tomecount.ratingMedian = averageRating lowerName = self.name.lower() if lowerName in ratingDictionary: self.tomecount.ratingMedian = ratingDictionary[lowerName] weightedPageViews = self.tomecount.pageViews if self.tomecount.previousPayout == 0: weightedPageViews += weightedPageViews self.tomecount.viewsPerThousandWords = 1000.0 * float( weightedPageViews) / float(self.tomecount.weightedWordCount) self.tomecount.normalizedCategorization = self.tomecount.categorization self.tomecount.normalizedPopularity = self.tomecount.viewsPerThousandWords self.tomecount.normalizedRatingMedian = self.tomecount.ratingMedian self.tomecount.popularityTimesRating = int( round(self.tomecount.pageViews * float(self.tomecount.ratingMedian) / 99.0))
def writeCategoryFile(categoryDictionary, categoryFolder, categoryKey, rootFileName):
    """Merge the categorized articles into a category page and save it locally.

    The current 'category:<categoryKey>' page is downloaded.  The text up to
    and including the '{{script}}' marker is kept as it is.  The list after
    the marker is rebuilt, sorted by lowercased title, from the links already
    on the page plus the titles in categoryDictionary[categoryKey]; a title
    already on the page keeps the spelling it has there.  When the page uses
    one-character headings the list is grouped under '===X===' headings.
    Everything from the first heading longer than one character onwards is
    appended unchanged.  The result is written into categoryFolder under the
    name 'category:<categoryKey>'.

    Nothing is written when the page has no '{{script}}' marker.
    Raises KeyError when categoryKey is not in categoryDictionary.
    rootFileName is not used; it is kept so existing callers keep working.
    """
    categorySuffix = 'category:' + categoryKey
    categoryFileName = os.path.join(categoryFolder, categorySuffix)
    pageAddress = 'http://devtome.com/doku.php?id=%s&do=edit' % categorySuffix
    # Turn the HTML entity for a double quote back into the character; the
    # previous replace('"', '"') replaced a quote by itself and did nothing.
    sourceText = almoner.getSourceText(pageAddress).replace('&quot;', '"')
    scriptToken = '{{script}}'
    scriptIndex = sourceText.find(scriptToken)
    if scriptIndex == -1:
        return
    scriptIndex += len(scriptToken)
    afterScriptText = sourceText[scriptIndex:]
    isAlphabeticallyGrouped = False
    scriptEndToken = None
    titleDictionary = {}
    for line in almoner.getTextLines(afterScriptText):
        lineStripped = line.strip()
        if lineStripped == '':
            continue
        if lineStripped.startswith('=') and lineStripped.endswith('='):
            heading = lineStripped.replace('=', '').strip()
            if len(heading) > 1:
                # A real heading ends the generated list; later lines are
                # carried over verbatim through fromTokenText.
                scriptEndToken = lineStripped
                break
            if len(heading) == 1:
                isAlphabeticallyGrouped = True
            continue
        # Peel the list bullet and link brackets off '*[[:title|text]]'.
        if lineStripped.startswith('*'):
            lineStripped = lineStripped[1:]
        if lineStripped.startswith('[['):
            lineStripped = lineStripped[2:]
        if lineStripped.startswith(':'):
            lineStripped = lineStripped[1:]
        if lineStripped.endswith(']]'):
            lineStripped = lineStripped[:-2]
        titleKey = lineStripped.lower().replace('_', ' ')
        barIndex = titleKey.find('|')
        if barIndex != -1:
            titleKey = titleKey[:barIndex]
        # An empty key would make titleKey[0] fail below and would only
        # produce an empty link, so it is left out.
        if titleKey != '':
            titleDictionary[titleKey] = lineStripped
    fromTokenText = ''
    if scriptEndToken is not None:
        fromTokenText = afterScriptText[afterScriptText.find(scriptEndToken):]
    for articleTitle in categoryDictionary[categoryKey]:
        articleTitleLower = articleTitle.lower().replace('_', ' ')
        if articleTitleLower != '' and articleTitleLower not in titleDictionary:
            titleDictionary[articleTitleLower] = articleTitle
    textParts = [sourceText[:scriptIndex], '\n']
    lastLetter = None
    # sorted() works on Python 2 and 3; dict.keys() has no sort() on Python 3.
    for titleKey in sorted(titleDictionary):
        if isAlphabeticallyGrouped:
            firstLetter = titleKey[0]
            if firstLetter != lastLetter:
                textParts.append('===%s===\n' % firstLetter.capitalize())
                lastLetter = firstLetter
        title = titleDictionary[titleKey]
        if ']]' not in title:
            title += ']]'
        textParts.append('[[:%s\n\n' % title)
    textParts.append(fromTokenText)
    almoner.writeFileText(categoryFileName, ''.join(textParts))
# Build an author record from one row of account data plus the author's wiki user page.
#
# Inputs, as the body uses them:
#   titles, words       parallel sequences zipped into self.parameterDictionary (title -> word)
#   averageRating       used as tomecount.ratingMedian unless ratingDictionary has the lowercased name
#   backupFolder        joined with a 'wiki:user:' file name for the page backup
#   backupFileSet       kept on self as given
#   categoryDictionary  forwarded to self.saveArticle
#   viewDictionary      page views keyed by lowercased link name with spaces turned into underscores
#
# Steps visible in the code:
#   * previousPayout comes from the 'Cumulative Payout' parameter when present; the name from 'Name'.
#   * The user page is downloaded (almoner.getSourceText) and passed to almoner.writeFileText.
#   * Under a 'collated' or 'original' heading each line is read as an article link; its text comes from
#     getSourceTextIfByAuthor.  A named link without text records a warning.  Every distinct text adds its
#     images, words and page views to the tomecount, and self.saveArticle is called when the word count is positive.
#   * Under a 'tip' heading a line is split at its first colon; a left part containing 'dvc', 'devcoin' or
#     'coin address' makes the right part the tip address.
#   * weightedWordCount is collatedWordCount * 3 / 10 plus originalWordCount plus 10 per image.
#   * At 1000 weighted words or more, cumulativePayout is int(round(weightedWordCount * 0.001)).
#   * payout is max(cumulativePayout - previousPayout, 0), limited to 50, or to 0 when the tip address is not
#     the 'Coin Address' parameter (the name 'Mosinnagant' skips that check).
#
# NOTE(review): '******' masks the text between 'wiki:user:' and "'==' in lineStrippedLower", so the end of the
#   writeFileText call, the setup of isCollated / isOriginal / isTip / linkTexts and the loop header are missing
#   from view, and the nesting of statements cannot be determined from this collapsed form.
# NOTE(review): the decrement of collatedWeightedWordCount by identicalCollatedCount is overwritten by the
#   assignment that follows it and so appears to be ineffective; confirm whether collatedWordCount was intended.
# NOTE(review): the categorization ratio divides by tomecount.articleCount; confirm it is non-zero there.
def __init__(self, averageRating, backupFolder, backupFileSet, categoryDictionary, ratingDictionary, titles, viewDictionary, words): 'Initialize.' self.backupFolder = backupFolder self.backupFileSet = backupFileSet identicalCollatedCount = 0 identicalOriginalCount = 0 self.newArticles = [] self.tomecount = Tomecount() self.parameterDictionary = {} self.sentenceSet = set([]) self.warnings = [] for wordIndex, word in enumerate(words): self.parameterDictionary[titles[wordIndex]] = word if 'Cumulative Payout' in self.parameterDictionary: self.tomecount.previousPayout = int(self.parameterDictionary['Cumulative Payout']) self.name = self.parameterDictionary['Name'] self.sourceAddress = 'http://devtome.com/doku.php?id=wiki:user:%s&do=edit' % self.name tipAddress = '' print('Loading articles from %s' % self.name) sourceText = almoner.getSourceText(self.sourceAddress) almoner.writeFileText(os.path.join(backupFolder, 'wiki:user:'******'==' in lineStrippedLower: if '===' not in lineStrippedLower: isCollated = False isOriginal = False isTip = False if isCollated: linkName = getLinkName(line, self.name) underscoredLinkName = linkName.lower().replace(' ', '_') linkText = getSourceTextIfByAuthor(self, linkName) if linkName != '' and linkText == '': self.printWarning('Warning, could not invoice article link: %s' % linkName) if linkText not in linkTexts: linkTexts.add(linkText) self.tomecount.imageCount += getImageCount(linkText) wordCount = getWordCount(linkText) if underscoredLinkName in viewDictionary: self.tomecount.pageViews += viewDictionary[underscoredLinkName] if wordCount > 0: print('Collated article: %s, Word Count: %s' % (lineStrippedLower, almoner.getCommaNumberString(wordCount))) self.saveArticle(categoryDictionary, linkName, linkText, underscoredLinkName) identicalCollatedCount += self.getIdenticalWordCount(linkText) self.tomecount.collatedWordCount += wordCount if isOriginal: linkName = getLinkName(line, self.name) underscoredLinkName = linkName.lower().replace(' ', 
'_') linkText = getSourceTextIfByAuthor(self, linkName) if linkName != '' and linkText == '': self.printWarning('Warning, could not invoice article link: %s' % linkName) if linkText not in linkTexts: linkTexts.add(linkText) self.tomecount.imageCount += getImageCount(linkText) wordCount = getWordCount(linkText) if underscoredLinkName in viewDictionary: self.tomecount.pageViews += viewDictionary[underscoredLinkName] if wordCount > 0: print('Original article: %s, Word Count: %s' % (lineStrippedLower, almoner.getCommaNumberString(wordCount))) self.saveArticle(categoryDictionary, linkName, linkText, underscoredLinkName) identicalOriginalCount += self.getIdenticalWordCount(linkText) self.tomecount.originalWordCount += wordCount if isTip: tipLine = line.strip().replace("'", '') colonIndex = tipLine.find(':') if colonIndex >= 0: addressName = tipLine[: colonIndex].strip().lower() if 'dvc' in addressName or 'devcoin' in addressName or 'coin address' in addressName: tipAddress = tipLine[colonIndex + 1 :].strip() if '==' in lineStrippedLower: if 'collated' in lineStrippedLower: isCollated = True elif 'original' in lineStrippedLower: isOriginal = True elif 'tip' in lineStrippedLower: isTip = True if identicalCollatedCount > 0: self.tomecount.collatedWeightedWordCount -= identicalCollatedCount print('Identical Collated Word Count: %s' % almoner.getCommaNumberString(identicalCollatedCount)) if identicalOriginalCount > 0: self.tomecount.originalWordCount -= identicalOriginalCount print('Identical Original Word Count: %s' % almoner.getCommaNumberString(identicalOriginalCount)) self.tomecount.collatedWeightedWordCount = self.tomecount.collatedWordCount * 3 / 10 self.tomecount.wordCount = self.tomecount.collatedWordCount + self.tomecount.originalWordCount self.tomecount.weightedWordCount = self.tomecount.collatedWeightedWordCount + self.tomecount.originalWordCount self.tomecount.weightedWordCount += 10 * self.tomecount.imageCount if self.tomecount.weightedWordCount >= 1000: 
self.tomecount.cumulativePayout = int(round(float(self.tomecount.weightedWordCount) * 0.001)) print('Weighted Word Count: %s' % almoner.getCommaNumberString(self.tomecount.weightedWordCount)) self.tomecount.payout = max(self.tomecount.cumulativePayout - self.tomecount.previousPayout, 0) maximumPayout = 50 if tipAddress != self.parameterDictionary['Coin Address'] and self.name != 'Mosinnagant': self.printWarning('Warning, the coin address is not the same as the tip address, so nothing will be paid.') maximumPayout = 0 if self.tomecount.payout > maximumPayout: self.tomecount.payout = maximumPayout self.tomecount.cumulativePayout = self.tomecount.previousPayout + maximumPayout if self.tomecount.cumulativePayout > 0: self.tomecount.categorization = float(self.tomecount.categorizedArticleCount) / float(self.tomecount.articleCount) self.tomecount.ratingMedian = averageRating lowerName = self.name.lower() if lowerName in ratingDictionary: self.tomecount.ratingMedian = ratingDictionary[lowerName] weightedPageViews = self.tomecount.pageViews if self.tomecount.previousPayout == 0: weightedPageViews += weightedPageViews self.tomecount.viewsPerThousandWords = 1000.0 * float(weightedPageViews) / float(self.tomecount.weightedWordCount) self.tomecount.normalizedCategorization = self.tomecount.categorization self.tomecount.normalizedPopularity = self.tomecount.viewsPerThousandWords self.tomecount.normalizedRatingMedian = self.tomecount.ratingMedian self.tomecount.popularityTimesRating = int(round(self.tomecount.pageViews * float(self.tomecount.ratingMedian) / 99.0))
def writeCategoryFile(categoryDictionary, categoryFolder, categoryKey, rootFileName):
    """Rebuild the article list of a category page and write it to a folder.

    The 'category:<categoryKey>' wiki page is fetched and split at its
    '{{script}}' marker.  The head, marker included, is copied through.  The
    links listed after the marker are combined with the titles in
    categoryDictionary[categoryKey] (a title already on the page keeps the
    spelling found there), sorted by lowercased title, and written back as
    '[[:title]]' lines, grouped under '===X===' headings when the page
    already uses one-character headings.  The page text from the first
    heading longer than one character onwards follows unchanged.

    Returns without writing when the page has no '{{script}}' marker.
    Raises KeyError when categoryKey is missing from categoryDictionary.
    rootFileName is unused and kept only for the callers' sake.
    """
    categorySuffix = 'category:' + categoryKey
    categoryFileName = os.path.join(categoryFolder, categorySuffix)
    # Decode the HTML entity for a double quote.  The earlier
    # replace('"', '"') swapped a quote for itself, which is a no-op.
    sourceText = almoner.getSourceText('http://devtome.com/doku.php?id=%s&do=edit' % categorySuffix)
    sourceText = sourceText.replace('&quot;', '"')
    scriptToken = '{{script}}'
    scriptIndex = sourceText.find(scriptToken)
    if scriptIndex == -1:
        return
    scriptIndex += len(scriptToken)
    categoryText = sourceText[:scriptIndex] + '\n'
    afterScriptText = sourceText[scriptIndex:]
    isAlphabeticallyGrouped = False
    scriptEndToken = None
    titleDictionary = {}
    for line in almoner.getTextLines(afterScriptText):
        if scriptEndToken is not None:
            # Past the end heading nothing more belongs to the list.
            break
        lineStripped = line.strip()
        if lineStripped == '':
            continue
        if lineStripped.startswith('=') and lineStripped.endswith('='):
            heading = lineStripped.replace('=', '').strip()
            if len(heading) > 1:
                scriptEndToken = lineStripped
            elif len(heading) == 1:
                isAlphabeticallyGrouped = True
            continue
        # Reduce '*[[:title|text]]' to 'title|text'.
        if lineStripped.startswith('*'):
            lineStripped = lineStripped[1:]
        if lineStripped.startswith('[['):
            lineStripped = lineStripped[2:]
        if lineStripped.startswith(':'):
            lineStripped = lineStripped[1:]
        if lineStripped.endswith(']]'):
            lineStripped = lineStripped[:-2]
        titleKey = lineStripped.lower().replace('_', ' ')
        barIndex = titleKey.find('|')
        if barIndex != -1:
            titleKey = titleKey[:barIndex]
        if titleKey == '':
            # No title to sort by; keeping it would break titleKey[0] below
            # and emit an empty link.
            continue
        titleDictionary[titleKey] = lineStripped
    fromTokenText = ''
    if scriptEndToken is not None:
        fromTokenText = afterScriptText[afterScriptText.find(scriptEndToken):]
    for articleTitle in categoryDictionary[categoryKey]:
        articleTitleLower = articleTitle.lower().replace('_', ' ')
        if articleTitleLower == '':
            continue
        if articleTitleLower not in titleDictionary:
            titleDictionary[articleTitleLower] = articleTitle
    lastLetter = None
    # dict.keys() cannot be sorted in place on Python 3; sorted() gives the
    # same ordering on both major versions.
    for titleKey in sorted(titleDictionary):
        if isAlphabeticallyGrouped:
            firstLetter = titleKey[0]
            if firstLetter != lastLetter:
                categoryText += '===%s===\n' % firstLetter.capitalize()
                lastLetter = firstLetter
        title = titleDictionary[titleKey]
        if ']]' not in title:
            title += ']]'
        categoryText += '[[:%s\n\n' % title
    categoryText += fromTokenText
    almoner.writeFileText(categoryFileName, categoryText)