def getIsLastEditByAuthor(author, linkString, strict=False):
    """
    Determine if the last edit was by the author.

    The revisions page of the article is downloaded and the editor name is
    taken from the text after ' by ' in the span between the
    '<li id="lastmod">' marker and the next '<br' tag.

    author: object with a name attribute and a printWarning(text) method.
    linkString: article id which is substituted into the revisions address.
    strict: when False, the default, every editor is accepted once the
        revisions page has been parsed; this is what the function always did
        because the editor comparison was left unfinished behind an
        unconditional return.  When True, an editor which is not in
        globalEditors and differs from the lower case author name causes a
        warning and a False return.

    Returns True if the lower case author name is in globalNames or the edit
    is accepted, False if the revisions page could not be parsed or, in strict
    mode, the editor does not match.
    """
    nameLower = author.name.lower()
    if nameLower in globalNames:
        return True
    revisionsAddress = 'http://devtome.com/doku.php?id=%s&do=revisions' % linkString
    lastModString = '<li id="lastmod">'
    revisionsText = almoner.getInternetText(revisionsAddress)
    time.sleep(0.5)
    lastModIndex = revisionsText.find(lastModString)
    if lastModIndex == -1:
        # One retry after a longer pause before giving up on the page.
        time.sleep(5)
        revisionsText = almoner.getInternetText(revisionsAddress)
        lastModIndex = revisionsText.find(lastModString)
    if lastModIndex == -1:
        author.printWarning('Warning, lastmod not found on revisions page.')
        return False
    revisionsText = revisionsText[lastModIndex:]
    breakIndex = revisionsText.find('<br')
    if breakIndex == -1:
        author.printWarning('Warning, break not found on revisions page.')
        return False
    revisionsText = revisionsText[:breakIndex]
    byString = ' by '
    byIndex = revisionsText.find(byString)
    if byIndex == -1:
        author.printWarning('Warning, byString not found on revisions page.')
        author.printWarning(linkString)
        return False
    editor = revisionsText[byIndex + len(byString):].strip()
    # unfinished, should check globalEditors but there is no time:
    # unless strict is requested every editor is accepted, as before.
    if editor in globalEditors or not strict:
        return True
    if editor != nameLower:
        author.printWarning('Warning, editor (%s) is not the same as the creator (%s) in the article: %s.' % (editor, author.name, linkString))
        return False
    return True
def getRecentTitles(archiveType, fileNameRoot, wikiAddress):
    """
    Get the titles of the dokuwiki pages which changed recently.

    If the archive fileNameRoot + '.' + archiveType does not exist, or the
    time in its last_modified.txt is more than five days and twenty-two hours
    ago, all the titles are fetched with getTitles.  Otherwise the archive is
    extracted into the fileNameRoot directory and the recent changes pages of
    the wiki are read, following the 'less recent' button from page to page,
    until a change is more than twenty-six hours older than the archive or a
    title shows up a second time.

    archiveType: 'zip' is opened with zipfile, 'bz2' with tarfile in mode
        'r:bz2', anything else with tarfile in mode 'r'.
    fileNameRoot: archive file name without the extension, also the directory
        the archive is extracted into.
    wikiAddress: base address of the wiki.

    Returns the sorted list of the titles collected, or the result of
    getTitles when nothing could be collected from the recent changes pages.
    """
    archiveFileName = fileNameRoot + '.' + archiveType
    if not os.path.exists(archiveFileName):
        return getTitles(wikiAddress)
    if archiveType == 'zip':
        archive = zipfile.ZipFile(archiveFileName, 'r')
    else:
        mode = 'r'
        if archiveType == 'bz2':
            mode = 'r:bz2'
        archive = tarfile.open(archiveFileName, mode)
    # Close the archive even if the extraction fails.
    try:
        archive.extractall(fileNameRoot)
    finally:
        archive.close()
    lastModifiedText = almoner.getFileText(os.path.join(fileNameRoot, 'last_modified.txt'))
    lastModifiedDatetime = datetime.datetime.strptime(lastModifiedText, globalDateTimeFormat)
    print('Last modified: %s' % lastModifiedDatetime)
    nowMinusLast = datetime.datetime.today() - lastModifiedDatetime
    print('Now minus last: %s' % nowMinusLast)
    twentySixHours = 26 * 3600
    if getSeconds(nowMinusLast) > (24 * 5 + 22) * 3600:
        return getTitles(wikiAddress)
    recentAddressStart = wikiAddress + '/doku.php?do=recent&id=start&show_changes=pages&'
    recentPageAddress = recentAddressStart + 'first[0]'
    lineDatetime = None
    dateTitle = 'class="date">'
    linkTitle = 'class="wikilink1" title="'
    nameTitle = 'name="'
    titleSet = set()
    while True:
        lines = almoner.getTextLines(almoner.getInternetText(recentPageAddress))
        nextPageAddress = None
        for lineIndex, line in enumerate(lines):
            # The date text is on the line after the marker, so the marker must not be the last line.
            if dateTitle in line and lineIndex + 1 < len(lines):
                dateLine = lines[lineIndex + 1]
                dateString = dateLine[:dateLine.find('<')]
                if dateString.startswith('20'):
                    dateString = dateString[2:]
                lineDatetime = datetime.datetime.strptime(dateString, globalDateTimeFormat)
            if linkTitle in line:
                line = line[line.find(linkTitle) + len(linkTitle):]
                title = line[:line.find('"')]
                if title != 'start':
                    # Without a date seen so far the age of the change is unknown, so it is not treated as too old.
                    isTooOld = False
                    if lineDatetime is not None:
                        isTooOld = getSeconds(lastModifiedDatetime - lineDatetime) > twentySixHours
                    if title in titleSet or isTooOld:
                        return sorted(titleSet)
                    titleSet.add(title)
            if line.startswith('<input') and 'value="less recent' in line and nameTitle in line:
                line = line[line.find(nameTitle) + len(nameTitle):]
                nextPageAddress = recentAddressStart + line[:line.find('"')]
        # No further page to read: stop instead of fetching the same page again forever.
        if nextPageAddress is None or nextPageAddress == recentPageAddress:
            break
        recentPageAddress = nextPageAddress
    if titleSet:
        return sorted(titleSet)
    return getTitles(wikiAddress)
def addLinkPayout(self, lineStrippedLower):
    'Add link payout if there is a devtome link.'
    link = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not link.startswith('http') or len(self.domainPayoutSet) > 4:
        return
    # Reduce the link to a bare address: no protocol, no www. or vps. prefix, no trailing slash.
    address = link
    if address.startswith('http://'):
        address = address[len('http://'):]
    elif address.startswith('https://'):
        address = address[len('https://'):]
    for prefix in ('www.', 'vps.'):
        if address.startswith(prefix):
            address = address[len(prefix):]
    if address.endswith('/'):
        address = address[:-1]
    if address in self.domainPayoutSet:
        return
    if '/' in address:
        # A link with a path only sets the subdomain payout flag, once.
        if self.subdomainPayout == 0 and 'devtome.com' in almoner.getInternetText(link):
            self.subdomainPayout = 1
        return
    if address == 'bitcoinaddict.com':
        # This address is not downloaded, a banner link is used in place of its page.
        pageText = '<a href="http://www.devtome.com/doku.php?id=earn_devcoins_by_writing"><img width="728" height="90"></a>'
    else:
        pageText = almoner.getInternetText(link)
    devtomeIndex = pageText.find('devtome.com')
    if devtomeIndex == -1:
        if '<title>Access denied' in pageText and 'used CloudFlare to restrict access</title>' in pageText:
            print(('Could not open %s because access was denied by CloudFlare,' % address) + ' so there will not be a payment for that link.')
        return
    self.domainPayoutSet.add(address)
    self.payoutFifth += 2
    while devtomeIndex != -1:
        anchorEndIndex = pageText.find('</a>', devtomeIndex)
        if anchorEndIndex == -1:
            break
        # Any image between the devtome text and the end of the anchor counts as a banner, its size is not checked.
        if '<img' in pageText[devtomeIndex:anchorEndIndex]:
            bannerFifth = getExtraPayoutFifth(address)
            self.payoutFifth += bannerFifth
            print('Banner payout: %s, Address: %s' % (bannerFifth + 2, address))
            return
        devtomeIndex = pageText.find('devtome.com', anchorEndIndex)
    print('Domain name payout: 2, Address: %s' % address)
def addLinkPayout(self, lineStrippedLower):
    'Add link payout if there is a devtome link.'
    link = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not link.startswith('http') or len(self.domainPayoutSet) > 4:
        return
    # Reduce the link to a bare address: no protocol, no www. or vps. prefix, no trailing slash.
    address = link
    if address.startswith('http://'):
        address = address[len('http://'):]
    elif address.startswith('https://'):
        address = address[len('https://'):]
    for prefix in ('www.', 'vps.'):
        if address.startswith(prefix):
            address = address[len(prefix):]
    if address.endswith('/'):
        address = address[:-1]
    if address in self.domainPayoutSet:
        return
    if '/' in address:
        # A link with a path only sets the subdomain payout flag, once.
        if self.subdomainPayout == 0 and 'devtome.com' in almoner.getInternetText(link):
            self.subdomainPayout = 1
        return
    if address == 'bitcoinaddict.com':
        # This address is not downloaded, a banner link is used in place of its page.
        pageText = '<a href="http://www.devtome.com/doku.php?id=earn_devcoins_by_writing"><img width="728" height="90"></a>'
    else:
        pageText = almoner.getInternetText(link)
    devtomeIndex = pageText.find('devtome.com')
    if devtomeIndex == -1:
        if '<title>Access denied' in pageText and 'used CloudFlare to restrict access</title>' in pageText:
            print(('Could not open %s because access was denied by CloudFlare,' % address) + ' so there will not be a payment for that link.')
        return
    self.domainPayoutSet.add(address)
    self.payoutFifth += 2
    while devtomeIndex != -1:
        anchorEndIndex = pageText.find('</a>', devtomeIndex)
        if anchorEndIndex == -1:
            break
        # Any image between the devtome text and the end of the anchor counts as a banner, its size is not checked.
        if '<img' in pageText[devtomeIndex:anchorEndIndex]:
            bannerFifth = getExtraPayoutFifth(address)
            self.payoutFifth += bannerFifth
            print('Banner payout: %s, Address: %s' % (bannerFifth + 2, address))
            return
        devtomeIndex = pageText.find('devtome.com', anchorEndIndex)
    print('Domain name payout: 2, Address: %s' % address)
def addLinkPayout(self, lineStrippedLower):
    'Add link payout if there is a devtome link.'
    link = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not link.startswith('http') or self.domainPayout > 4:
        return
    # Reduce the link to a bare address: no protocol, no www. or vps. prefix, no trailing slash.
    address = link
    if address.startswith('http://'):
        address = address[len('http://'):]
    elif address.startswith('https://'):
        address = address[len('https://'):]
    for prefix in ('www.', 'vps.'):
        if address.startswith(prefix):
            address = address[len(prefix):]
    if address.endswith('/'):
        address = address[:-1]
    if '/' in address:
        # A link with a path only sets the subdomain payout flag, once.
        if self.subdomainPayout == 0 and 'devtome.com' in almoner.getInternetText(link):
            self.subdomainPayout = 1
        return
    if address == 'bitcoinaddict.com':
        # This address is not downloaded, a banner link is used in place of its page.
        pageText = '<a href="http://www.devtome.com/doku.php?id=earn_devcoins_by_writing"><img width="728" height="90"></a>'
    else:
        pageText = almoner.getInternetText(link)
    if 'devtome.com' not in pageText:
        return
    self.domainPayout += 1
    self.payoutFifth += 2
    devtomeIndex = pageText.find('devtome.com')
    while devtomeIndex != -1:
        anchorEndIndex = pageText.find('</a>', devtomeIndex)
        if anchorEndIndex == -1:
            break
        # Any image between the devtome text and the end of the anchor counts as a banner, its size is not checked.
        if '<img' in pageText[devtomeIndex:anchorEndIndex]:
            bannerFifth = getExtraPayoutFifth(address)
            self.payoutFifth += bannerFifth
            print('Banner payout: %s, Address: %s' % (bannerFifth + 2, address))
            return
        devtomeIndex = pageText.find('devtome.com', anchorEndIndex)
    print('Domain name payout: 2, Address: %s' % address)
def getExtraPayoutFifth(lineStrippedLower):
    """
    Get the extra payout fifths for a banner on the site.

    The extra payout is calculated from the Alexa page rank of the site.  This is used rather than the Alexa page views
    because the page view information is only available to javascript interpreters, a read() command from urllib will
    only get the page rank.

    The lower the page rank, the higher the page views, so the reciprocal of the page rank is used to get the
    approximate number of dollars per month in proportion to page views.  Because two fifths have already been added
    in addLinkPayout, two fifths are subtracted from the payout fifth, to a minimum of one, to determine the extra
    payout fifths.

    lineStrippedLower: bare site address which is appended to the Alexa siteinfo address.

    Returns the extra payout fifths, which is 1 whenever a positive rank can not be read from the Alexa page.
    """
    rankPrefix = 'is ranked number'
    alexaText = almoner.getInternetText('http://www.alexa.com/siteinfo/%s' % lineStrippedLower)
    isRankedNumberIndex = alexaText.find(rankPrefix)
    if isRankedNumberIndex < 0:
        return 1
    alexaText = alexaText[isRankedNumberIndex + len(rankPrefix) + 1:]
    inIndex = alexaText.find('in')
    if inIndex < 0:
        return 1
    try:
        rank = int(alexaText[:inIndex].strip().replace(',', ''))
    except ValueError:
        # The text before 'in' was not a number, treat the site as unranked.
        return 1
    if rank < 1:
        # A rank of zero would divide by zero below.
        return 1
    dollarsPerMonth = 15000000 / rank / 4  # banner ad will grab one quarter of the revenue
    if lineStrippedLower == 'bitcoinaddict.com':
        dollarsPerMonth = dollarsPerMonth * 8 / 10
    return max(dollarsPerMonth / globalDollarDivider - 2, 1)
def addPostPayout(self, lineStrippedLower):
    """
    Add post payout if there is a devtome link.

    The linked page is downloaded and the post is located through the
    message_number anchor whose href starts with the link, cut at the first
    '#' and then at the first ';'.  The post text runs from the following
    '<div class="post"' to the next '<td valign="bottom"'.  If that text
    contains 'devtome.com' and has not been counted before, it is added to
    self.postPayoutSet and its whitespace separated word count is added to
    self.postWords.

    Nothing is added when the link does not start with 'http', when more than
    four posts have been counted already, or when any of the markers is
    missing from the page.

    lineStrippedLower: line holding the link to the post, possibly with a leading star.
    """
    lineStrippedLower = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not lineStrippedLower.startswith('http'):
        return
    if len(self.postPayoutSet) > 4:
        return
    # The page is fetched with the whole link, the fragment is only removed for the anchor search.
    linkText = almoner.getInternetText(lineStrippedLower)
    for separator in ('#', ';'):
        if separator in lineStrippedLower:
            lineStrippedLower = lineStrippedLower[:lineStrippedLower.find(separator)]
    messageString = '<a class="message_number" style="vertical-align: middle;" href="' + lineStrippedLower
    messageIndex = linkText.find(messageString)
    if messageIndex == -1:
        return
    postBeginIndex = linkText.find('<div class="post"', messageIndex)
    if postBeginIndex == -1:
        return
    postEndIndex = linkText.find('<td valign="bottom"', postBeginIndex + 1)
    if postEndIndex == -1:
        # Without the end marker the slice would take in the rest of the page and inflate the word count.
        return
    postText = linkText[postBeginIndex:postEndIndex]
    if 'devtome.com' not in postText:
        return
    if postText in self.postPayoutSet:
        return
    self.postPayoutSet.add(postText)
    self.postWords += len(postText.split())
def getSourceText(address):
    'Get the devtome source text for the address.'
    def getWarnedEmpty(warning):
        'Print the warning and the address between blank lines, then give the empty text.'
        for printLine in ('', warning, address, ''):
            print(printLine)
        return ''

    pageText = almoner.getInternetText(address)
    openTagIndex = pageText.find('<textarea')
    if openTagIndex == -1:
        return getWarnedEmpty('Warning, no textarea tag found for:')
    openTagEndIndex = pageText.find('>', openTagIndex)
    if openTagEndIndex == -1:
        return getWarnedEmpty('Warning, no tag end found for:')
    closeTagIndex = pageText.find('</textarea>', openTagEndIndex)
    if closeTagIndex == -1:
        return getWarnedEmpty('Warning, no textarea end tag found for:')
    # The source is everything between the end of the opening tag and the closing tag.
    sourceText = pageText[openTagEndIndex + 1:closeTagIndex]
    return sourceText.lstrip()
def getIsLastEditByAuthor(linkString, name):
    'Determine if the last edit was by the author.'
    # These names are accepted without looking at the revisions page.
    if name in ('Knotwork', 'Kumala', 'Icoin', 'Xenophaux', 'Unthinkingbit'):
        return True
    pageText = almoner.getInternetText('http://devtome.com/doku.php?id=%s&do=revisions' % linkString)
    time.sleep(1)
    lastModIndex = pageText.find('<li id="lastmod">')
    if lastModIndex == -1:
        print('Warning, lastmod not found on revisions page.')
        return False
    lastModText = pageText[lastModIndex:]
    breakIndex = lastModText.find('<br')
    if breakIndex == -1:
        print('Warning, break not found on revisions page.')
        return False
    lastModText = lastModText[:breakIndex]
    separator = ' by '
    separatorIndex = lastModText.find(separator)
    if separatorIndex == -1:
        print('Warning, byString not found on revisions page.')
        print(linkString)
        return False
    editor = lastModText[separatorIndex + len(separator):].strip()
    # Edits by these editors are accepted whoever the author is.
    if editor in ('raptorak', 'twobits', 'unthinkingbit', 'weisoq', 'xenophaux', 'nsddev', 'ftgcoin'):
        return True
    if editor == name.lower():
        return True
    print('Warning, editor is not the same as the name.')
    print(editor)
    print(linkString)
    return False
def getExtraPayoutFifth(lineStrippedLower):
    """
    Get the extra payout fifths for a banner on the site.

    The extra payout is calculated from the Alexa page rank of the site.  This is used rather than the Alexa page views
    because the page view information is only available to javascript interpreters, a read() command from urllib will
    only get the page rank.

    The lower the page rank, the higher the page views, so the reciprocal of the page rank is used to get the
    approximate number of dollars per month in proportion to page views.  Because two fifths have already been added
    in addLinkPayout, two fifths are subtracted from the payout fifth, to a minimum of one, to determine the extra
    payout fifths.

    lineStrippedLower: bare site address which is appended to the Alexa siteinfo address.

    Returns the extra payout fifths, which is 1 whenever a positive rank can not be read from the Alexa page.
    """
    rankPrefix = 'is ranked number'
    alexaText = almoner.getInternetText('http://www.alexa.com/siteinfo/%s' % lineStrippedLower)
    isRankedNumberIndex = alexaText.find(rankPrefix)
    if isRankedNumberIndex < 0:
        return 1
    alexaText = alexaText[isRankedNumberIndex + len(rankPrefix) + 1:]
    inIndex = alexaText.find('in')
    if inIndex < 0:
        return 1
    try:
        rank = int(alexaText[:inIndex].strip().replace(',', ''))
    except ValueError:
        # The text before 'in' was not a number, treat the site as unranked.
        return 1
    if rank < 1:
        # A rank of zero would divide by zero below.
        return 1
    # NOTE: no bitcoinaddict.com reduction is applied in this function.
    dollarsPerMonth = 60000000 / rank
    return max(dollarsPerMonth / 40 - 2, 1)  # roundedUp(240 / 5 * 2)
def addSignaturePayout(self, lineStrippedLower):
    'Add signature payout if there is a devtome link.'
    link = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not link.startswith('http'):
        return
    pageText = almoner.getInternetText(link)
    if 'devtome.com' not in pageText or self.signaturePayout:
        return
    # The flag is set before the post count is read, so only the first devtome page is ever considered.
    self.signaturePayout = True
    postsLabel = '<td><b>Posts: </b></td>'
    labelIndex = pageText.find(postsLabel)
    if labelIndex == -1:
        return
    numberBeginIndex = labelIndex + len(postsLabel)
    numberEndIndex = pageText.find('</td>', numberBeginIndex + 1)
    if numberEndIndex == -1:
        return
    numberText = pageText[numberBeginIndex:numberEndIndex].strip()
    if '>' in numberText:
        # Drop the opening tag of the cell which holds the number.
        numberText = numberText[numberText.find('>') + 1:]
    if int(numberText) > 1000:
        signatureFifth, message = 2, 'Big signature payout: 2'
    else:
        signatureFifth, message = 1, 'Small signature payout: 1'
    self.payoutFifth += signatureFifth
    print(message)
def addPostPayout(self, lineStrippedLower):
    """
    Add post payout if there is a devtome link.

    The linked page is downloaded and the post is located through the
    message_number anchor whose href starts with the link, cut at the first
    '#' and then at the first ';'.  The post text runs from the following
    '<div class="post"' to the next '<td valign="bottom"'.  If that text
    contains 'devtome.com', its whitespace separated word count is added to
    self.postWords and self.postPayout is incremented.

    Nothing is added when the link does not start with 'http', when
    self.postPayout is already more than four, or when any of the markers is
    missing from the page.

    lineStrippedLower: line holding the link to the post, possibly with a leading star.
    """
    lineStrippedLower = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not lineStrippedLower.startswith('http'):
        return
    if self.postPayout > 4:
        return
    # The page is fetched with the whole link, the fragment is only removed for the anchor search.
    linkText = almoner.getInternetText(lineStrippedLower)
    for separator in ('#', ';'):
        if separator in lineStrippedLower:
            lineStrippedLower = lineStrippedLower[:lineStrippedLower.find(separator)]
    messageString = '<a class="message_number" style="vertical-align: middle;" href="' + lineStrippedLower
    messageIndex = linkText.find(messageString)
    if messageIndex == -1:
        return
    postBeginIndex = linkText.find('<div class="post"', messageIndex)
    if postBeginIndex == -1:
        return
    postEndIndex = linkText.find('<td valign="bottom"', postBeginIndex + 1)
    if postEndIndex == -1:
        # Without the end marker the slice would take in the rest of the page and inflate the word count.
        return
    postText = linkText[postBeginIndex:postEndIndex]
    if 'devtome.com' not in postText:
        return
    self.postWords += len(postText.split())
    self.postPayout += 1
def getSourceText(address):
    'Get the devtome source text for the address.'
    def getWarnedEmpty(warning):
        'Print the warning and the address between blank lines, then give the empty text.'
        for printLine in ('', warning, address, ''):
            print(printLine)
        return ''

    pageText = almoner.getInternetText(address)
    openTagIndex = pageText.find('<textarea')
    if openTagIndex == -1:
        return getWarnedEmpty('Warning, no textarea tag found for:')
    openTagEndIndex = pageText.find('>', openTagIndex)
    if openTagEndIndex == -1:
        return getWarnedEmpty('Warning, no tag end found for:')
    closeTagIndex = pageText.find('</textarea>', openTagEndIndex)
    if closeTagIndex == -1:
        return getWarnedEmpty('Warning, no textarea end tag found for:')
    # The source is everything between the end of the opening tag and the closing tag.
    sourceText = pageText[openTagEndIndex + 1:closeTagIndex]
    return sourceText.lstrip()
# getRecentNames: collect names from the dokuwiki recent changes pages of wikiAddress.
#
# The text of fileName is read with almoner.getFileText.  If its first line has at
# least two comma separated fields, the second field is parsed with
# globalDateTimeFormat as the last modified time; otherwise the last modified time
# is nowDatetime minus timedelta(30).  The recent changes pages are then fetched
# one after the other, starting at 'first[0]' and continuing with the name taken
# from the 'less recent' input line.  As soon as an entry other than 'start' is
# more than twenty-six hours older than the last modified time, the names list is
# sorted and returned.
#
# NOTE(review): between name.startswith('wiki:user:' and '<input' the text has been
# replaced by runs of asterisks in this copy.  The statements which fill names, and
# whatever use is made of paidNameSet, are therefore not visible here; restore the
# span from version control before changing this function.
# NOTE(review): lineDatetime is None until a 'class="date">' line has been seen, so
# the subtraction presumably relies on a date line preceding the first link - confirm.
# NOTE(review): time.sleep(1) appears to run once per fetched page, and the final
# 'return None' follows a 'while True' loop; the exact nesting can not be told from
# this collapsed line - confirm against the original layout.
# NOTE(review): the bare timedelta name presumably comes from an import elsewhere in
# the file - confirm.
def getRecentNames(fileName, nowDatetime, previousDevtomeName, wikiAddress): 'Get the recent user names.' lastModifiedText = almoner.getFileText(fileName) lastModifiedDatetime = nowDatetime - timedelta(30) if lastModifiedText != '': lines = almoner.getTextLines(lastModifiedText) if len(lines) > 0: words = lines[0].split(',') if len(words) > 1: lastModifiedDatetime = datetime.datetime.strptime( words[1], globalDateTimeFormat) print('Last modified: %s' % lastModifiedDatetime) nowMinusLast = nowDatetime - lastModifiedDatetime paidNameSet = getPaidNameSet(previousDevtomeName) print('Now minus last: %s' % nowMinusLast) twentySixHours = 26 * 3600 startChangesAddress = wikiAddress + '/doku.php?do=recent&id=start&show_changes=pages&' recentPageAddress = startChangesAddress + 'first[0]' lineDatetime = None dateTitle = 'class="date">' linkTitle = 'class="wikilink1" title="' nameTitle = 'name="' names = [] while True: print('Parsing: %s' % recentPageAddress) lines = almoner.getTextLines( almoner.getInternetText(recentPageAddress)) for lineIndex, line in enumerate(lines): if dateTitle in line: dateLine = lines[lineIndex + 1] dateString = dateLine[:dateLine.find('<')] if dateString.startswith('20'): dateString = dateString[len('20'):] lineDatetime = datetime.datetime.strptime( dateString, globalDateTimeFormat) if linkTitle in line: line = line[line.find(linkTitle) + len(linkTitle):] name = line[:line.find('"')] if name != 'start': lastMinusLine = lastModifiedDatetime - lineDatetime if getSeconds(lastMinusLine) > twentySixHours: names.sort() return names if name.startswith('wiki:user:'******'wiki:user:'******'<input' ) and 'value="less recent' in line and nameTitle in line: line = line[line.find(nameTitle) + len(nameTitle):] name = line[:line.find('"')] recentPageAddress = startChangesAddress + name time.sleep(1) return None
# getRecentNames: collect names from the dokuwiki recent changes pages of wikiAddress.
#
# The text of fileName is read with almoner.getFileText.  If its first line has at
# least two comma separated fields, the second field is parsed with
# globalDateTimeFormat as the last modified time; otherwise the last modified time
# is nowDatetime minus timedelta(30).  The recent changes pages are then fetched
# one after the other, starting at 'first[0]' and continuing with the name taken
# from the 'less recent' input line.  As soon as an entry other than 'start' is
# more than twenty-six hours older than the last modified time, the names list is
# sorted and returned.
#
# NOTE(review): between name.startswith('wiki:user:' and '<input' the text has been
# replaced by runs of asterisks in this copy.  The statements which fill names, and
# whatever use is made of paidNameSet, are therefore not visible here; restore the
# span from version control before changing this function.
# NOTE(review): lineDatetime is None until a 'class="date">' line has been seen, so
# the subtraction presumably relies on a date line preceding the first link - confirm.
# NOTE(review): time.sleep(1) appears to run once per fetched page, and the final
# 'return None' follows a 'while True' loop; the exact nesting can not be told from
# this collapsed line - confirm against the original layout.
# NOTE(review): the bare timedelta name presumably comes from an import elsewhere in
# the file - confirm.
def getRecentNames(fileName, nowDatetime, previousDevtomeName, wikiAddress): 'Get the recent user names.' lastModifiedText = almoner.getFileText(fileName) lastModifiedDatetime = nowDatetime - timedelta(30) if lastModifiedText != '': lines = almoner.getTextLines(lastModifiedText) if len(lines) > 0: words = lines[0].split(',') if len(words) > 1: lastModifiedDatetime = datetime.datetime.strptime(words[1], globalDateTimeFormat) print('Last modified: %s' % lastModifiedDatetime) nowMinusLast = nowDatetime - lastModifiedDatetime paidNameSet = getPaidNameSet(previousDevtomeName) print('Now minus last: %s' % nowMinusLast) twentySixHours = 26 * 3600 startChangesAddress = wikiAddress + '/doku.php?do=recent&id=start&show_changes=pages&' recentPageAddress = startChangesAddress + 'first[0]' lineDatetime = None dateTitle = 'class="date">' linkTitle = 'class="wikilink1" title="' nameTitle = 'name="' names = [] while True: print('Parsing: %s' % recentPageAddress) lines = almoner.getTextLines(almoner.getInternetText(recentPageAddress)) for lineIndex, line in enumerate(lines): if dateTitle in line: dateLine = lines[lineIndex + 1] dateString = dateLine[: dateLine.find('<')] if dateString.startswith('20'): dateString = dateString[len('20') :] lineDatetime = datetime.datetime.strptime(dateString, globalDateTimeFormat) if linkTitle in line: line = line[line.find(linkTitle) + len(linkTitle) :] name = line[: line.find('"')] if name != 'start': lastMinusLine = lastModifiedDatetime - lineDatetime if getSeconds(lastMinusLine) > twentySixHours: names.sort() return names if name.startswith('wiki:user:'******'wiki:user:'******'<input') and 'value="less recent' in line and nameTitle in line: line = line[line.find(nameTitle) + len(nameTitle) :] name = line[: line.find('"')] recentPageAddress = startChangesAddress + name time.sleep(1) return None
def getTitles(wikiAddress):
    """
    Get all titles of the dokuwiki.

    The index page of wikiAddress for wiki:user is downloaded and read line by
    line.  The depth inside the '<ul class="idx">' lists is tracked, and for
    every line inside such a list with 'class="wikilink1"' the text between
    '?id=' and the following double quote is taken as a title.

    Lines without '?id=' are skipped, and when the closing quote is missing
    the remainder of the line is used, so a failed search never yields a
    title cut at the wrong place.

    wikiAddress: base address of the wiki.

    Returns the list of titles in page order.
    """
    indexPageAddress = wikiAddress + '/doku.php?id=start&idx=wiki%3Auser'
    lines = almoner.getTextLines(almoner.getInternetText(indexPageAddress))
    prefix = '?id='
    indexDepth = 0
    titles = []
    for line in lines:
        if line.startswith('</ul>') and indexDepth > 0:
            indexDepth -= 1
        if indexDepth > 0 and 'class="wikilink1"' in line:
            prefixIndex = line.find(prefix)
            if prefixIndex != -1:
                title = line[prefixIndex + len(prefix):]
                quoteIndex = title.find('"')
                if quoteIndex != -1:
                    title = title[:quoteIndex]
                if title:
                    titles.append(title)
        # The depth is raised after the link check, the list line itself holds no link.
        if line == '<ul class="idx">':
            indexDepth += 1
    return titles
def getTitles(wikiAddress):
    """
    Get all titles of the dokuwiki.

    The index page of wikiAddress for wiki:user is downloaded and read line by
    line.  The depth inside the '<ul class="idx">' lists is tracked, and for
    every line inside such a list with 'class="wikilink1"' the text between
    '?id=' and the following double quote is taken as a title.

    Lines without '?id=' are skipped, and when the closing quote is missing
    the remainder of the line is used, so a failed search never yields a
    title cut at the wrong place.

    wikiAddress: base address of the wiki.

    Returns the list of titles in page order.
    """
    indexPageAddress = wikiAddress + '/doku.php?id=start&idx=wiki%3Auser'
    lines = almoner.getTextLines(almoner.getInternetText(indexPageAddress))
    prefix = '?id='
    indexDepth = 0
    titles = []
    for line in lines:
        if line.startswith('</ul>') and indexDepth > 0:
            indexDepth -= 1
        if indexDepth > 0 and 'class="wikilink1"' in line:
            prefixIndex = line.find(prefix)
            if prefixIndex != -1:
                title = line[prefixIndex + len(prefix):]
                quoteIndex = title.find('"')
                if quoteIndex != -1:
                    title = title[:quoteIndex]
                if title:
                    titles.append(title)
        # The depth is raised after the link check, the list line itself holds no link.
        if line == '<ul class="idx">':
            indexDepth += 1
    return titles
def addSignaturePayout(self, lineStrippedLower):
    'Add signature payout if there is a devtome link.'
    # No more than three signature pages are counted.
    if len(self.signaturePageSet) > 2:
        return
    link = almoner.getWithoutLeadingStar(lineStrippedLower)
    if not link.startswith('http'):
        return
    pageText = almoner.getInternetText(link)
    if 'devtome.com' not in pageText or pageText in self.signaturePageSet:
        return
    # The forum in the link decides which function works out the payout.
    if 'bitcointalk.org' in link:
        signatureFifth = getPayoutFifthBitcoin(pageText)
    elif 'ppcointalk.org' in link:
        signatureFifth = getPayoutFifthPpcoin(pageText)
    elif 'terracointalk.org' in link:
        signatureFifth = getPayoutFifthTerracoin(pageText)
    else:
        signatureFifth = 0
    if signatureFifth > 0:
        self.signaturePageSet.add(pageText)
        self.payoutFifth += signatureFifth
def getRecentTitles(archiveType, fileNameRoot, wikiAddress):
    """
    Get the titles of the dokuwiki pages which changed recently.

    If the archive fileNameRoot + '.' + archiveType does not exist, or the
    time in its last_modified.txt is more than five days and twenty-two hours
    ago, all the titles are fetched with getTitles.  Otherwise the archive is
    extracted into the fileNameRoot directory and the recent changes pages of
    the wiki are read, following the 'less recent' button from page to page,
    until a change is more than twenty-six hours older than the archive or a
    title shows up a second time.

    archiveType: 'zip' is opened with zipfile, 'bz2' with tarfile in mode
        'r:bz2', anything else with tarfile in mode 'r'.
    fileNameRoot: archive file name without the extension, also the directory
        the archive is extracted into.
    wikiAddress: base address of the wiki.

    Returns the sorted list of the titles collected, or the result of
    getTitles when nothing could be collected from the recent changes pages.
    """
    archiveFileName = fileNameRoot + '.' + archiveType
    if not os.path.exists(archiveFileName):
        return getTitles(wikiAddress)
    if archiveType == 'zip':
        archive = zipfile.ZipFile(archiveFileName, 'r')
    else:
        mode = 'r'
        if archiveType == 'bz2':
            mode = 'r:bz2'
        archive = tarfile.open(archiveFileName, mode)
    # Close the archive even if the extraction fails.
    try:
        archive.extractall(fileNameRoot)
    finally:
        archive.close()
    lastModifiedText = almoner.getFileText(os.path.join(fileNameRoot, 'last_modified.txt'))
    lastModifiedDatetime = datetime.datetime.strptime(lastModifiedText, globalDateTimeFormat)
    print('Last modified: %s' % lastModifiedDatetime)
    nowMinusLast = datetime.datetime.today() - lastModifiedDatetime
    print('Now minus last: %s' % nowMinusLast)
    twentySixHours = 26 * 3600
    if getSeconds(nowMinusLast) > (24 * 5 + 22) * 3600:
        return getTitles(wikiAddress)
    recentAddressStart = wikiAddress + '/doku.php?do=recent&id=start&show_changes=pages&'
    recentPageAddress = recentAddressStart + 'first[0]'
    lineDatetime = None
    dateTitle = 'class="date">'
    linkTitle = 'class="wikilink1" title="'
    nameTitle = 'name="'
    titleSet = set()
    while True:
        lines = almoner.getTextLines(almoner.getInternetText(recentPageAddress))
        nextPageAddress = None
        for lineIndex, line in enumerate(lines):
            # The date text is on the line after the marker, so the marker must not be the last line.
            if dateTitle in line and lineIndex + 1 < len(lines):
                dateLine = lines[lineIndex + 1]
                dateString = dateLine[:dateLine.find('<')]
                if dateString.startswith('20'):
                    dateString = dateString[2:]
                lineDatetime = datetime.datetime.strptime(dateString, globalDateTimeFormat)
            if linkTitle in line:
                line = line[line.find(linkTitle) + len(linkTitle):]
                title = line[:line.find('"')]
                if title != 'start':
                    # Without a date seen so far the age of the change is unknown, so it is not treated as too old.
                    isTooOld = False
                    if lineDatetime is not None:
                        isTooOld = getSeconds(lastModifiedDatetime - lineDatetime) > twentySixHours
                    if title in titleSet or isTooOld:
                        return sorted(titleSet)
                    titleSet.add(title)
            if line.startswith('<input') and 'value="less recent' in line and nameTitle in line:
                line = line[line.find(nameTitle) + len(nameTitle):]
                nextPageAddress = recentAddressStart + line[:line.find('"')]
        # No further page to read: stop instead of fetching the same page again forever.
        if nextPageAddress is None or nextPageAddress == recentPageAddress:
            break
        recentPageAddress = nextPageAddress
    if titleSet:
        return sorted(titleSet)
    return getTitles(wikiAddress)