def getThreadIds(board): urls = [] baseUrl = "http://boards.4chan.org/%s/" % board for i in range(0,11): urls.append(baseUrl + str(i)) pages = downloader.downloadUrls(urls) threadIds = [] for page in pages: # The regular expression will return all thread ids as strings threadIds.extend(re.findall('class="thread" id="t([0-9]+)"', page)); threadIds = list(set(threadIds)) # Clear out duplicates, if any return threadIds
def getThreadIds(board): urls = [] baseUrl = "http://api.4chan.org/%s/" % board for i in range(0,11): urls.append(baseUrl + str(i) + ".json") pages = downloader.downloadUrls(urls) threadIds = [] for page in pages: pageData = json.loads(page) for thread in pageData['threads']: threadIds.append(thread['posts'][0]['no']); threadIds = list(set(threadIds)) # Clear out duplicates, if any return threadIds
def getBoardInfo(board): urls = [] baseUrl = "http://boards.4chan.org/%s/" % board for i in range(0,11): urls.append(baseUrl + str(i)) return BoardInfo(downloader.downloadUrls(urls))
def getAllThreads(board):
    urls = []
    for threadId in getThreadIds(board):
        urls.append("https://boards.4chan.org/%s/res/%s.json" % (board, threadId))
    threads = downloader.downloadUrls(urls)
    # Parse each downloaded thread into a JSON object
    return map(json.loads, threads)
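A short usage sketch, assuming the functions above and their downloader dependency are importable; the 'posts' and 'no' fields come from 4chan's thread JSON, and "g" is just an example board:

# Example: print the five busiest threads on a board.
threads = list(getAllThreads("g"))
threads.sort(key=lambda t: len(t['posts']), reverse=True)
for thread in threads[:5]:
    op = thread['posts'][0]
    print("Thread %s has %d posts" % (op['no'], len(thread['posts'])))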