コード例 #1
0
ファイル: chanscrape.py プロジェクト: process/chanscrape
def getThreadIds(board):
  """Collect the ids of the threads listed on a board's index pages.

  Builds the URLs of index pages 0 through 10 of ``board``, fetches them
  with ``downloader.downloadUrls`` and extracts every thread id found in
  the returned HTML.

  Args:
    board: Board name inserted into the URL path.

  Returns:
    A list of unique thread ids (digit strings), in the order they are
    first encountered across the downloaded pages.
  """
  baseUrl = "http://boards.4chan.org/%s/" % board
  urls = [baseUrl + str(pageNumber) for pageNumber in range(11)]
  # NOTE(review): presumably downloadUrls returns one HTML string per URL --
  # confirm against the downloader module.
  pages = downloader.downloadUrls(urls)

  # Captures the numeric part of each thread container's id attribute.
  threadPattern = re.compile(r'class="thread" id="t([0-9]+)"')
  threadIds = []
  seen = set()
  for page in pages:
    for threadId in threadPattern.findall(page):
      # Drop duplicates but keep first-seen order; going through a bare
      # set() would hand the ids back in arbitrary order.
      if threadId not in seen:
        seen.add(threadId)
        threadIds.append(threadId)
  return threadIds
コード例 #2
0
ファイル: chanscrape.py プロジェクト: process/mu-scrape
def getThreadIds(board):
  """Collect the ids of the threads listed on a board's JSON index pages.

  Builds the URLs of index pages 0 through 10 of ``board`` (``<n>.json``),
  fetches them with ``downloader.downloadUrls`` and reads the ``no`` field
  of the first post of every thread in each page's ``threads`` list.

  Args:
    board: Board name inserted into the URL path.

  Returns:
    A list of unique thread ids, in the order they are first encountered
    across the downloaded pages.
  """
  baseUrl = "http://api.4chan.org/%s/" % board
  urls = [baseUrl + str(pageNumber) + ".json" for pageNumber in range(11)]
  # NOTE(review): presumably downloadUrls returns one JSON document per URL --
  # confirm against the downloader module.
  pages = downloader.downloadUrls(urls)

  threadIds = []
  seen = set()
  for page in pages:
    pageData = json.loads(page)
    for thread in pageData['threads']:
      # The first post of a thread carries the thread's own number.
      threadId = thread['posts'][0]['no']
      # Drop duplicates but keep first-seen order; going through a bare
      # set() would hand the ids back in arbitrary order.
      if threadId not in seen:
        seen.add(threadId)
        threadIds.append(threadId)
  return threadIds
コード例 #3
0
ファイル: chanscrape_html.py プロジェクト: process/chanscrape
def getBoardInfo(board):
  """Download a board's index pages and wrap them in a ``BoardInfo``.

  Args:
    board: Board name inserted into the URL path.

  Returns:
    A ``BoardInfo`` built from whatever ``downloader.downloadUrls`` returns
    for index pages 0 through 10 of the board.
  """
  prefix = "http://boards.4chan.org/%s/" % board
  pageUrls = [prefix + str(pageNumber) for pageNumber in range(11)]
  downloaded = downloader.downloadUrls(pageUrls)
  return BoardInfo(downloaded)
コード例 #4
0
ファイル: chanscrape.py プロジェクト: process/mu-scrape
def getAllThreads(board):
  """Download and decode every thread currently listed on a board.

  Looks up the board's thread ids with ``getThreadIds``, fetches each
  thread's JSON document with ``downloader.downloadUrls`` and decodes the
  results with ``json.loads``.

  Args:
    board: Board name inserted into the URL path.

  Returns:
    A list with one decoded JSON object per downloaded thread.
  """
  urls = ["https://boards.4chan.org/%s/res/%s.json" % (board, threadId)
          for threadId in getThreadIds(board)]
  threads = downloader.downloadUrls(urls)
  # Build a real list: on Python 3 a bare map() is a one-shot lazy iterator
  # that callers can neither index, measure with len(), nor iterate twice.
  return [json.loads(thread) for thread in threads]