def main():
    """Archive one 4chan thread: its JSON plus every attached file.

    Reads ``<board>`` and ``<thread_id>`` from ``sys.argv`` and writes into
    ``<cwd>/4chan/<board>/<thread_id>/`` (files go into an ``images``
    sub-folder).  Exits with status 1 when the arguments are wrong or the
    thread cannot be fetched.
    """
    # Exactly two arguments are required.  The previous range check accepted
    # a lone board name and then failed with IndexError on sys.argv[2].
    if len(sys.argv) != 3:
        print("Quick and dirty 4chan Archiver")
        print("%s - Save the JSON and all images for an 4chan post." % (sys.argv[0]))
        print("\tUsage: %s <board> <thread_id>" % (sys.argv[0]))
        sys.exit(1)

    board_name = sys.argv[1]
    thread_id = sys.argv[2]

    board = basc_py4chan.Board(board_name)
    thread = board.get_thread(thread_id)
    if thread is None:
        # get_thread() yields None for a missing thread; stop before
        # creating an empty archive folder.
        print("Thread %s not found on /%s/." % (thread_id, board_name))
        sys.exit(1)

    # create folders according to chan.arc standard
    path = os.path.join(os.getcwd(), "4chan", board_name, thread_id)
    images_path = os.path.join(path, "images")
    mkdirs(images_path)

    # archive the thread JSON
    url_builder = basc_py4chan.Url(board_name)
    json_url = url_builder.thread_api_url(thread_id)
    print(json_url)
    download_json(os.path.join(path, "%s.json" % thread_id), json_url)

    # download every file in the thread, even extra files in posts
    for img in thread.file_objects():
        print("Downloading %s..." % img.file_url)
        download_file(os.path.join(images_path, "%s" % img.filename), img.file_url)
async def vrdoom_task():
    """Background task that announces /vr/ "DOOM THREAD" threads on Discord.

    Polls the /vr/ board, and for the first matching thread whose URL is
    not already known, posts an embed (title, link, OP image, timestamp)
    to the ``vrdoomchan`` channel.
    """
    await bot.wait_until_ready()

    # Seed the "already announced" list from the channel history.
    # NOTE(review): announcements are sent as embeds, so message.content may
    # not actually contain the thread URL - confirm the de-duplication works.
    threadlinks = list()
    async for things in bot.get_channel(vrdoomchan).history():
        threadlinks.append(things.content)

    while True:
        channel = bot.get_channel(vrdoomchan)
        vr = basc_py4chan.Board('vr')
        vrids = vr.get_all_thread_ids()
        for x in vrids:
            doom = vr.get_thread(x)
            if doom is None:
                # The thread disappeared between listing and fetching;
                # without this guard the task died with AttributeError.
                continue
            if "DOOM THREAD" not in doom.topic.text_comment:
                continue

            doomurl = f"https://boards.4channel.org/vr/thread/{x}"
            if doomurl in threadlinks:
                continue

            doompic = doom.topic.file.file_url
            doomtitle = doom.topic.subject
            doomdate = doom.topic.datetime
            e = discord.Embed(title=doomtitle, url=doomurl, color=0x9ab89f,
                              timestamp=doomdate)
            e.set_image(url=doompic)
            await channel.send(embed=e)
            threadlinks.append(doomurl)
            # NOTE(review): as in the original, the task ends after its
            # first announcement - confirm this is intended.
            return
        await asyncio.sleep(10)
def tryToRespondCorrectly(boardName, message):
    """Build a reply quoting a thread on *boardName* that matches *message*.

    Five words are picked from *message* and a thread whose opening post
    contains one of them is chosen at random.

    Returns ``False`` when no thread matches.  Otherwise returns a two-item
    list ``[posts, reply]``: the quoted posts (at most five, picked at
    random when the thread has more) and the reply text encoded as UTF-8.
    """
    allThreadsFromChosenBoard = basc_py4chan.Board(boardName).get_all_threads()
    chosenWords = getFiveWordsFromListOfWords(message)
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print(chosenWords)

    chosenThread = getRandomThreadBasedOnSelectedWords(
        boardName, chosenWords, allThreadsFromChosenBoard)
    if not chosenThread:
        return False

    posts = chosenThread.all_posts
    if len(posts) > 5:
        # Five distinct posts in random order.
        fivePosts = random.sample(posts, 5)
    else:
        fivePosts = posts

    parts = ["Original poster said: ", posts[0].text_comment, "\n"]
    for post in fivePosts:
        parts.append(post.name if post.name else "Anonymous")
        parts.append(" says:\n")
        parts.append(post.text_comment)
        parts.append("\n\n")

    return [fivePosts, "".join(parts).encode('utf-8')]
def crawl_thread(self, board, thread):
    """Register every acceptable, not-yet-known image of a thread.

    Files that are deleted, smaller than the configured minimum width,
    height or size, or whose aspect ratio exceeds ``IMAGE_MAX_HW_RATIO``
    are skipped, as are files whose filename is already in the database.
    Each remaining file gets an ``Image`` row and a ``DownloadTask``.

    :param board: board name, e.g. ``"wg"``
    :param thread: thread id on that board
    :return: number of newly registered images
    :raises: whatever ``get_thread(..., raise_404=True)`` raises for a
        missing thread
    """
    b = basc_py4chan.Board(board)
    t = b.get_thread(thread_id=thread, raise_404=True)

    new_images = []
    for f in t.file_objects():
        if f.file_deleted:
            continue
        if f.file_width < IMAGE_MIN_WIDTH or \
                f.file_height < IMAGE_MIN_HEIGHT or \
                f.file_size < IMAGE_MIN_SIZE or \
                f.file_width / f.file_height > IMAGE_MAX_HW_RATIO or \
                f.file_height / f.file_width > IMAGE_MAX_HW_RATIO:
            continue

        # should it be unique
        known = self.db.query(Image).filter_by(filename=f.filename).count()
        if known != 0:
            continue

        path = '%s/%s/%s' % (self.__sitename__, board, f.filename)
        img = Image(site=self.__sitename__,
                    board=board,
                    thread=thread,
                    filename_original=f.filename_original,
                    filename=f.filename,
                    width=f.file_width,
                    height=f.file_height,
                    size=f.file_size,
                    hash=f.file_md5_hex,
                    path=path,
                    url=f.file_url,
                    priority=0,
                    status=False)
        self.db.add(img)
        new_images.append(img)

    # Commit first, then queue.  The original queued each task before any
    # flush/commit, i.e. while img.id was presumably still unset (assuming
    # `id` is a database-generated key - TODO confirm) and before a worker
    # could see the row.
    self.db.commit()
    for img in new_images:
        self.download_queue.put(DownloadTask(id=img.id, priority=img.priority))

    return len(new_images)
def main():
    """Print details of the first thread listed on /v/ and of its files."""
    # The "first" thread is simply the first id the board listing returns.
    v_board = basc_py4chan.Board('v')
    thread = v_board.get_thread(v_board.get_all_thread_ids()[0])

    # thread information
    print(thread)
    for label, attr in (('Sticky?', 'sticky'), ('Closed?', 'closed')):
        print(label, getattr(thread, attr))

    # topic information
    topic = thread.topic
    print('Topic Repr', topic)
    for label, attr in (('Postnumber', 'post_number'), ('Timestamp', 'timestamp')):
        print(label, getattr(topic, attr))
    print('Datetime', repr(topic.datetime))
    for label, attr in (('Subject', 'subject'), ('Comment', 'text_comment')):
        print(label, getattr(topic, attr))
    print('Replies', thread.replies)

    # file information
    file_fields = (
        ('Filename', 'filename'),
        (' Filemd5hex', 'file_md5_hex'),
        (' Fileurl', 'file_url'),
        (' Thumbnailurl', 'thumbnail_url'),
    )
    for attachment in thread.file_objects():
        for label, attr in file_fields:
            print(label, getattr(attachment, attr))
        print()
async def chan(self, ctx, board: str = "", arg: str = ""):
    """Display random post (image in spoiler)"""
    # board: board name; empty or "random" picks one from board_list.
    # arg:   "text"/"txt" -> post with text, "image"/"img" -> post with a
    #        file, anything else -> post with text (file shown if present).

    # If no board specified, or random one -> choose random board
    if not board or board.lower() == "random":
        board_list = [
            'a', 'b', 'c', 'd', 'e', 'f', 'g', 'gif', 'd', 'h', 'hr', 'k',
            'm', 'o', 'p', 'r', 's', 't', 'u', 'v', 'vg', 'w', 'wg', 'i',
            'ic', 'r9k', 'cm', 'hm', 'y', '3', 'adv', 'an', 'cgl', 'ck',
            'co', 'diy', 'fa', 'fit', 'hc', 'int', 'jp', 'lit', 'mlp', 'mu',
            'n', 'po', 'pol', 'sci', 'soc', 'sp', 'tg', 'toy', 'trv', 'tv',
            'vp', 'wsg', 'x'
        ]
        board = random.choice(board_list)

    # Attempt to download board
    try:
        b = basc_py4chan.Board(board)
        threads = b.get_all_threads()
    # Invalid board specified (library uses requests)
    except requests.exceptions.HTTPError:
        msg = await ctx.send("`/{0}/` doesn't exist.".format(board))
        await msg.add_reaction(basic_emoji.get("Si"))
        return

    max_attempts = 200

    def pick_post(wanted):
        """Return a random post accepted by *wanted*, or None when none is found.

        The original retried in an unbounded ``while`` loop, which never
        terminated (and blocked the bot) when no post could match.
        """
        if not threads:
            return None
        for _ in range(max_attempts):
            candidate = random.choice(random.choice(threads).posts)
            if wanted(candidate):
                return candidate
        return None

    mode = arg.lower()
    # Only the image mode selects on the attachment; every other mode
    # requires a non-empty text comment.
    if mode in ("image", "img"):
        post = pick_post(lambda p: p.has_file)
    else:
        post = pick_post(lambda p: p.text_comment)

    if post is None:
        await ctx.send("Couldn't find a suitable post on `/{0}/`.".format(board))
        return

    if mode in ("text", "txt"):
        result = post.text_comment
    elif mode in ("image", "img") or post.has_file:
        # Put image in a spoiler
        result = "|| {0} ||\n{1}".format(post.file_url, post.text_comment)
    else:
        result = post.text_comment

    # Split into smaller parts if a post is too long (>2000 characters)
    for segment in wrap(result, 1990):
        await ctx.send(segment)
def get_posts(board_id, postno):
    """Fetch one thread and send its posts to the UI through pyotherside.

    Every post becomes a dict (``no``, ``replies``, ``sticky``, ``closed``,
    ``name``, ``time``, ``semantic_url``, ``images``, ``has_file``, file
    fields when a file is attached, and ``com`` when there is a subject or
    comment).  The list is passed through ``utils.parse_posts`` together
    with a map of quoted post -> ids of the posts quoting it, then emitted
    as the ``'posts'`` event.  When the thread cannot be fetched an empty
    list is emitted as ``'posts_status'`` instead.

    :param board_id: board name
    :param postno: number of the thread's opening post
    """
    board = basc_py4chan.Board(board_id)
    thread = board.get_thread(postno)
    post_list = []

    if thread is None:
        pyotherside.send('posts_status', post_list)
        return

    post_replies = {}
    for post in thread.all_posts:
        post_values = {}
        post_values['no'] = post.post_id

        # Only the opening post carries the thread-level counters.
        if post.post_id == postno:
            post_values['replies'] = len(thread.posts) - 1
            post_values['sticky'] = int(thread.sticky)
        else:
            post_values['replies'] = 0
            post_values['sticky'] = 0

        post_values['closed'] = int(thread.closed)
        post_values['name'] = post.name
        post_values['time'] = post.timestamp
        post_values['semantic_url'] = post.semantic_url
        post_values['images'] = int(post.has_file)
        post_values['has_file'] = int(post.has_file)

        if post.has_file:
            post_values['ext'] = post.file_extension
            post_values['thumbUrl'] = post.thumbnail_url
            post_values['imgUrl'] = post.file_url
            post_values['filename'] = post.filename
            post_values['file_deleted'] = int(post.file_deleted)

        if post.comment:
            # Record, for each quoted post, which posts replied to it.
            for reply in utils.collect_replies(post.comment) or ():
                post_replies.setdefault(reply, []).append(post.post_id)

        if post.subject and post.comment:
            post_values['com'] = '<b>{}</b><br>'.format(post.subject) + post.comment
        elif post.comment:
            post_values['com'] = post.comment
        elif post.subject:
            post_values['com'] = '<b>{}</b>'.format(post.subject)

        post_list.append(post_values)

    post_list = utils.parse_posts(post_list, post_replies)
    pyotherside.send('posts', post_list)
def search_board(target_board_name):
    """
    Collect the files of every ygyl thread on a board, then save their
    links and offer them for download.

    @param target_board_name the board of focus to look for ygyl threads.
    """
    print("Board of focus: " + target_board_name)

    # Keep only the threads whose topic post is recognised as "ygyl".
    ygyl_threads = []
    for candidate in basc.Board(target_board_name).get_all_threads():
        if search_for_ygyl(candidate.topic):
            ygyl_threads.append(candidate)

    if len(ygyl_threads) == 0:
        print("No current ygyl threads in /" + target_board_name + "/.")
        return

    # Flatten all posts of all matching threads ...
    collected_posts = []
    for ygyl_thread in ygyl_threads:
        collected_posts.extend(ygyl_thread.all_posts)

    # ... and keep the attachment of each post that has one.
    attachments = []
    for entry in collected_posts:
        if entry.has_file:
            attachments.append(entry.file)

    objects_by_key = f_o_dict(attachments)
    urls_by_key = file_url_dict(attachments)
    save_links(target_board_name, urls_by_key)
    to_download(target_board_name, objects_by_key)
def random_thread(self, args):
    """Returns a random thread given a board

    Parameters
    ----------
    args : String
        Board

    Returns
    -------
    Tuple
        2 Strings: the thread url and the url of the first file in it

    Raises
    ------
    ValueError
        If the board lists no threads.
    IndexError
        If the chosen thread has no files.
    """
    query = ''.join(args)
    board = basc_py4chan.Board(str(query))
    thread_ids = board.get_all_thread_ids()
    if not thread_ids:
        raise ValueError("board /%s/ has no threads" % query)

    # randint is inclusive at both ends, hence the -1.
    thread_id = thread_ids[randint(0, len(thread_ids) - 1)]
    thread = board.get_thread(int(thread_id))
    url = 'http://boards.4chan.org/{}/thread/{}'.format(query, thread_id)

    # Only the first file is needed, so stop at the first one instead of
    # collecting every file url of the thread.
    for f in thread.file_objects():
        return url, f.file_url
    raise IndexError("thread %s has no files" % thread_id)
def getthread(boardname='vg', threadname='/tfg'):
    """Return the first thread on a board whose subject contains *threadname*.

    :param boardname: board to search
    :param threadname: substring looked for in each thread's subject
    :return: the freshly fetched thread, or ``None`` when no subject matches
    """
    board = basc_py4chan.Board(boardname)
    for thread in board.get_all_threads():
        subject = thread.topic.subject
        if subject is not None and threadname in subject:
            # Re-fetch by id so the caller gets the full thread rather than
            # the board-listing object.
            return board.get_thread(thread.id)
    return None
def __init__(self, board):
    """Create the logger and the board handle for *board*.

    Failures while creating the board are logged, not raised; in that case
    ``self.board`` is ``None``.
    """
    self.log = Log()
    # Always define the attribute so a failed fetch leaves a usable object
    # instead of one that raises AttributeError on first access.
    self.board = None
    self.log.info("Getting /{}/...".format(board))
    try:
        self.board = chan.Board(board)
        self.log.info("/{}/ was fetched successfully".format(board))
    except Exception as e:
        self.log.error("Failed to fetch /{}/ -- {}".format(board, e))
def get_yl(section):
    """Record the YLYL threads currently on board *section* in SQLite.

    A thread counts as YLYL when its subject or comment contains "ylyl"
    in any letter case.  New threads are inserted into the ``threads``
    table (file count, 0 downloaded, active = 1); for known threads only
    the file count is refreshed.
    """
    board = basc_py4chan.Board(section)
    all_thread_ids = board.get_all_thread_ids()
    conn = sqlite3.connect("/home/ubuntu/ylyl-basc/example.db")
    try:
        for thread_id in all_thread_ids:
            thread = board.get_thread(thread_id)
            if thread is None:
                # Thread vanished since the listing was fetched.
                continue
            topic = thread.topic

            # The newline keeps a match from spanning subject and comment.
            searchable = (str(topic.subject) + "\n" + str(topic.comment)).lower()
            if "ylyl" not in searchable:
                continue

            numfiles = sum(1 for _ in thread.file_objects())
            # threadid, date, files, downloaded, section, active
            yl_record = [thread_id, topic.datetime, numfiles, 0, section, 1]

            threadexists = conn.execute(
                "select exists(select 1 from threads where threadid is (?))",
                [thread_id]).fetchone()[0]

            if threadexists == 0:
                try:
                    with conn:
                        conn.execute(
                            "insert into threads(threadid, date, files, downloaded, section, active) "
                            "values (?, ?, ?, ?, ?, ?)", yl_record)
                except sqlite3.IntegrityError:
                    print("ERROR")
            else:
                try:
                    with conn:
                        conn.execute(
                            "update threads set files = (?) where threadid is (?)",
                            (yl_record[2], yl_record[0]))
                except sqlite3.OperationalError:
                    print("error...updating thread")
    finally:
        # Close the connection even when a request or query fails.
        conn.close()
def check_hate():
    """Return the share of open /pol/ threads containing a listed term.

    Thread ids already classified are cached in two CSV files (one for
    matching threads, one for non-matching ones) so they are not fetched
    again.  Threads that cannot be fetched or are closed are left out of
    the total.

    :return: the percentage formatted like ``"12.34%"``, or the string
        ``"error with the server, please refresh"`` on any failure
    """
    filename1 = 'crawled_data_true.csv'
    filename2 = 'crawled_data_false.csv'

    # Load each cache independently; previously a missing second file
    # crashed as soon as the first one existed.
    if os.path.isfile(filename1):
        map_threads_true = set(pd.read_csv(filename1)["post_id"].to_list())
    else:
        map_threads_true = set()
    if os.path.isfile(filename2):
        map_threads_false = set(pd.read_csv(filename2)["post_id"].to_list())
    else:
        map_threads_false = set()

    start_time = time.time()
    board_name = 'pol'
    try:
        set_hate = {'ABC', 'coolies', 'chink a billies', 'bamboo coons',
                    'chinig', 'slopehead', 'chink a billy', 'Chinese wetback',
                    'bamboo coon', 'ching chong', 'coolie', 'chigger',
                    'slope', 'slant', 'slant eye', 'wink', 'whoriental',
                    'gooklet', 'gookette', 'gook eyed', 'gookie', 'goloid',
                    'gink', 'dog eater', 'yellow invader', 'rice nigger'}
        # Lower-case once instead of once per post.
        hate_terms = [term.lower() for term in set_hate]

        board = basc_py4chan.Board(board_name)
        all_thread_ids = board.get_all_thread_ids()
        thread_count = 0
        hate_count = 0
        for thread_id in all_thread_ids:
            thread_count += 1
            if thread_id in map_threads_true:
                hate_count += 1
                continue
            if thread_id in map_threads_false:
                continue

            thread = board.get_thread(thread_id)
            if thread is None or thread.closed:
                thread_count -= 1
                continue

            matched = False
            for post in thread.all_posts:
                comment = post.comment.lower()
                if any(term in comment for term in hate_terms):
                    matched = True
                    break
            if matched:
                hate_count += 1
                map_threads_true.add(thread_id)
            else:
                map_threads_false.add(thread_id)

        # Build the frames from lists: pandas refuses to construct a
        # DataFrame from a set, which made every run end in the error branch.
        pd.DataFrame({"post_id": sorted(map_threads_true)}).to_csv(
            filename1, index=False)
        pd.DataFrame({"post_id": sorted(map_threads_false)}).to_csv(
            filename2, index=False)

        percentage = "{:.2%}".format(hate_count / thread_count)
        print("--- %s seconds ---" % (time.time() - start_time))
        return percentage
    except Exception:
        return "error with the server, please refresh"
def picture_download(path, brd):
    """Download the files of every thread on a board into fresh sub-folders.

    *path* is emptied (or created) first.  For each thread a folder named
    after the first 65 characters of the opening comment is created inside
    it and the thread's files are saved there under their original names.

    :param path: output directory; it is concatenated directly with the
        folder name, so it is expected to end with a path separator
    :param brd: board name
    """
    import shutil

    # Start from an empty directory.  The previous glob()+os.remove() reset
    # failed on the per-thread sub-folders created by an earlier run.
    if os.path.exists(path):
        shutil.rmtree(path)
    os.mkdir(path)

    # Characters replaced when a comment is turned into a folder name; one
    # table applied in a single pass instead of eight chained calls.
    name_table = str.maketrans({
        '?': " question ",
        '<': " less than ",
        '>': "Implying ",
        ':': ";",
        '"': "'",
        '/': " slash ",
        '\n': " newLine ",
        '\\': " backslash ",
    })

    board = basc_py4chan.Board(brd)
    # NOTE(review): t is sampled once and never refreshed, so the `t <= 7`
    # test below is the same for every thread - confirm whether a running
    # time limit was intended.
    t = time.thread_time()
    all_thread_ids = board.get_all_thread_ids()
    for z in all_thread_ids:
        if t > 7:
            break
        thread = board.get_thread(z)

        # print thread information
        print(thread)
        print('Sticky?', thread.sticky)
        print('Closed?', thread.closed)
        print('Replies:', len(thread.replies))

        # print topic post information
        topic = thread.topic
        print('Topic Repr', topic)
        print('Postnumber', topic.post_number)
        print('Timestamp', topic.timestamp)
        print('Datetime', repr(topic.datetime))
        print('Subject', topic.subject)
        print('Comment', topic.text_comment)

        smth = topic.text_comment[:65].translate(name_table)
        # The random suffix keeps folders of similar comments apart.
        info = "folder" + smth + str(random())
        pather = path + info + "\\"
        os.mkdir(pather)

        # NOTE(review): zip() pairs the i-th reply with the i-th file of the
        # thread, which are not necessarily related - kept as in the original.
        for x, y in zip(thread.replies, thread.file_objects()):
            print(t)
            print('Filename', y.filename_original)
            print('Fileurl', y.file_url)
            urllib.request.urlretrieve(y.file_url, pather + y.filename_original)
            print()
            print(x.post_id)
            print(x.text_comment)
def main():
    """Print the URL of every file in the first thread listed on /v/."""
    v_board = basc_py4chan.Board('v')
    # The first id in the board listing is taken as "the first thread".
    newest_id = v_board.get_all_thread_ids()[0]
    newest_thread = v_board.get_thread(newest_id)

    # files() yields the URLs, including extra files attached to posts
    for file_url in newest_thread.files():
        print(file_url)
def check_hate():
    """Return the share of current /pol/ posts containing a listed term.

    Every post of every thread on the board is checked with a
    case-insensitive substring test against the term list.

    :return: the percentage formatted like ``"12.34%"``, or the string
        ``"error with the server, please refresh"`` on any failure
        (including a board without posts)
    """
    print("here")
    board_name = 'pol'
    try:
        list_hate = ['ABCs', 'ABC', 'spinks', 'spink', 'slopey', 'winks',
                     'slopes', 'slopies', 'slants', 'slopeheads',
                     'slant eyes', 'sideways vaginas', 'sideways cooters',
                     'sideways pussies', 'coolies', 'chonkies', 'chunkies',
                     'Chinese wetbacks', 'ching chongs', 'chinigs',
                     'chink a billies', 'chiggers', 'celestials',
                     'bamboo coons', 'chinig', 'sideways cooter',
                     'slopehead', 'chink a billy', 'Chinese wetback',
                     'bamboo coon', 'ching chong', 'coolie', 'slopy',
                     'chonky', 'chunky', 'sideways pussy', 'sideways v****a',
                     'chigger', 'slope', 'slant', 'slant eye', 'wink',
                     'celestial', 'whoriental', 'whorientals', 'gooky eyes',
                     'gooklets', 'gooklet', 'gookettes', 'gookette',
                     'gook eyed', 'gookies', 'gookie', 'goloids', 'goloid',
                     'ginks', 'gink', 'dog eaters', 'dog eater',
                     'yellow invaders', 'rice niggers', 'yellow invader',
                     'rice nigger']
        # Lower-case the terms once rather than once per post.
        hate_terms = [term.lower() for term in list_hate]

        # get the board we want
        board = basc_py4chan.Board(board_name)

        post_count = 0
        hate_count = 0
        for thread_id in board.get_all_thread_ids():
            thread = board.get_thread(thread_id)
            if thread is None:
                continue
            # Distinct names for thread and post: the original reused
            # `thread` for both loops.
            for post in thread.all_posts:
                post_count += 1
                comment = post.comment.lower()
                if any(term in comment for term in hate_terms):
                    hate_count += 1

        percentage = "{:.2%}".format(hate_count / post_count)
        return percentage
    except Exception:
        return "error with the server, please refresh"
def get_4chan(lookback_list, tokenizer, model):
    """Score the current /biz/ replies with *model* and slice them by lookback.

    Replies are collected with their timestamp and their thread's opening
    comment, de-duplicated, tokenised, padded to length 12 and run through
    ``model.predict``.  Columns ``Negative``, ``Positive`` and
    ``Net_Sentiment`` are added from the first two prediction columns.

    :param lookback_list: lookback specifications understood by
        ``get_lookback``
    :param tokenizer: object providing ``texts_to_sequences``
    :param model: object providing ``input_shape`` and ``predict``
    :return: one DataFrame per entry of *lookback_list*, holding the posts
        whose timestamp is at or after that lookback
    """
    biz_board = py4chan.Board('biz')

    timestamp_list, thread_list, post_list = [], [], []
    for thread in biz_board.get_all_threads():
        for reply in thread.replies:
            post_list.append(reply.text_comment.strip('>'))
            timestamp_list.append(reply.timestamp)
            thread_list.append(thread.topic.text_comment)

    post_df = pd.DataFrame(timestamp_list, columns=['Timestamp'])
    post_df['Thread'] = pd.Series(thread_list)
    post_df['Text'] = pd.Series(post_list)

    # Resolve the widest window; the resulting value is not used further.
    placeholder = get_lookback(max(lookback_list))
    if type(placeholder) is datetime.datetime:
        placeholder = time.mktime(placeholder.timetuple())
    start = datetime.datetime.fromtimestamp(placeholder)

    unique_posts = post_df.drop_duplicates(keep='first', inplace=False)
    token_seqs = tokenizer.texts_to_sequences(unique_posts['Text'].values)
    padded_seqs = pad_sequences(token_seqs, maxlen=12)
    original_seqs = padded_seqs.shape[0]

    # Append all-zero rows until the row count is a multiple of the model's
    # batch size, then add ten more batches of zeros.
    batch_size = model.input_shape[0]
    filler = np.array([[0] * 12])
    while padded_seqs.shape[0] % batch_size != 0:
        padded_seqs = np.vstack((padded_seqs, filler))
    zero_batches = np.zeros(shape=(batch_size * 10, 12))
    final_data = np.vstack((padded_seqs, zero_batches))

    preds = model.predict(final_data, batch_size=128, verbose=0)
    # Keep only the predictions that belong to real posts.
    origs = preds[:original_seqs]
    unique_posts['Negative'] = origs[:, 0]
    unique_posts['Positive'] = origs[:, 1]
    unique_posts['Net_Sentiment'] = (
        unique_posts['Positive'] - unique_posts['Negative'])

    timeframe_lists = []
    for ph in lookback_list:
        cutoff = dt_to_int(get_lookback(ph))
        timeframe_lists.append(unique_posts[unique_posts['Timestamp'] >= cutoff])

    for lb in lookback_list:
        lb = get_lookback(lb)
        if type(lb) is datetime.datetime:
            lb = time.mktime(lb.timetuple())
        timing = datetime.datetime.fromtimestamp(lb)
        print(f'Cryptocurrency 4chan posts from {timing} to now.')

    return timeframe_lists
def downloadOneThread(id, board, dir):
    """Download every file of one thread into ``dir\\id``.

    Files that already exist in the target folder are skipped.  There is a
    one second pause after each file.

    :param id: thread id as a string (it is also used as the folder name)
    :param board: board name
    :param dir: parent output directory
    :return: ``0`` when the board or the thread cannot be found,
        otherwise ``None``
    """
    fDir = dir + "\\" + id
    if not (os.path.exists(fDir)):
        print("output directory doesnt exist")
        print("creating output folder")
        os.mkdir(fDir)
        print("Output folders created!")
    else:
        print("output directory already exists")

    tBoard = p4.Board(str(board))
    if not tBoard:
        print("Board Not Found")
        return 0
    print(tBoard.title)

    testThread = tBoard.get_thread(id)
    if not testThread:
        # Stop here; the original went on and crashed on the missing thread.
        print("thread not found")
        return 0
    print("found thread")

    # Materialise the generator to get a real count; __sizeof__() reported
    # the byte size of the generator object, not the number of files.
    files = list(testThread.file_objects())
    print(len(files), " file objects found to be downloaded")

    for f in files:
        target = fDir + "\\" + f.filename
        if os.path.exists(target):
            print("File exists")
            time.sleep(1)
            continue

        r = requests.get(f.file_url)
        print("Attempting to Download ", f.filename)
        print("Downloading to >>" + fDir)
        print("file doesnt exist. Downloading")
        # The with-block closes the file; no explicit close() needed.
        with open(target, "wb") as myFile:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    myFile.write(chunk)
        print("File Download Complete")
        time.sleep(1)
async def fetch_feed(self, url: str):
    """Fetch the 4chan thread a URL points to.

    The thread's JSON endpoint is requested first as a connectivity check
    (15 second total timeout), then the thread is loaded through
    basc_py4chan.

    :param url: thread URL; it is split by ``self.url_splitter`` into
        ``'board'`` and ``'thread'``
    :return: the thread object, or ``None`` when the request fails or the
        board or thread cannot be resolved (the reason is logged)
    """
    timeout = aiohttp.client.ClientTimeout(total=15)
    # SPLIT OUT THE URL HERE
    split = self.url_splitter(url)
    # Bind these once: the handlers below used to reference undefined
    # `board`, `thread` and `exc` names and raised NameError themselves.
    board = split['board']
    thread = split['thread']
    url_generation = 'https://a.4cdn.org/' + board + '/thread/' + thread + '.json'
    try:
        async with self.session.get(url_generation, timeout=timeout) as response:
            await response.read()
        chanboard = basc_py4chan.Board(board)
        chanthread = chanboard.get_thread(thread)
        # These attributes are read only for their failure modes, which the
        # handlers below map to "board missing" (KeyError) and
        # "thread missing" (AttributeError).
        if chanboard.title is not None and chanthread.id is not None:
            pass
    except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
        # We couldn't connect
        log.debug(f"We could not connect to 4chan.org")
        debug_exc_log(
            log,
            exc,
            f"We could not connect to 4chan.org.",
        )
        return None
    except KeyError as exc:
        # The board doesn't exist
        log.debug(f"The specified board {board} does not exist")
        debug_exc_log(
            log,
            exc,
            f"The specified board {board} does not exist.",
        )
        return None
    except AttributeError as exc:
        # The thread doesn't exist
        log.debug(f"The specified thread {thread} does not exist")
        debug_exc_log(
            log,
            exc,
            f"The specified thread {thread} does not exist.",
        )
        return None
    except Exception as exc:
        debug_exc_log(
            log,
            exc,
            f"Unexpected exception type {type(exc)} encountered for {board} -> {thread}",
        )
        return None
    return chanthread
def watch_4chan(self):
    """Download new files for every watched thread and retire finished ones.

    Watched threads are the ``ThreadTable`` rows with ``err404 == 0`` and
    ``limit == 0``.  For each one that still exists, the topic subject is
    written to ``topic.txt`` and every file not yet on disk is downloaded
    into ``<dir_output>/<board>/<thread>/``.  A thread that hit its image
    or bump limit, or is closed or archived, gets ``limit = 1``; a thread
    that no longer exists gets ``err404 = 1``.
    """
    # err404 = when thread not found
    # limit = when thread hit max image limit
    watch_urls = db.session.query(
        db.ThreadTable).filter_by(err404=0).filter_by(limit=0).all()
    if not watch_urls:
        self.logger.info("No thread to watch")
        return

    for thread in watch_urls:
        x = basc_py4chan.Board(board_name=thread.board_id, https=True)
        self.logger.info("scanning: {}/{}".format(
            thread.board_id, thread.thread_id))

        if x.thread_exists(thread_id=thread.thread_id):
            y = x.get_thread(thread_id=thread.thread_id,
                             update_if_cached=True)

            path = os.path.join(self.config['dir_output'], thread.board_id,
                                str(thread.thread_id))
            if not os.path.exists(path):
                os.makedirs(path)

            # with-block so the handle is closed even if the write fails
            with open(os.path.join(path, "topic.txt"), "w") as file_t:
                file_t.write(str(y.topic.subject))

            self.logger.info("downloading: {}/{}".format(
                thread.board_id, thread.thread_id))
            for file_i in y.files():
                file_n = os.path.basename(file_i)
                if not os.path.exists(os.path.join(path, file_n)):
                    self.logger.debug("receiving: {}".format(file_n))
                    urllib.urlretrieve(url=file_i,
                                       filename=os.path.join(path, file_n))
                else:
                    self.logger.debug("skipping: {}".format(file_n))

            # image limit or closed or archived
            # The test used to read `not y.imagelimit`, which retired almost
            # every thread right after its first scan.
            if y.imagelimit or y.closed or y.archived or y.bumplimit:
                self.logger.info("image limit: {}/{}".format(
                    thread.board_id, thread.thread_id))
                thread.limit = 1
                db.session.commit()
        else:
            thread.err404 = 1
            db.session.commit()
            self.logger.info("404 Not Found: {}/{}".format(
                thread.board_id, thread.thread_id))

        self.logger.info("finished: {}/{}".format(
            thread.board_id, thread.thread_id))
def fetchall_yl(section):
    """Download the not-yet-fetched files of every active thread of a board.

    Active threads are read from the ``threads`` table.  For each one, the
    files beyond the stored ``downloaded`` count are saved under
    ``/home/ubuntu/ylyl-basc/<section>/<threadid>/``.  Any failure while
    processing a thread marks it inactive.  The number of files seen is
    then stored as the new ``downloaded`` value.
    """
    conn = sqlite3.connect("/home/ubuntu/ylyl-basc/example.db")
    try:
        board = basc_py4chan.Board(section)

        listofactivethreads = []
        print("list of active threads")
        for row in conn.execute(
                "select id, threadid, date, files, downloaded, section, active from threads "
                "where active is 1 and section is (?)", [section]):
            listofactivethreads.append(row)
            print(row)

        for activethread in listofactivethreads:
            thread_id = activethread[1]
            thread = board.get_thread(thread_id)
            prev_downloaded = activethread[4]
            downloaded = 0
            dest_dir = "/home/ubuntu/ylyl-basc/" + section + "/" + str(thread_id) + "/"
            try:
                # A vanished thread is None here; the resulting
                # AttributeError is what marks it inactive below.
                for f in thread.file_objects():
                    if downloaded >= prev_downloaded:
                        # download the files
                        if not os.path.exists(dest_dir):
                            os.makedirs(dest_dir)
                        r = requests.get(f.file_url)
                        # Separate name for the handle: the original reused
                        # `f`, shadowing the loop variable.
                        with open(dest_dir + f.filename, 'wb') as out:
                            out.write(r.content)
                        prev_downloaded += 1
                    downloaded += 1
            except Exception:
                print("inactive: ", thread_id)
                conn.execute(
                    "update threads set active = (?) where threadid is (?)",
                    (0, thread_id))

            try:
                # The with-block commits, including the update above.
                with conn:
                    conn.execute(
                        "update threads set downloaded = (?) where threadid is (?)",
                        (downloaded, thread_id))
            except sqlite3.OperationalError:
                print("error...updating thread")
    finally:
        # Close the connection on every exit path.
        conn.close()
def get_new_posts_count(board_id, postno, replies_count):
    """Return how many replies a thread gained since *replies_count*.

    :param board_id: board name
    :param postno: thread number
    :param replies_count: reply count known to the caller
    :return: current reply count minus *replies_count*, or ``None`` when
        the thread cannot be fetched
    """
    thread = basc_py4chan.Board(board_id).get_thread(postno)
    if thread is None:
        return None
    # posts includes the opening post, which is not a reply
    current_replies = len(thread.posts) - 1
    return current_replies - replies_count
def main():
    """Print, then download with wget, every file of /s/ thread 20593800."""
    s_board = basc_py4chan.Board('s')
    target_thread = s_board.get_thread(20593800)

    # First pass: list every url and remember it as a plain string.
    urls = []
    for entry in target_thread.files():
        print(entry)
        urls.append(str(entry))

    # Second pass: download them one by one.
    for link in urls:
        saved_as = wget.download(link)
        print('Image Successfully Downloaded: ', saved_as)
def main():
    """Print the URL of every file in the thread named on the command line.

    Expects exactly two arguments, board and thread id; otherwise prints
    the usage text and returns.
    """
    argv = sys.argv
    if len(argv) == 3:
        board = basc_py4chan.Board(argv[1])
        thread = board.get_thread(int(argv[2]))
        for file_url in thread.files():
            print(file_url)
        return

    script = argv[0]
    print("Usage: python %s [board] [thread]" % script)
    print("Shows the URL of all the files in the thread.")
    print(
        "Example (download all files in thread): python %s v 12351234 | xargs wget"
        % script)
def get_threads(board_id):
    """Fetch all threads of a board and send them to the UI via pyotherside.

    Each thread is described by a dict built from its opening post; the
    list is passed through ``utils.parse_posts`` and emitted as the
    ``'threads'`` event.

    :param board_id: board name
    """
    board = basc_py4chan.Board(board_id)
    threads = list(board.get_all_threads())

    thread_list = []
    for thread in threads:
        topic = thread.topic
        thread_values = {
            'no': topic.post_number,
            'board': board_id,
            'post_board': board_id,
            'replies': int(thread.num_replies),
            'sticky': int(thread.sticky),
            'closed': int(thread.closed),
            'name': topic.name,
            'time': topic.timestamp,
            'semantic_url': topic.semantic_url,
        }

        # Use the omitted-image count when there is one, otherwise only
        # whether the opening post itself has a file.
        omitted = thread.omitted_images
        thread_values['images'] = omitted if omitted else int(topic.has_file)
        thread_values['has_file'] = int(topic.has_file)

        if topic.has_file:
            thread_values['ext'] = topic.file_extension
            thread_values['file_deleted'] = int(topic.file_deleted)
            thread_values['thumbUrl'] = topic.thumbnail_url
            thread_values['thumbnail_url'] = topic.thumbnail_url
            thread_values['imgUrl'] = topic.file_url
            thread_values['file_url'] = topic.file_url
            thread_values['filename'] = topic.filename

        # 'com' is the bold subject and/or the comment; absent when the
        # post has neither.
        if topic.subject and topic.comment:
            thread_values['com'] = '<b>{}</b><br>'.format(
                topic.subject) + topic.comment
        elif topic.comment:
            thread_values['com'] = topic.comment
        elif topic.subject:
            thread_values['com'] = '<b>{}</b>'.format(topic.subject)

        thread_list.append(thread_values)

    pyotherside.send('threads', utils.parse_posts(thread_list))
def _add_thread_from_info(self, board_name, thread_id):
    """Add a thread to our internal list from direct board name/thread id."""
    # Nothing to do when the thread is already tracked.
    with self.threads_lock:
        already_tracked = thread_id in self.threads
    if already_tracked:
        return False

    # Reuse the running board object for this board, creating it on first
    # use, and ask it whether the thread exists.
    with self.boards_lock:
        if board_name not in self.boards:
            self.boards[board_name] = basc_py4chan.Board(
                board_name, https=self.options.use_ssl)
        running_board = self.boards[board_name]
        thread_found = running_board.thread_exists(thread_id)

    if not thread_found:
        print(
            THREAD_NONEXISTENT.format(site=self.name,
                                      board=board_name,
                                      thread_id=thread_id))
        print(THREAD_NONEXISTENT_REASON)
        return False

    # Register the thread with zeroed download counters.
    entry = {
        'board': board_name,
        'dir': self.base_thread_dir.format(board=board_name,
                                           thread=thread_id),
        'thread_id': thread_id,
        'total_files': 0,
        'images_downloaded': 0,
        'thumbs_downloaded': 0,
        'alive': True,
    }
    with self.threads_lock:
        self.threads[thread_id] = entry
        status_info = self.threads[thread_id]

    self.update_status('new_thread', info=status_info)
    self.add_to_dl('thread', board=board_name, thread_id=thread_id)
    return True
def getRandomThreadBasedOnSelectedWords(boardName, chosenWords,
                                        allThreadsFromChosenBoard):
    """Pick a random thread whose opening comment contains a chosen word.

    A thread is entered once per matching word, so threads matching
    several words are proportionally more likely to be picked.

    :param boardName: board the threads belong to
    :param chosenWords: words to look for
    :param allThreadsFromChosenBoard: threads from the board listing
    :return: the chosen thread, re-fetched in full by id, or ``False``
        when no thread matches
    """
    topics = []
    for word in chosenWords:
        for thread in allThreadsFromChosenBoard:
            comment = thread.topic.text_comment
            if comment is not None and word in comment:
                topics.append(thread)

    # %-formatting gives the same output on Python 2 and 3; the original
    # print statements were a syntax error on Python 3.
    print("%s %s" % (topics, "**********************"))

    # contains short chosen threads
    if not topics:
        return False

    chosenThreadShort = random.choice(topics)
    board = basc_py4chan.Board(boardName)
    chosenThread = board.get_thread(chosenThreadShort.id)
    print(chosenThread)
    return chosenThread
def main():
    """Append the text of all /pol/ posts not seen before to the corpus.

    Known posts are tracked in ``4chan/json_4chan.pickle`` (post id ->
    body and thread id).  New post bodies are cleaned, stripped of empty
    lines and appended to ``4chan/txt_4chan.txt``.  A running word count
    is printed after each thread.
    """
    wordcount = 0

    # NOTE(review): pickle.load executes whatever the file contains; only
    # ever load a pickle this script wrote itself.
    try:
        with open('4chan/json_4chan.pickle', 'rb') as f:
            comment_dict = pickle.load(f)
        print('Using existing dictionary')
    except Exception:
        comment_dict = {}
        print('Using new dictionary')

    # list_of_boards = ['b', 'r9k', 's4s', 'pol']
    # list_of_boards1 = ['lgbt', 'x', 'adv', 'news', 'vip', 'qa']
    pol = ['pol']

    with open('4chan/txt_4chan.txt', 'a') as out:
        try:
            for b in pol:
                board = basc_py4chan.Board(b)
                thread_ids = board.get_all_thread_ids()
                archived_ids = get_archived_ids(b)
                all_ids = thread_ids + archived_ids
                for thread_id in all_ids:
                    thread = board.get_thread(thread_id)
                    try:
                        for post in thread.all_posts:
                            post_id = post.post_id
                            if post_id in comment_dict:
                                continue
                            text = remove(post.text_comment)
                            # remove empty lines
                            text = os.linesep.join(
                                [s for s in text.splitlines() if s])
                            tokens = word_tokenize(text)
                            wordcount += len(tokens)
                            comment_dict[post_id] = {'body': text,
                                                     'thread_id': thread_id}
                            out.write(text)
                    except Exception:
                        print('Failed to load comments\n')
                        continue
                    print(b, thread_id)
                    print('Total: {:,}\n'.format(wordcount))
        finally:
            # Save the dictionary even when the crawl is interrupted, so it
            # stays in step with what was already appended to the text file.
            with open('4chan/json_4chan.pickle', 'wb') as f:
                pickle.dump(comment_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
def pullfunc():
    """Interactive loop: ask for a board, thread and folder, then pull images.

    After showing the banner the user enters a board, a thread id and a
    folder name.  The number of posts with a file is shown and, on "y",
    each file is downloaded into the folder.  The screen is then cleared
    and the prompt starts over.  Answering "n" exits the program.
    """
    # One pass per thread.  The original restarted by calling itself, which
    # grew the call stack with every thread pulled.
    while True:
        print("▄█ █▀▄▀█ ██ ▄▀ ▄███▄ ▄▄▄▄▄ ▄▄▄▄▀ ▄███▄ ██ █ ▄███▄ █▄▄▄▄ ")
        print("██ █ █ █ █ █ ▄▀ █▀ ▀ █ ▀▄ ▀▀▀ █ █▀ ▀ █ █ █ █▀ ▀ █ ▄▀ ")
        print("██ █ ▄ █ █▄▄█ █ ▀▄ ██▄▄ ▄ ▀▀▀▀▄ █ ██▄▄ █▄▄█ █ ██▄▄ █▀▀▌ ")
        print("▐█ █ █ █ █ █ █ █▄ ▄▀ ▀▄▄▄▄▀ █ █▄ ▄▀ █ █ ███▄ █▄ ▄▀ █ █ ")
        print(" ▐ █ █ ███ ▀███▀ ▀ ▀███▀ █ ▀ ▀███▀ █ ")
        print(" ▀ █ █ ▀ ")
        print(" ▀ ▀ By Sen :) ")
        print("To start please type a board, thread ID, and a folder to save it to")
        boardInput = input("Board: ")
        threadInput = input("Thread ID: ")
        makefolder = input("Folder Name: ")

        board = basc_py4chan.Board(boardInput, https=False, session=None)
        thread = board.get_thread(threadInput)
        if thread is None:
            print("Thread not found.")
            continue

        # exist_ok: re-using a folder name no longer crashes.
        os.makedirs(makefolder, exist_ok=True)
        # NOTE(review): `path` is not defined in this function; presumably a
        # module-level base directory - confirm.
        madefolder = os.path.join(path, makefolder)

        posts_with_files = [post for post in thread.posts if post.has_file]
        postCount = len(posts_with_files)

        # Ask until a valid answer is given.  The original loop condition
        # compared against "yes"/"no" although the answers are "y"/"n".
        while True:
            print(str(postCount) + " Images")
            answer = input("pull? y/n: ")
            if answer == "y":
                print("")
                print("Pulling images")
                failed = 0
                for currentCount, post in enumerate(posts_with_files, start=1):
                    print(str(currentCount) + "/" + str(postCount), end='\r')
                    try:
                        saveto = os.path.join(madefolder, post.filename)
                        urllib.request.urlretrieve(post.file_url, saveto)
                    except Exception:
                        # Best effort: keep going, but report the failures
                        # instead of hiding them.
                        failed += 1
                print("Done!")
                if failed:
                    print(str(failed) + " file(s) could not be downloaded")
                time.sleep(2)
                os.system('cls' if os.name == 'nt' else 'clear')
                break
            elif answer == "n":
                print("aw ok bye")
                time.sleep(3)
                sys.exit(0)
            else:
                print("Please enter y/n.")
def getShit():
    """Store the cleaned opening comment of a random thread.

    A random thread of the board(s) named by the module-level ``boards``
    is fetched, HTML tags and quote markers are stripped from its opening
    comment, single and double quotes are doubled, and the text is handed
    to ``insert4ChanShitPost``.  Any failure is printed, not raised.
    """
    try:
        board = basc_py4chan.Board(boards)
        threads = board.get_all_thread_ids()
        # randint includes its upper bound; without the -1 the index could
        # run one past the end of the list.
        randomThread = randint(0, len(threads) - 1)
        thread = board.get_thread(threads[randomThread])
        topic = thread.topic

        cleanr = re.compile('<.*?>')
        cleantext = re.sub(cleanr, '', topic.comment)
        # Strip quote markers, both as the HTML entity and as a literal
        # character.
        # NOTE(review): the entity form is an assumption about how the raw
        # comment encodes '>' - confirm.
        test = str(cleantext).replace('&gt;', '').replace('>', '')
        # Decode the apostrophe entity.  The literal found here was garbled
        # into an unterminated ''' and has been restored as the entity.
        cleanerText = test.replace('&#039;', '\'')
        # Double the quotes, presumably as SQL escaping for the insert.
        # NOTE(review): a parameterised query in insert4ChanShitPost would be
        # safer than escaping by hand.
        cleanerText = cleanerText.replace('\'', '\'\'')
        cleanerText = cleanerText.replace('\"', '\"\"')
        insert4ChanShitPost(cleanerText)
    except Exception as exception:
        print(exception)