Ejemplo n.º 1
0
def main():
    """Archive a single 4chan thread: its JSON and every attached file.

    Reads ``<board> <thread_id>`` from the command line. With any other
    argument count a usage message is printed and the process exits with
    status 1. The thread JSON is written to
    ``4chan/<board>/<thread_id>/<thread_id>.json`` below the current working
    directory and the files go into an ``images`` folder next to it.
    """
    # Exactly two arguments are required. The previous range check accepted
    # a lone <board> and then crashed with IndexError on sys.argv[2].
    if len(sys.argv) != 3:
        print("Quick and dirty 4chan Archiver")
        print("%s - Save the JSON and all images for an 4chan post." %
              (sys.argv[0]))
        print("\tUsage: %s <board> <thread_id>" % (sys.argv[0]))
        sys.exit(1)

    board_name = sys.argv[1]
    thread_id = sys.argv[2]

    # fetch the requested thread from the requested board
    board = basc_py4chan.Board(board_name)
    thread = board.get_thread(thread_id)
    if thread is None:
        # get_thread() gives None for a thread that cannot be fetched
        print("Thread %s not found on /%s/" % (thread_id, board_name))
        sys.exit(1)

    # create folders according to chan.arc standard
    path = os.path.join(os.getcwd(), "4chan", board_name, thread_id)
    images_path = os.path.join(path, "images")
    mkdirs(images_path)

    # archive the thread JSON
    url_builder = basc_py4chan.Url(board_name)
    json_url = url_builder.thread_api_url(thread_id)
    print(json_url)
    download_json(os.path.join(path, "%s.json" % thread_id), json_url)

    # download every file of the thread, even extra files in posts
    for img in thread.file_objects():
        print("Downloading %s..." % img.file_url)
        download_file(os.path.join(images_path, "%s" % img.filename),
                      img.file_url)
Ejemplo n.º 2
0
async def vrdoom_task():
    """Announce a /vr/ "DOOM THREAD" in the Discord channel `vrdoomchan`.

    Once the bot is ready, the message contents already in the channel
    history are collected. The task then scans every thread of /vr/ every
    10 seconds; the first thread whose opening post contains "DOOM THREAD"
    and whose URL is not among the collected contents is sent as an embed,
    after which the task returns.
    """
    await bot.wait_until_ready()

    # message contents already in the channel, used to avoid re-posting
    threadlinks = [
        message.content
        async for message in bot.get_channel(vrdoomchan).history()
    ]

    while True:
        channel = bot.get_channel(vrdoomchan)
        vr_board = basc_py4chan.Board('vr')
        for thread_id in vr_board.get_all_thread_ids():
            opening_post = vr_board.get_thread(thread_id).topic
            if "DOOM THREAD" not in opening_post.text_comment:
                continue

            picture_url = opening_post.file.file_url
            thread_url = f"https://boards.4channel.org/vr/thread/{thread_id}"
            embed = discord.Embed(title=opening_post.subject,
                                  url=thread_url,
                                  color=0x9ab89f,
                                  timestamp=opening_post.datetime)
            embed.set_image(url=picture_url)

            if thread_url in threadlinks:
                continue
            await channel.send(embed=embed)
            threadlinks.append(thread_url)
            # NOTE(review): returning here ends the task after the first
            # announcement; the polling loop does not continue afterwards.
            return
        await asyncio.sleep(10)
Ejemplo n.º 3
0
def tryToRespondCorrectly(boardName, message):
	"""Build a reply out of posts from a thread picked using words of `message`.

	All threads of `boardName` are fetched, words are chosen from `message`
	by getFiveWordsFromListOfWords and a thread is picked by
	getRandomThreadBasedOnSelectedWords.

	Returns False when no thread was picked. Otherwise returns a two-item
	list: the selected posts and the UTF-8 encoded reply text.

	NOTE: Python 2 code (print statement below).
	"""
	allThreadsFromChosenBoard = basc_py4chan.Board(boardName).get_all_threads()

	chosenWords = getFiveWordsFromListOfWords(message)
	print chosenWords

	chosenThread = getRandomThreadBasedOnSelectedWords(boardName, chosenWords, allThreadsFromChosenBoard)
	if not chosenThread:
		return False
	replyToUser = ""
	posts = chosenThread.all_posts
	fivePosts = []
	# With more than five posts, draw random posts until five distinct ones
	# are collected; otherwise use every post of the thread.
	if len(posts) > 5:
		while len(fivePosts)<5:
			post = random.choice(posts)
			if post not in fivePosts:
				fivePosts.append(post)
	else:
		fivePosts = posts
	# The reply always starts with the text of the first post in the thread.
	replyToUser += "Original poster said: "
	replyToUser += posts[0].text_comment
	replyToUser += "\n"
	for post in fivePosts:
		# Posts without a name are attributed to "Anonymous".
		if post.name:
			replyToUser += post.name
		else:
			replyToUser += "Anonymous"
		replyToUser += " says:\n"
		replyToUser += post.text_comment
		replyToUser += "\n\n"
	return [fivePosts, replyToUser.encode('utf-8')]##################
Ejemplo n.º 4
0
    def crawl_thread(self, board, thread):
        """Register the acceptable, not-yet-known files of a thread for download.

        A file is skipped when it is deleted, smaller than the
        ``IMAGE_MIN_*`` limits, more elongated than ``IMAGE_MAX_HW_RATIO``
        in either direction, or when an ``Image`` row with the same filename
        already exists.

        Args:
            board: Board name.
            thread: Thread id; a missing thread raises (``raise_404=True``).

        Returns:
            Number of newly registered images.
        """
        b = basc_py4chan.Board(board)
        t = b.get_thread(thread_id=thread, raise_404=True)

        new_images = []
        for f in t.file_objects():
            if f.file_deleted:
                continue
            if f.file_width < IMAGE_MIN_WIDTH or \
                  f.file_height < IMAGE_MIN_HEIGHT or \
                  f.file_size < IMAGE_MIN_SIZE or \
                  f.file_width / f.file_height > IMAGE_MAX_HW_RATIO or \
                  f.file_height / f.file_width > IMAGE_MAX_HW_RATIO:
                continue
            # should it be unique
            cnt = self.db.query(Image).filter_by(filename=f.filename).count()
            if cnt != 0:
                continue

            path = '%s/%s/%s' % (self.__sitename__, board, f.filename)
            img = Image(site=self.__sitename__, board=board, thread=thread,
                        filename_original=f.filename_original, filename=f.filename,
                        width=f.file_width, height=f.file_height, size=f.file_size,
                        hash=f.file_md5_hex, path=path, url=f.file_url,
                        priority=0, status=False)
            self.db.add(img)
            new_images.append(img)

        self.db.commit()

        # Queue the downloads only after the commit. Previously the task was
        # built right after db.add(), i.e. before the row had been written,
        # so img.id was presumably still unset at that point (assumes an
        # ORM that assigns primary keys on flush -- TODO confirm).
        for img in new_images:
            self.download_queue.put(DownloadTask(id=img.id, priority=img.priority))

        return len(new_images)
Ejemplo n.º 5
0
def main():
    """Print thread, topic and file details for the first thread id of /v/."""
    # take the first id returned by the board's thread listing
    board = basc_py4chan.Board('v')
    thread = board.get_thread(board.get_all_thread_ids()[0])

    # thread information
    print(thread)
    for label, attribute in (('Sticky?', 'sticky'), ('Closed?', 'closed')):
        print(label, getattr(thread, attribute))

    # topic information
    opening_post = thread.topic
    print('Topic Repr', opening_post)
    print('Postnumber', opening_post.post_number)
    print('Timestamp', opening_post.timestamp)
    print('Datetime', repr(opening_post.datetime))
    print('Subject', opening_post.subject)
    print('Comment', opening_post.text_comment)
    print('Replies', thread.replies)

    # file information
    file_fields = (
        ('  Filemd5hex', 'file_md5_hex'),
        ('  Fileurl', 'file_url'),
        ('  Thumbnailurl', 'thumbnail_url'),
    )
    for attachment in thread.file_objects():
        print('Filename', attachment.filename)
        for label, attribute in file_fields:
            print(label, getattr(attachment, attribute))
        print()
Ejemplo n.º 6
0
    async def chan(self, ctx, board: str = "", arg: str = ""):
        """Display random post (image in spoiler)"""

        # An empty board or "random" means: pick one from the built-in list
        if not board or board.lower() == "random":
            board = random.choice([
                'a', 'b', 'c', 'd', 'e', 'f', 'g', 'gif', 'd', 'h', 'hr', 'k',
                'm', 'o', 'p', 'r', 's', 't', 'u', 'v', 'vg', 'w', 'wg', 'i',
                'ic', 'r9k', 'cm', 'hm', 'y', '3', 'adv', 'an', 'cgl', 'ck',
                'co', 'diy', 'fa', 'fit', 'hc', 'int', 'jp', 'lit', 'mlp',
                'mu', 'n', 'po', 'pol', 'sci', 'soc', 'sp', 'tg', 'toy', 'trv',
                'tv', 'vp', 'wsg', 'x'
            ])

        # Fetch the board's threads; the HTTP error raised while doing so
        # is reported to the user as a nonexistent board
        try:
            threads = basc_py4chan.Board(board).get_all_threads()
        except requests.exceptions.HTTPError:
            msg = await ctx.send("`/{0}/` doesn't exist.".format(board))
            await msg.add_reaction(basic_emoji.get("Si"))
            return

        spoiler_template = "|| {0} ||\n{1}"
        wants_text = arg.lower() == "text" or arg.lower() == "txt"
        wants_image = (not wants_text
                       and (arg.lower() == "image" or arg.lower() == "img"))

        result = ""
        post = None

        if wants_text:
            # Keep drawing random posts until one has text
            while not (post and post.text_comment):
                post = random.choice(random.choice(threads).posts)
                result = post.text_comment

        elif wants_image:
            # Keep drawing random posts until one has a file
            while not (post and post.has_file):
                post = random.choice(random.choice(threads).posts)
                # Image URL goes inside a spoiler
                result = spoiler_template.format(post.file_url,
                                                 post.text_comment)

        else:
            # Default: a post with text; its image is included when present
            while not (post and post.text_comment):
                post = random.choice(random.choice(threads).posts)
                result = (spoiler_template.format(post.file_url,
                                                  post.text_comment)
                          if post.has_file else post.text_comment)

        # Send in pieces of at most 1990 characters
        for segment in wrap(result, 1990):
            await ctx.send(segment)
Ejemplo n.º 7
0
def get_posts(board_id, postno):
    """Fetch a thread and send its posts to the UI through pyotherside.

    When the thread cannot be fetched, an empty list is sent as
    'posts_status'. Otherwise one dict per post is built, reply references
    found by utils.collect_replies are grouped per referenced post, both are
    passed through utils.parse_posts and the result is sent as 'posts'.
    """
    thread = basc_py4chan.Board(board_id).get_thread(postno)

    if thread is None:
        pyotherside.send('posts_status', [])
        return

    entries = []
    referenced_by = {}

    for post in thread.all_posts:
        # reply count and sticky flag are reported only on the requested post
        is_requested_post = post.post_id == postno
        entry = {
            'no': post.post_id,
            'replies': len(thread.posts) - 1 if is_requested_post else 0,
            'sticky': int(thread.sticky) if is_requested_post else 0,
            'closed': int(thread.closed),
            'name': post.name,
            'time': post.timestamp,
            'semantic_url': post.semantic_url,
            'images': int(post.has_file),
            'has_file': int(post.has_file),
        }

        if post.has_file:
            entry.update({
                'ext': post.file_extension,
                'thumbUrl': post.thumbnail_url,
                'imgUrl': post.file_url,
                'filename': post.filename,
                'file_deleted': int(post.file_deleted),
            })

        if post.comment:
            # remember which posts refer to which other post
            for target in utils.collect_replies(post.comment) or ():
                referenced_by.setdefault(target, []).append(post.post_id)

        # 'com' is the bold subject and/or the comment; absent if neither
        if post.comment:
            if post.subject:
                entry['com'] = '<b>{}</b><br>'.format(
                    post.subject) + post.comment
            else:
                entry['com'] = post.comment
        elif post.subject:
            entry['com'] = '<b>{}</b>'.format(post.subject)

        entries.append(entry)

    pyotherside.send('posts', utils.parse_posts(entries, referenced_by))
def search_board(target_board_name):
    """
    Collect the files of a board's ygyl threads, save their links and
    hand them over for download.
    @param target_board_name the board of focus to look for ygyl threads.
    """
    print("Board of focus: " + target_board_name)

    # keep only threads whose topic post is accepted by search_for_ygyl
    ygyl_threads = []
    for candidate in basc.Board(target_board_name).get_all_threads():
        if search_for_ygyl(candidate.topic):
            ygyl_threads.append(candidate)

    if not ygyl_threads:
        print("No current ygyl threads in /" + target_board_name + "/.")
        return

    # gather every post of every matching thread
    collected_posts = []
    for ygyl_thread in ygyl_threads:
        collected_posts.extend(ygyl_thread.all_posts)

    attachments = [p.file for p in collected_posts if p.has_file]

    objects_by_key = f_o_dict(attachments)
    urls_by_key = file_url_dict(attachments)

    save_links(target_board_name, urls_by_key)
    to_download(target_board_name, objects_by_key)
    def random_thread(self, args):
        """Pick a random thread of a board.

        Parameters
        ----------
        args : String
            Board name; the pieces of `args` are joined into one string.

        Returns
        -------
        Tuple
            The thread URL and the URL of the first file in the thread.
        """
        board_name = str(''.join(args))
        board = basc_py4chan.Board(board_name)
        candidate_ids = board.get_all_thread_ids()

        # draw an index, then use the id both as text (URL) and as int (fetch)
        pick = randint(0, len(candidate_ids) - 1)
        chosen_id = str(candidate_ids[pick])
        thread = board.get_thread(int(chosen_id))

        url = r'http://boards.4chan.org/' + board_name + '/thread/' + chosen_id
        pictures = [attachment.file_url for attachment in thread.file_objects()]

        return url, pictures[0]
Ejemplo n.º 10
0
def getthread(boardname='vg', threadname='/tfg'):
    """Return the first thread whose subject contains `threadname`.

    Args:
        boardname: Board to search.
        threadname: Substring looked for in each thread's topic subject.

    Returns:
        The thread re-fetched through ``board.get_thread``, or None when no
        thread subject contains `threadname`.
    """
    board = basc_py4chan.Board(boardname)
    for thread in board.get_all_threads():
        subject = thread.topic.subject
        # threads without a subject cannot match
        if subject is not None and threadname in subject:
            return board.get_thread(thread.id)
    return None
Ejemplo n.º 11
0
 def __init__(self, board):
     """Create the logger and try to open the given board.

     A failure while opening the board is logged as an error instead of
     being raised; in that case ``self.board`` is not assigned.
     """
     logger = Log()
     self.log = logger
     logger.info("Getting /{}/...".format(board))
     try:
         self.board = chan.Board(board)
         logger.info("/{}/ was fetched successfully".format(board))
     except Exception as error:
         logger.error("Failed to fetch /{}/ -- {}".format(board, error))
Ejemplo n.º 12
0
def get_yl(section):
    """Record the YLYL threads of a board in the ``threads`` table.

    Every thread of `section` whose topic subject or comment contains
    "YLYL", "ylyl" or "Ylyl" is inserted with its id, date, current file
    count, a downloaded count of 0, the section and an active flag of 1.
    For a thread id that is already stored only the file count is updated.

    Args:
        section: Board name.
    """
    markers = ("YLYL", "ylyl", "Ylyl")

    board = basc_py4chan.Board(section)
    all_thread_ids = board.get_all_thread_ids()
    conn = sqlite3.connect("/home/ubuntu/ylyl-basc/example.db")

    # try/finally so the connection is released even when a fetch or a
    # query fails half-way through the board
    try:
        for thread_id in all_thread_ids:
            thread = board.get_thread(thread_id)
            try:
                topic = thread.topic
            except Exception:
                # best effort: skip threads that could not be fetched
                continue

            thecomment = str(topic.comment)
            thesubject = str(topic.subject)
            if not any(marker in text
                       for text in (thesubject, thecomment)
                       for marker in markers):
                continue

            numfiles = sum(1 for _ in thread.file_objects())
            # threadid, date, files, downloaded, section, active
            yl_record = (thread_id, topic.datetime, numfiles, 0, section, 1)

            threadexists = conn.execute(
                "select exists(select 1 from threads where threadid is (?))",
                [thread_id]).fetchone()[0]

            if threadexists == 0:
                try:
                    with conn:
                        conn.execute(
                            "insert into threads(threadid, date, files, "
                            "downloaded, section, active) "
                            "values (?, ?, ?, ?, ?, ?)", yl_record)
                except sqlite3.IntegrityError:
                    print("ERROR")
            else:
                try:
                    with conn:
                        conn.execute(
                            "update threads set files = (?) "
                            "where threadid is (?)", (numfiles, thread_id))
                except sqlite3.OperationalError:
                    print("error...updating thread")
    finally:
        conn.close()
Ejemplo n.º 13
0
def _hate_cache_load(csv_path):
    """Return the ``post_id`` column of `csv_path` as a set (empty if no file)."""
    if not os.path.isfile(csv_path):
        return set()
    return set(pd.read_csv(csv_path)["post_id"].to_list())


def _hate_cache_save(csv_path, thread_ids):
    """Write `thread_ids` to `csv_path` as a single ``post_id`` column."""
    pd.DataFrame({"post_id": sorted(thread_ids)}).to_csv(csv_path, index=False)


def check_hate():
    """Return the share of /pol/ threads containing a term of the lexicon.

    Threads classified on earlier runs are read from
    ``crawled_data_true.csv`` / ``crawled_data_false.csv`` and not fetched
    again; both files are rewritten at the end. Threads that cannot be
    fetched or are closed are left out of the total.

    Returns:
        The percentage formatted like ``"12.34%"``, or the string
        "error with the server, please refresh" when anything fails
        (including an empty total).
    """
    filename1 = 'crawled_data_true.csv'
    filename2 = 'crawled_data_false.csv'

    # Each cache file is loaded on its own; previously a missing second
    # file crashed as soon as the first one existed.
    map_threads_true = _hate_cache_load(filename1)
    map_threads_false = _hate_cache_load(filename2)
    # Older runs could list a thread in both files; "true" wins.
    map_threads_false -= map_threads_true

    start_time = time.time()
    board_name = 'pol'
    try:

        set_hate = {'ABC', 'coolies', 'chink a billies', 'bamboo coons', 'chinig', 'slopehead', 'chink a billy', 'Chinese wetback', 'bamboo coon', 'ching chong', 'coolie', 'chigger', 'slope', 'slant', 'slant eye', 'wink', 'whoriental', 'gooklet', 'gookette', 'gook eyed', 'gookie', 'goloid', 'gink','dog eater', 'yellow invader', 'rice nigger'}
        # lower-case the lexicon once instead of once per post
        lowered_terms = [term.lower() for term in set_hate]

        board = basc_py4chan.Board(board_name)
        thread_count = 0
        hate_count = 0
        for thread_id in board.get_all_thread_ids():
            thread_count += 1
            if thread_id in map_threads_true:
                hate_count += 1
                continue
            if thread_id in map_threads_false:
                continue

            thread = board.get_thread(thread_id)
            if thread is None or thread.closed:
                thread_count -= 1
                continue

            # Classify the thread as a whole. The former if/else inside the
            # post loop also added matching threads to the "false" set.
            matched = False
            for post in thread.all_posts:
                comment = (post.comment or "").lower()
                if any(term in comment for term in lowered_terms):
                    matched = True
                    break
            if matched:
                hate_count += 1
                map_threads_true.add(thread_id)
            else:
                map_threads_false.add(thread_id)

        # Build the frames from a named column so that an empty set still
        # yields a valid one-column CSV.
        _hate_cache_save(filename1, map_threads_true)
        _hate_cache_save(filename2, map_threads_false)

        percentage = "{:.2%}".format(hate_count / thread_count)
        print("--- %s seconds ---" % (time.time() - start_time))

        return percentage
    except Exception:
        return "error with the server, please refresh"
Ejemplo n.º 14
0
def picture_download(path, brd):
    """Download files of a board's threads into one fresh folder per thread.

    `path` is emptied and recreated first. For every thread, its details
    are printed, a folder named after the start of the topic text is made
    below `path`, and files are saved there under their original names.
    """
    # start from an empty target directory
    if os.path.exists(path):
        for leftover in glob.glob(path + '*'):
            os.remove(leftover)
        os.rmdir(path)
    os.mkdir(path)

    board = basc_py4chan.Board(brd)
    t = time.thread_time()
    all_thread_ids = board.get_all_thread_ids()

    # characters replaced before the topic text is used in a folder name
    folder_name_table = {
        ord('?'): " question ",
        ord('<'): " less than ",
        ord('>'): "Implying ",
        ord(':'): ";",
        ord('"'): "'",
        ord('/'): " slash ",
        ord('\n'): " newLine ",
        ord('\\'): " backslash ",
    }

    for thread_id in all_thread_ids:
        # NOTE(review): t is read once above and never refreshed, so this
        # guard is decided by that single reading.
        if t > 7:
            break

        thread = board.get_thread(thread_id)

        # print thread information
        print(thread)
        print('Sticky?', thread.sticky)
        print('Closed?', thread.closed)
        print('Replies:', len(thread.replies))

        # print topic post information
        topic = thread.topic
        print('Topic Repr', topic)
        print('Postnumber', topic.post_number)
        print('Timestamp', topic.timestamp)
        print('Datetime', repr(topic.datetime))
        print('Subject', topic.subject)
        print('Comment', topic.text_comment)

        # folder name: first 65 characters of the topic, made path-safe,
        # plus a random suffix
        safe_title = topic.text_comment[:65].translate(folder_name_table)
        pather = path + "folder" + safe_title + str(random()) + "\\"
        os.mkdir(pather)

        # replies and files are walked in lockstep
        for reply, attachment in zip(thread.replies, thread.file_objects()):
            print(t)
            print('Filename', attachment.filename_original)
            print('Fileurl', attachment.file_url)
            urllib.request.urlretrieve(attachment.file_url,
                                       pather + attachment.filename_original)
            print()
            print(reply.post_id)
            print(reply.text_comment)
Ejemplo n.º 15
0
def main():
    """Print the URL of every file in the first thread listed for /v/."""
    # the first id of the board's thread listing selects the thread
    v_board = basc_py4chan.Board('v')
    thread = v_board.get_thread(v_board.get_all_thread_ids()[0])

    # one URL per line, including extra files in posts
    for file_url in thread.files():
        print(file_url)
Ejemplo n.º 16
0
def check_hate():
    """Return the share of /pol/ posts containing a term of the lexicon.

    Every post of every thread that can be fetched is checked
    case-insensitively against the built-in term list.

    Returns:
        The percentage formatted like ``"12.34%"``, or the string
        "error with the server, please refresh" when anything fails
        (including the case that no post was seen).
    """
    print("here")
    board_name = 'pol'

    try:
        list_hate = ['ABCs', 'ABC', 'spinks', 'spink', 'slopey', 'winks', 'slopes', 'slopies', 'slants', 'slopeheads', 'slant eyes', 'sideways vaginas', 'sideways cooters', 'sideways pussies', 'coolies', 'chonkies', 'chunkies', 'Chinese wetbacks', 'ching chongs', 'chinigs', 'chink a billies', 'chiggers', 'celestials', 'bamboo coons', 'chinig', 'sideways cooter', 'slopehead', 'chink a billy', 'Chinese wetback', 'bamboo coon', 'ching chong', 'coolie', 'slopy', 'chonky', 'chunky', 'sideways pussy', 'sideways v****a', 'chigger', 'slope', 'slant', 'slant eye', 'wink', 'celestial', 'whoriental', 'whorientals', 'gooky eyes', 'gooklets', 'gooklet', 'gookettes', 'gookette', 'gook eyed', 'gookies', 'gookie', 'goloids', 'goloid', 'ginks', 'gink', 'dog eaters', 'dog eater', 'yellow invaders', 'rice niggers', 'yellow invader', 'rice nigger']
        # lower-case the lexicon once instead of once per post
        lowered_terms = [term.lower() for term in list_hate]

        board = basc_py4chan.Board(board_name)
        post_count = 0
        hate_count = 0
        for thread_id in board.get_all_thread_ids():
            thread = board.get_thread(thread_id)
            if thread is None:
                continue
            # the loop variable no longer reuses the name of the thread
            for post in thread.all_posts:
                post_count += 1
                comment = post.comment.lower()
                if any(term in comment for term in lowered_terms):
                    hate_count += 1

        return "{:.2%}".format(hate_count / post_count)
    except Exception:
        return "error with the server, please refresh"
Ejemplo n.º 17
0
def get_4chan(lookback_list, tokenizer, model):
    """Score the replies of all /biz/ threads and slice them by lookback.

    Reply texts, reply timestamps and the topic text of the owning thread
    are collected into a DataFrame, de-duplicated, tokenized with
    `tokenizer`, padded to length 12 and passed to `model.predict`.
    Columns 0 and 1 of the predictions are stored as 'Negative' and
    'Positive', and their difference as 'Net_Sentiment'.

    Returns a list with one filtered DataFrame per entry of
    `lookback_list`, each keeping the rows whose 'Timestamp' is at least
    ``dt_to_int(get_lookback(entry))``.
    """
    biz = py4chan.Board('biz')
    threads = biz.get_all_threads()
    thread_list = []
    post_list = []
    timestamp_list = []
    for thread in threads:
        posts = [post.text_comment for post in thread.replies]
        timestamps = [post.timestamp for post in thread.replies]
        # the topic text is repeated once per reply so the lists line up
        topics = [thread.topic.text_comment for post in thread.replies]
        for post in posts:
            # strip() removes '>' characters from both ends of the text
            post_list.append(post.strip('>'))
        for ts in timestamps:
            timestamp_list.append(ts)
        for topic in topics:
            thread_list.append(topic)

    post_df = pd.DataFrame(timestamp_list, columns=['Timestamp'])
    post_df['Thread'] = pd.Series(thread_list)
    post_df['Text'] = pd.Series(post_list)

    max_val = max(lookback_list)
    placeholder = get_lookback(max_val)

    # get_lookback may hand back a datetime; convert it to a timestamp first
    if type(placeholder) == datetime.datetime:
        placeholder = time.mktime(placeholder.timetuple())
    # NOTE(review): `start` is not used anywhere below
    start = datetime.datetime.fromtimestamp(placeholder)

    unique_posts = post_df.drop_duplicates(keep='first', inplace=False)
    unique_comment_seqs = tokenizer.texts_to_sequences(
        unique_posts['Text'].values)
    padded_seqs = pad_sequences(unique_comment_seqs, maxlen=12)
    # number of real rows, used to cut the predictions back down below
    original_seqs = padded_seqs.shape[0]
    # assumes model.input_shape[0] is an integer batch size -- TODO confirm
    batch_size = model.input_shape[0]
    filler = np.array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
    # append all-zero rows until the row count is a multiple of batch_size
    while padded_seqs.shape[0] % batch_size != 0:
        padded_seqs = np.vstack((padded_seqs, filler))
    # a further batch_size * 10 all-zero rows are appended before predicting
    final_data = np.vstack(
        (padded_seqs, np.zeros(shape=(batch_size * 10, 12))))
    preds = model.predict(final_data, batch_size=128, verbose=0)
    # keep only the predictions that belong to real rows
    origs = preds[:original_seqs]
    unique_posts['Negative'] = origs[:, 0]
    unique_posts['Positive'] = origs[:, 1]
    unique_posts[
        'Net_Sentiment'] = unique_posts['Positive'] - unique_posts['Negative']
    timeframe_lists = [
        unique_posts[unique_posts['Timestamp'] >= dt_to_int(get_lookback(ph))]
        for ph in lookback_list
    ]
    # report the start of each lookback window
    for lb in lookback_list:
        lb = get_lookback(lb)
        if type(lb) == datetime.datetime: lb = time.mktime(lb.timetuple())
        timing = datetime.datetime.fromtimestamp(lb)
        print(f'Cryptocurrency 4chan posts from {timing} to now.')
    return timeframe_lists
Ejemplo n.º 18
0
def downloadOneThread(id, board, dir):
    """Download every file of one thread into ``<dir>\\<id>``.

    Files that already exist in the target folder are skipped. After each
    file, downloaded or skipped, the function sleeps for one second.

    Args:
        id: Thread id as a string; also used as the folder name.
        board: Board name.
        dir: Parent directory of the thread folder.

    Returns:
        0 when the board or the thread could not be found, otherwise None.
    """
    fDir = dir + "\\" + id

    if not (os.path.exists(fDir)):
        print("output directory doesnt exist")
        print("creating output folder")
        os.mkdir(fDir)
        print("Output folders created!")
    else:
        print("output directory already exists")

    tBoard = p4.Board(str(board))

    if not tBoard:
        print("Board Not Found")
        return 0

    print(tBoard.title)
    testThread = tBoard.get_thread(id)
    if not testThread:
        # Stop here: the code used to carry on and crash on the missing
        # thread a few lines later.
        print("thread not found")
        return 0
    print("found thread")

    # Materialise the files so they can be counted; __sizeof__() gave the
    # memory size of the generator object, not the number of files.
    files = list(testThread.file_objects())
    print(len(files), " file objects found to be downloaded")

    for f in files:
        target = fDir + "\\" + f.filename

        if os.path.exists(target):
            print("File exists")
            time.sleep(1)
            continue

        r = requests.get(f.file_url)
        print("Attempting to Download ", f.filename)
        print("Downloading to >>" + fDir)

        print("file doesnt exist. Downloading")
        # the with-block closes the file; no explicit close() is needed
        with open(target, "wb") as myFile:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    myFile.write(chunk)

        print("File Download Complete")

        time.sleep(1)
Ejemplo n.º 19
0
    async def fetch_feed(self, url: str):
        """Fetch the 4chan thread that `url` points to.

        The URL is split into board and thread by ``self.url_splitter``,
        the thread's JSON endpoint is requested once through the aiohttp
        session (15 second total timeout), and the thread is then loaded
        through basc_py4chan.

        Returns:
            The thread object, or None when the connection failed, the
            board or thread lookup failed, or any other exception occurred.
            Every failure is logged.
        """
        timeout = aiohttp.client.ClientTimeout(total=15)
        # SPLIT OUT THE URL HERE
        split = self.url_splitter(url)
        # Bound up front so the handlers below can name them; they used to
        # reference undefined `board` / `thread` and raise NameError.
        board = split['board']
        thread = split['thread']
        url_generation = 'https://a.4cdn.org/' + board + '/thread/' + thread + '.json'
        try:
            async with self.session.get(url_generation,
                                        timeout=timeout) as response:
                await response.read()

            chanboard = basc_py4chan.Board(board)
            chanthread = chanboard.get_thread(thread)
            # Touch the attributes so that a failing board or thread lookup
            # surfaces here and is routed to the handlers below.
            if chanboard.title is not None:
                if chanthread.id is not None:
                    pass

        # Each handler binds the exception as `exc`; previously only the
        # last one did, so the first three failed inside debug_exc_log.
        except (aiohttp.ClientError, asyncio.TimeoutError) as exc:
            # We couldn't connect
            log.debug("We could not connect to 4chan.org")
            debug_exc_log(
                log,
                exc,
                "We could not connect to 4chan.org.",
            )
            return None
        except KeyError as exc:
            # The board doesn't exist
            log.debug(f"The specified board {board} does not exist")
            debug_exc_log(
                log,
                exc,
                f"The specified board {board} does not exist.",
            )
            return None
        except AttributeError as exc:
            # The thread doesn't exist
            log.debug(f"The specified thread {thread} does not exist")
            debug_exc_log(
                log,
                exc,
                f"The specified thread {thread} does not exist.",
            )
            return None
        except Exception as exc:
            debug_exc_log(
                log,
                exc,
                f"Unexpected exception type {type(exc)} encountered for {board} -> {thread}",
            )
            return None

        return chanthread
Ejemplo n.º 20
0
    def watch_4chan(self):
        """Download new files for every watched thread and retire dead ones.

        Watched threads are the ``ThreadTable`` rows with ``err404 == 0``
        and ``limit == 0``. For each one that still exists, files missing
        from ``<dir_output>/<board_id>/<thread_id>/`` are downloaded; the
        topic subject is written to ``topic.txt`` when the thread folder is
        first created. A thread that reached its image limit or bump limit,
        or is closed or archived, gets ``limit = 1``; a thread that no
        longer exists gets ``err404 = 1``.
        """
        # err404 = when thread not found
        # limit = when thread hit max image limit
        watch_urls = db.session.query(
            db.ThreadTable).filter_by(err404=0).filter_by(limit=0).all()
        if not watch_urls:
            self.logger.info("No thread to watch")
            return

        for thread in watch_urls:
            x = basc_py4chan.Board(board_name=thread.board_id, https=True)
            self.logger.info("scanning: {}/{}".format(
                thread.board_id, thread.thread_id))
            if x.thread_exists(thread_id=thread.thread_id):
                y = x.get_thread(thread_id=thread.thread_id,
                                 update_if_cached=True)

                path = os.path.join(self.config['dir_output'],
                                    thread.board_id, str(thread.thread_id))
                if not os.path.exists(path):
                    # creates the output and board folders as needed
                    os.makedirs(path)
                    with open(os.path.join(path, "topic.txt"), "w") as file_t:
                        file_t.write(str(y.topic.subject))

                self.logger.info("downloading: {}/{}".format(
                    thread.board_id, thread.thread_id))
                for file_i in y.files():
                    file_n = os.path.basename(file_i)
                    target = os.path.join(path, file_n)
                    if not os.path.exists(target):
                        self.logger.debug("receiving: {}".format(file_n))
                        # NOTE(review): urllib.urlretrieve is the Python 2
                        # spelling; left unchanged here.
                        urllib.urlretrieve(url=file_i, filename=target)
                    else:
                        self.logger.debug("skipping: {}".format(file_n))

                # Stop watching once the image limit is reached, not before
                # it is: the former `not y.imagelimit` contradicted both
                # the comment and the log message of this branch.
                if y.imagelimit or y.closed or y.archived or y.bumplimit:
                    self.logger.info("image limit: {}/{}".format(
                        thread.board_id, thread.thread_id))
                    thread.limit = 1
                    db.session.commit()
            else:
                thread.err404 = 1
                db.session.commit()
                self.logger.info("404 Not Found: {}/{}".format(
                    thread.board_id, thread.thread_id))
            self.logger.info("finished: {}/{}".format(
                thread.board_id, thread.thread_id))
Ejemplo n.º 21
0
def fetchall_yl(section):
    """Download the not-yet-saved files of every active thread in *section*.

    Active threads are read from the ``threads`` table of the local SQLite
    database.  For each one, the files whose position in the thread is at or
    beyond the stored ``downloaded`` count are fetched and written under
    ``/home/ubuntu/ylyl-basc/<section>/<threadid>/``.  Afterwards the row's
    ``downloaded`` column is set to the number of files iterated.  If anything
    fails while walking a thread's files, the thread is flagged inactive.

    Args:
        section: Board name; used for the board lookup, the SQL filter and
            the destination directory.
    """
    conn = sqlite3.connect("/home/ubuntu/ylyl-basc/example.db")
    try:
        board = basc_py4chan.Board(section)
        listofactivethreads = []
        print("list of active threads")

        # Row layout follows the select list: index 1 is threadid,
        # index 4 is the downloaded counter.
        for row in conn.execute(
                "select id, threadid, date, files, downloaded, section, active "
                "from threads where active is 1 and section is (?)", [section]):
            listofactivethreads.append(row)
            print(row)

        for activethread in listofactivethreads:
            thread_id = activethread[1]
            already_saved = activethread[4]
            dest_dir = "/home/ubuntu/ylyl-basc/" + section + "/" + str(thread_id) + "/"

            thread = board.get_thread(thread_id)
            downloaded = 0
            try:
                for file_obj in thread.file_objects():
                    # Files before position `already_saved` were fetched on a
                    # previous run; only fetch the ones after that point.
                    if downloaded >= already_saved:
                        os.makedirs(dest_dir, exist_ok=True)
                        r = requests.get(file_obj.file_url)

                        # Distinct name for the handle so the loop variable
                        # is not rebound to a file object.
                        with open(dest_dir + file_obj.filename, 'wb') as out:
                            out.write(r.content)
                    downloaded += 1
            except Exception:
                # Any failure (including `thread` being None, which raises
                # AttributeError above) flags the thread as inactive.
                # `Exception` rather than a bare except so Ctrl-C / SystemExit
                # are not mistaken for a dead thread.
                print("inactive: ", thread_id)
                conn.execute(
                    "update threads set active = (?) where threadid is (?)",
                    (0, thread_id))

            try:
                # `with conn` commits on success (this also commits the
                # pending "inactive" update above) and rolls back on error.
                with conn:
                    conn.execute(
                        "update threads set downloaded = (?) where threadid is (?)",
                        (downloaded, thread_id))
            except sqlite3.OperationalError:
                print("error...updating thread")
    finally:
        # Release the database handle even if a lookup or download raised.
        conn.close()
Ejemplo n.º 22
0
def get_new_posts_count(board_id, postno, replies_count):
    """Return how many replies a thread has gained since *replies_count*.

    Args:
        board_id: Name of the board the thread lives on.
        postno: Thread number passed to ``Board.get_thread``.
        replies_count: Previously known number of replies.

    Returns:
        ``(len(thread.posts) - 1) - replies_count``, or ``None`` when
        ``get_thread`` returns ``None``.
    """
    thread = basc_py4chan.Board(board_id).get_thread(postno)
    if thread is None:
        return None

    # One post is excluded from the count (presumably the opening post).
    current_replies = len(thread.posts) - 1
    return current_replies - replies_count
Ejemplo n.º 23
0
def main():
    """Print, then download with wget, every file of thread 20593800 on /s/."""
    board = basc_py4chan.Board('s')
    thread = board.get_thread(20593800)

    # Collect the URLs as plain strings first, echoing each one, so the
    # download pass below runs over a concrete list.
    urls = []
    for entry in thread.files():
        print(entry)
        urls.append(str(entry))

    for url in urls:
        saved_as = wget.download(url)
        print('Image Successfully Downloaded: ', saved_as)
Ejemplo n.º 24
0
def main():
    """Print the URL of every file in the thread named on the command line.

    Expects exactly two arguments (board, thread number); otherwise prints a
    usage message and returns without doing anything else.
    """
    script = sys.argv[0]
    if len(sys.argv) == 3:
        board_name, thread_no = sys.argv[1], sys.argv[2]
        thread = basc_py4chan.Board(board_name).get_thread(int(thread_no))
        for url in thread.files():
            print(url)
        return

    usage_lines = (
        "Usage: python %s [board] [thread]" % script,
        "Shows the URL of all the files in the thread.",
        "Example (download all files in thread): python %s v 12351234 | xargs wget"
        % script,
    )
    for line in usage_lines:
        print(line)
Ejemplo n.º 25
0
def get_threads(board_id):
    """Collect a summary dict for every thread on *board_id* and send it on.

    Each thread is flattened into a dict built from its topic (opening post)
    and thread-level counters.  The resulting list is passed through
    ``utils.parse_posts`` and emitted via ``pyotherside.send('threads', ...)``.

    Args:
        board_id: Name of the board to list.
    """
    board = basc_py4chan.Board(board_id)
    summaries = []

    for thread in list(board.get_all_threads()):
        topic = thread.topic
        entry = {
            'no': topic.post_number,
            'board': board_id,
            'post_board': board_id,
            'replies': int(thread.num_replies),
            'sticky': int(thread.sticky),
            'closed': int(thread.closed),
            'name': topic.name,
            'time': topic.timestamp,
            'semantic_url': topic.semantic_url,
        }

        # Use the omitted-image count when it is truthy; otherwise fall back
        # to 1/0 depending on whether the topic itself carries a file.
        entry['images'] = thread.omitted_images or int(topic.has_file)
        entry['has_file'] = int(topic.has_file)

        if topic.has_file:
            # Both spellings of the URL keys are emitted, as in the original.
            entry.update({
                'ext': topic.file_extension,
                'file_deleted': int(topic.file_deleted),
                'thumbUrl': topic.thumbnail_url,
                'thumbnail_url': topic.thumbnail_url,
                'imgUrl': topic.file_url,
                'file_url': topic.file_url,
                'filename': topic.filename,
            })

        # 'com' is the bolded subject and/or the comment; the key is left
        # out entirely when the topic has neither.
        subject, comment = topic.subject, topic.comment
        if subject and comment:
            entry['com'] = '<b>{}</b><br>'.format(subject) + comment
        elif comment:
            entry['com'] = comment
        elif subject:
            entry['com'] = '<b>{}</b>'.format(subject)

        summaries.append(entry)

    pyotherside.send('threads', utils.parse_posts(summaries))
Ejemplo n.º 26
0
    def _add_thread_from_info(self, board_name, thread_id):
        """Add a thread to our internal list from direct board name/thread id."""
        # already exists
        with self.threads_lock:
            if thread_id in self.threads:
                return False

        # running board object
        with self.boards_lock:
            if board_name not in self.boards:
                self.boards[board_name] = basc_py4chan.Board(
                    board_name, https=self.options.use_ssl)
            running_board = self.boards[board_name]

            if not running_board.thread_exists(thread_id):
                print(
                    THREAD_NONEXISTENT.format(
                        **{
                            'site': self.name,
                            'board': board_name,
                            'thread_id': thread_id,
                        }))
                print(THREAD_NONEXISTENT_REASON)
                return False

        # add thread to download list
        with self.threads_lock:
            self.threads[thread_id] = {
                'board':
                board_name,
                'dir':
                self.base_thread_dir.format(board=board_name,
                                            thread=thread_id),
                'thread_id':
                thread_id,
                'total_files':
                0,
                'images_downloaded':
                0,
                'thumbs_downloaded':
                0,
                'alive':
                True,
            }
            status_info = self.threads[thread_id]
        self.update_status('new_thread', info=status_info)

        self.add_to_dl('thread', board=board_name, thread_id=thread_id)
        return True
Ejemplo n.º 27
0
def getRandomThreadBasedOnSelectedWords(boardName, chosenWords, allThreadsFromChosenBoard):
    """Pick a random thread whose opening comment contains a chosen word.

    Args:
        boardName: Board name used to re-fetch the selected thread in full.
        chosenWords: Iterable of substrings to look for.
        allThreadsFromChosenBoard: Iterable of thread objects exposing
            ``topic.text_comment`` and ``id``.

    Returns:
        The thread returned by ``Board.get_thread`` for the randomly chosen
        match, or ``False`` when no thread matches.  A thread matching
        several words is listed once per word, so it is proportionally more
        likely to be picked.
    """
    topics = []
    for word in chosenWords:
        for thread in allThreadsFromChosenBoard:
            # Read the property once per thread instead of twice.
            comment = thread.topic.text_comment
            if comment is not None and word in comment:
                topics.append(thread)

    # Single-argument print(...) produces the same output as the former
    # Python 2 print statement and is also valid Python 3.
    print("%s %s" % (topics, "**********************"))  # contains short chosen threads
    if not topics:
        return False

    chosenThreadShort = random.choice(topics)
    board = basc_py4chan.Board(boardName)
    chosenThread = board.get_thread(chosenThreadShort.id)
    print(chosenThread)
    return chosenThread
Ejemplo n.º 28
0
def main():
    """Append the text of not-yet-seen /pol/ posts to the corpus file.

    Live and archived thread ids are walked; every post whose id is not
    already in the pickled dictionary is cleaned with ``remove``, stripped
    of empty lines, tokenised for the running word count, recorded in the
    dictionary and appended to ``4chan/txt_4chan.txt``.  The dictionary is
    written back to ``4chan/json_4chan.pickle`` when the run ends, including
    when it ends with an error, so already-written text is not appended
    again on the next run.
    """
    wordcount = 0

    # Context manager so the corpus file is closed even if scraping raises.
    with open('4chan/txt_4chan.txt', 'a') as corpus:
        # NOTE: pickle.load can execute arbitrary code from the file; this
        # is only acceptable because the pickle is written by this script.
        try:
            with open('4chan/json_4chan.pickle', 'rb') as f:
                comment_dict = pickle.load(f)
                print('Using existing dictionary')
        except Exception:
            # Missing or unreadable cache: start from scratch.
            comment_dict = {}
            print('Using new dictionary')

        try:
            # list_of_boards = ['b', 'r9k', 's4s', 'pol']
            # list_of_boards1 = ['lgbt', 'x', 'adv', 'news', 'vip', 'qa']
            pol = ['pol']
            for b in pol:
                board = basc_py4chan.Board(b)

                thread_ids = board.get_all_thread_ids()
                archived_ids = get_archived_ids(b)
                all_ids = thread_ids + archived_ids
                for thread_id in all_ids:
                    thread = board.get_thread(thread_id)
                    try:
                        # A missing thread (None) raises AttributeError here
                        # and is reported by the handler below.
                        for post in thread.all_posts:
                            post_id = post.post_id
                            if post_id in comment_dict:
                                continue

                            body = remove(post.text_comment)
                            # remove empty lines
                            body = os.linesep.join(
                                [s for s in body.splitlines() if s])
                            tokens = word_tokenize(body)
                            wordcount += len(tokens)

                            comment_dict[post_id] = {'body': body,
                                                     'thread_id': thread_id}
                            corpus.write(body)

                    except Exception:
                        # Best effort: skip threads that cannot be processed.
                        print('Failed to load comments\n')
                        continue

                    print(b, thread_id)
                    print('Total: {:,}\n'.format(wordcount))
        finally:
            # Persist the dedup dictionary even when the run is interrupted.
            with open('4chan/json_4chan.pickle', 'wb') as f:
                pickle.dump(comment_dict, f, protocol=pickle.HIGHEST_PROTOCOL)
Ejemplo n.º 29
0
def pullfunc():
    """Interactively download all images of a 4chan thread into a folder.

    Prompts for a board, a thread id and a folder name, shows how many posts
    carry a file, and asks for confirmation.  On "y" every file is fetched
    into the folder, the screen is cleared and the prompt starts over; on
    "n" the program exits via ``sys.exit(0)``.

    NOTE(review): ``path`` is not defined in this function; it is presumably
    a module-level base directory -- confirm against the rest of the file.
    """
    # Loop instead of calling pullfunc() recursively after every pull, so a
    # long session cannot grow the call stack.
    while True:
        print("▄█ █▀▄▀█ ██     ▄▀  ▄███▄          ▄▄▄▄▄      ▄▄▄▄▀ ▄███▄   ██   █     ▄███▄   █▄▄▄▄ ")
        print("██ █ █ █ █ █  ▄▀    █▀   ▀        █     ▀▄ ▀▀▀ █    █▀   ▀  █ █  █     █▀   ▀  █  ▄▀ ")
        print("██ █ ▄ █ █▄▄█ █ ▀▄  ██▄▄        ▄  ▀▀▀▀▄       █    ██▄▄    █▄▄█ █     ██▄▄    █▀▀▌  ")
        print("▐█ █   █ █  █ █   █ █▄   ▄▀      ▀▄▄▄▄▀       █     █▄   ▄▀ █  █ ███▄  █▄   ▄▀ █  █  ")
        print(" ▐    █     █  ███  ▀███▀                    ▀      ▀███▀      █     ▀ ▀███▀     █   ")
        print("     ▀     █                                                  █                 ▀    ")
        print("          ▀                                                  ▀     By Sen :)         ")
        print("To start please type a board, thread ID, and a folder to save it to")
        boardInput = input("Board: ")
        threadInput = input("Thread ID: ")
        makefolder = input("Folder Name: ")
        board = basc_py4chan.Board(boardInput, https=False, session=None)
        thread = board.get_thread(threadInput)
        if thread is None:
            # get_thread yields None for a thread that cannot be found.
            print("Thread not found.")
            continue

        # Create the directory the files are actually written to (it used to
        # be created relative to the working directory instead), and do not
        # fail when it already exists.
        madefolder = os.path.join(path, makefolder)
        os.makedirs(madefolder, exist_ok=True)

        postsWithFiles = [post for post in thread.posts if post.has_file]
        postCount = len(postsWithFiles)

        # Ask until we get an answer we understand.
        while True:
            print(str(postCount) + " Images")
            answer = input("pull? y/n: ").strip().lower()
            if answer in ("y", "yes"):
                break
            if answer in ("n", "no"):
                print("aw ok bye")
                time.sleep(3)
                sys.exit(0)
            print("Please enter y/n.")

        print("")
        print("Pulling images")
        failed = 0
        for currentCount, post in enumerate(postsWithFiles, start=1):
            print(str(currentCount) + "/" + str(postCount), end='\r')
            try:
                saveto = os.path.join(madefolder, post.filename)
                urllib.request.urlretrieve(post.file_url, saveto)
            except Exception as exc:
                # Keep going on a failed file, but report it instead of
                # silently dropping it.
                failed += 1
                print("Failed to download image %d/%d: %s"
                      % (currentCount, postCount, exc))
        if failed:
            print(str(failed) + " image(s) could not be downloaded")
        print("Done!")
        time.sleep(2)
        os.system('cls' if os.name == 'nt' else 'clear')
Ejemplo n.º 30
0
def getShit():
    """Store the cleaned opening comment of a random thread.

    A random thread id is drawn from the board named by the module-level
    ``boards``.  HTML tags are stripped from the topic's comment, ``&gt;``
    is removed, ``&#039;`` is decoded, and single and double quotes are
    doubled (presumably as SQL escaping for ``insert4ChanShitPost`` --
    confirm against that function) before the text is handed over.  Any
    error is printed rather than raised.
    """
    try:
        board = basc_py4chan.Board(boards)
        threads = board.get_all_thread_ids()
        if not threads:
            print("no threads found")
            return

        # randint is inclusive at both ends, so the upper bound must be the
        # last valid index, not len(threads).
        randomThread = randint(0, len(threads) - 1)
        thread = board.get_thread(threads[randomThread])
        topic = thread.topic

        cleanr = re.compile('<.*?>')
        cleantext = re.sub(cleanr, '', topic.comment)
        cleanerText = str(cleantext).replace('&gt;', '')
        cleanerText = cleanerText.replace('&#039;', '\'')
        cleanerText = cleanerText.replace('\'', '\'\'')
        cleanerText = cleanerText.replace('\"', '\"\"')
        insert4ChanShitPost(cleanerText)
    except Exception as exception:
        print(exception)