Exemple #1
0
def get_top_subreddits(min_subscribers=500000):
    """Scraper for redditlist.com.

    Walks the paginated listing and collects subreddits from the
    subscriber column until an entry falls below `min_subscribers` or the
    site stops returning entries.

    :param min_subscribers: filter for subreddits (int)
    :return: subreddits with at least `min_subscribers`, sorted by rank
        (list of `subreddit` records)
    """
    url = "http://www.redditlist.com/?page={}"
    subs = []
    page = 1
    while True:
        # A timeout keeps a stalled connection from hanging the scraper.
        response = requests.get(url.format(page), timeout=30)
        soup = BeautifulSoup(response.text, "html.parser")
        listings = soup.findAll("div", attrs={"class": "span4 listing"})
        # The subscriber list is the middle column (index 1). If the page
        # does not have it, stop instead of raising IndexError.
        if len(listings) < 2:
            break
        items = listings[1].findAll("div", {"class": "listing-item"})
        if not items:
            # Ran out of entries before reaching the threshold; without this
            # check the loop would keep requesting pages forever.
            break
        for sub in items:
            attrs = sub.attrs
            stat_text = sub.find("span", {"class": "listing-stat"}).get_text()
            num_subscribers = int(stat_text.replace(",", ""))
            # Stop at the first entry below the threshold (this relies on
            # the column being ordered by descending subscriber count).
            if num_subscribers < min_subscribers:
                return sorted(subs, key=attrgetter("rank"))
            rank_text = sub.find("span", {"class": "rank-value"}).get_text()
            link = sub.find("span", {"class": "subreddit-url"}).find("a")
            subs.append(
                subreddit(
                    name=attrs["data-target-subreddit"],
                    rank=int(rank_text),
                    url=link.get("href"),
                    subscribers=num_subscribers,
                    type=attrs["data-target-filter"],
                ))
        page += 1
    return sorted(subs, key=attrgetter("rank"))
Exemple #2
0
def random_submission():
    """Repost a randomly chosen, previously high-scoring submission.

    Gathers submissions from a one-day window that ends
    NUMBER_DAYS_FOR_POST_TO_BE_OLD days ago, taken from SUBREDDIT_LIST when
    it is set and from get_top_subreddits() otherwise. Posts scoring at
    least MIN_SCORE are candidates; one is picked at random and its title
    and content are submitted again to the same subreddit.

    Nothing is posted when there are no candidates, or when one of the
    account's 20 newest submissions already carries the same title.

    :return: None
    :raises praw.exceptions.APIException: re-raised from the submit step
    """
    log.info("making random submission")
    # Window: the 24 hours ending NUMBER_DAYS_FOR_POST_TO_BE_OLD days ago.
    end_date = datetime.datetime.now() - datetime.timedelta(
        days=NUMBER_DAYS_FOR_POST_TO_BE_OLD)
    start_date = end_date - datetime.timedelta(days=1)
    # now() is naive local time, so use timestamp() to get a real Unix
    # epoch; subtracting datetime(1970, 1, 1) would be off by the local
    # UTC offset.
    end_epoch = int(end_date.timestamp())
    start_epoch = int(start_date.timestamp())

    log.info(start_date)
    log.info(end_date)
    log.info(start_epoch)
    log.info(end_epoch)

    log.info("choosing subreddits")
    if SUBREDDIT_LIST:
        log.info('using SUBREDDIT_LIST: {}'.format(SUBREDDIT_LIST))
        # Everything except the name is a placeholder value.
        subreddits = [
            subreddit(name=subname,
                      rank=1,
                      url="https://example.com",
                      subscribers=1000,
                      type="what") for subname in SUBREDDIT_LIST
        ]
    else:
        log.info("using get_top_subreddits")
        subreddits = get_top_subreddits()

    total_posts = []
    for sub in subreddits[:TOP_SUBREDDIT_NUM]:
        log.info("\n{}\n{}".format("#" * 20, sub))
        tops = get_submissions(start_epoch, end_epoch, sub.name)
        big_upvote_posts = [
            item for item in tops if item["score"] >= MIN_SCORE
        ]
        total_posts += big_upvote_posts
        log.info("found {} posts with score >= {}".format(
            len(big_upvote_posts), MIN_SCORE))

    # random.choice raises IndexError on an empty list, so bail out early.
    if not total_posts:
        log.error("no posts with score >= {} found, nothing to submit".format(
            MIN_SCORE))
        return

    post_to_repost = random.choice(total_posts)
    rand_sub = api.submission(id=post_to_repost["id"])

    # Skip the post if the account submitted the same title recently.
    own_user = api.redditor(REDDIT_USERNAME)
    for submission in own_user.submissions.new(limit=20):
        if submission.title == rand_sub.title:
            log.error(
                "I had posted the post I was just about to make in the last 20 posts, I'm not posting it again."
            )
            return

    log.info(rand_sub.title)

    if not rand_sub:
        log.error("something broke")
        return

    try:
        log.info("submission title: " + rand_sub.title)
        # Log the same attribute that the submit call below targets.
        log.info("posting to: {}".format(rand_sub.subreddit.display_name))
        # Reuse the old content: self posts carry text, link posts a URL.
        if rand_sub.is_self:
            params = {"title": rand_sub.title, "selftext": rand_sub.selftext}
        else:
            params = {"title": rand_sub.title, "url": rand_sub.url}

        # Submit the same content to the same subreddit.
        api.subreddit(rand_sub.subreddit.display_name).submit(**params)
    except praw.exceptions.APIException:
        # Reddit API errors go to the caller; listed first so the generic
        # handler below does not swallow them.
        raise
    except Exception as e:
        # Best effort: any other failure is logged and the run ends quietly.
        log.error("submission failed: {}".format(e))
Exemple #3
0
def random_submission():
    """Repost a randomly chosen high-scoring submission not yet reposted.

    Gathers submissions from a one-day window that ends
    NUMBER_DAYS_FOR_POST_TO_BE_OLD days ago, taken from SUBREDDIT_LIST when
    it is set and otherwise from get_top_subreddits() minus
    DISALLOWED_SUBS. Posts scoring at least MIN_SCORE are candidates. Up to
    10 distinct random candidates are checked against the DB_PERM TinyDB
    table, and the first one whose id is not recorded there is resubmitted
    to its original subreddit. The title is optionally rewritten
    (DO_WE_SPIN_TITLES) and link URLs pass through append_params_to_url.
    The submitted id is then recorded in both 'db.json' and DB_PERM.

    Nothing is posted when there are no candidates, when every checked
    candidate is already recorded, or when one of the account's 20 newest
    submissions already carries the same title.

    :return: None
    :raises praw.exceptions.APIException: re-raised from the submit step
    """
    max_tries = 10

    log.info("making random submission")
    # Window: the 24 hours ending NUMBER_DAYS_FOR_POST_TO_BE_OLD days ago.
    end_date = datetime.datetime.now() - datetime.timedelta(
        days=NUMBER_DAYS_FOR_POST_TO_BE_OLD)
    start_date = end_date - datetime.timedelta(days=1)
    # now() is naive local time, so use timestamp() to get a real Unix
    # epoch; subtracting datetime(1970, 1, 1) would be off by the local
    # UTC offset.
    end_epoch = int(end_date.timestamp())
    start_epoch = int(start_date.timestamp())

    log.info(start_date)
    log.info(end_date)
    log.info(start_epoch)
    log.info(end_epoch)

    # Record the start of this run in the permanent table.
    db_perm = TinyDB(DB_PERM).table('table')
    with transaction(db_perm) as tr:
        tr.insert({'session': str(time.time())})

    log.info("choosing subreddits")
    if SUBREDDIT_LIST:
        log.info('using SUBREDDIT_LIST: {}'.format(SUBREDDIT_LIST))
        # Everything except the name is a placeholder value.
        subreddits = [
            subreddit(name=subname,
                      rank=1,
                      url="https://example.com",
                      subscribers=1000,
                      type="what") for subname in SUBREDDIT_LIST
        ]
    else:
        log.info("using get_top_subreddits")
        # Drop disallowed subreddits from the scraped list.
        subreddits = [
            y for y in get_top_subreddits()
            if y.name.lower().strip() not in DISALLOWED_SUBS
        ]

    total_posts = []
    for sub in subreddits[:TOP_SUBREDDIT_NUM]:
        log.info("\n{}\n{}".format("#" * 20, sub))
        tops = get_submissions(start_epoch, end_epoch, sub.name)
        big_upvote_posts = [
            item for item in tops if item["score"] >= MIN_SCORE
        ]
        total_posts += big_upvote_posts
        log.info("found {} posts with score >= {}".format(
            len(big_upvote_posts), MIN_SCORE))

    # Sampling or choosing from an empty list would fail, so bail out early.
    if not total_posts:
        log.error("no posts with score >= {} found, nothing to submit".format(
            MIN_SCORE))
        return

    # Check up to max_tries distinct candidates against the permanent table.
    # Sampling without replacement means a try is never spent re-checking a
    # post that was already found to be submitted.
    print('[SAN] Sumbitting now')
    post_to_repost = None
    candidates = random.sample(total_posts, min(max_tries, len(total_posts)))
    for attempt, candidate in enumerate(candidates, start=1):
        print('[SAN] Checking submission no. ' + str(attempt) + ' post_id=' +
              str(candidate["id"]))
        already_submitted = db_perm.search(Query().id == candidate["id"])
        if not already_submitted:
            post_to_repost = candidate
            break
        print('[SAN] [ALREADY SUBMITTED] ' + already_submitted[0]['id'])

    if post_to_repost is None:
        print('[SAN] All submissions are already submitted')
        return
    print('[SAN] Post available')

    rand_sub = api.submission(id=post_to_repost["id"])

    # Skip the post if the account submitted the same title recently.
    own_user = api.redditor(REDDIT_USERNAME)
    for submission in own_user.submissions.new(limit=20):
        if submission.title == rand_sub.title:
            log.error(
                "I had posted the post I was just about to make in the last 20 posts, I'm not posting it again."
            )
            return

    log.info(rand_sub.title)

    if not rand_sub:
        log.error("something broke")
        return

    try:
        title = rand_sub.title
        if DO_WE_SPIN_TITLES:
            title = rewrite_text(SPINNER_API, title)
        log.info("submission title: " + title)
        # Log the same attribute that the submit call below targets.
        log.info("posting to: {}".format(rand_sub.subreddit.display_name))

        # Reuse the old content: self posts carry text, link posts a URL.
        if rand_sub.is_self:
            params = {"title": title, "selftext": rand_sub.selftext}
        else:
            # NOTE: DO_WE_REUPLOAD_TO_IMGUR makes no difference yet. The
            # Imgur module still needs testing and URL parsing, so both
            # settings post the original URL via append_params_to_url.
            params = {
                "title": title,
                "url": append_params_to_url(DO_WE_ADD_PARAMS_REUPLOAD,
                                            rand_sub.url),
            }

        # Submit the same content to the same subreddit.
        api.subreddit(rand_sub.subreddit.display_name).submit(**params)

        # Track the id so other threads do not resubmit the same post.
        table = TinyDB('db.json').table('table')
        with transaction(table) as tr:
            tr.insert({'id': rand_sub.id})
        # Also record it in the permanent table checked by the loop above.
        with transaction(db_perm) as tr:
            tr.insert({'id': rand_sub.id})
    except praw.exceptions.APIException:
        # Reddit API errors go to the caller; listed first so the generic
        # handler below does not swallow them.
        raise
    except Exception as e:
        # Best effort: any other failure is logged and the run ends quietly.
        log.error("submission failed: {}".format(e))