# Third-party imports assumed by this snippet (they aren't shown here):
import praw
import pytumblr
from py3pin.Pinterest import Pinterest
# Project-local helpers (Submission, logger, percentageComplete,
# signedCrc32, loadPinterestCache, savePinterestCache) are assumed to be
# defined elsewhere in this project.


def getSubmissionsFromRedditList(redditList,
                                 source,
                                 earlyOutPoint=None,
                                 unlikeUnsave=False):
    submissions = []
    comments = []

    numTotalSubmissions = len(redditList)
    for currentSubmissionIndex, singleSubmission in enumerate(redditList):
        if currentSubmissionIndex and currentSubmissionIndex % 100 == 0:
            logger.log('Got {} submissions...'.format(currentSubmissionIndex))

        if isinstance(singleSubmission, praw.models.Submission):
            newSubmission = Submission()

            newSubmission.source = u'reddit'

            newSubmission.title = singleSubmission.title
            newSubmission.author = singleSubmission.author.name if singleSubmission.author else u'no_author'

            newSubmission.subreddit = singleSubmission.subreddit.url
            newSubmission.subredditTitle = singleSubmission.subreddit.title

            newSubmission.body = singleSubmission.selftext
            newSubmission.bodyUrl = singleSubmission.url

            newSubmission.postUrl = singleSubmission.permalink

            submissions.append(newSubmission)

            logger.log(
                percentageComplete(currentSubmissionIndex,
                                   numTotalSubmissions))

            if unlikeUnsave:
                if source == 'liked':
                    singleSubmission.clear_vote()
                else:
                    singleSubmission.unsave()

                logger.log('Unsaved/cleared vote on submission ' +
                           singleSubmission.permalink)

            # Check to see if we've already downloaded this submission; if so, early out
            if (earlyOutPoint and earlyOutPoint[0]
                    and newSubmission.postUrl == earlyOutPoint[0].postUrl):
                logger.log(
                    'Found early out point after ' + str(len(submissions)) +
                    ' new submissions.'
                    ' If you e.g. changed your total requests value and want to go deeper, set'
                    ' Reddit_Try_Request_Only_New to False in your settings.txt'
                )
                break

        # The submission is actually a saved comment
        else:
            # See the bottom of
            # https://praw.readthedocs.io/en/latest/getting_started/quick_start.html
            # for how to enumerate the information a submission provides, e.g.:
            # pprint.pprint(vars(singleSubmission))
            newSubmission = Submission()

            newSubmission.source = u'reddit'

            newSubmission.title = u'Comment on ' + singleSubmission.link_title
            newSubmission.author = singleSubmission.author.name if singleSubmission.author else u'no_author'

            newSubmission.subreddit = singleSubmission.subreddit.url
            newSubmission.subredditTitle = singleSubmission.subreddit.title

            newSubmission.body = singleSubmission.body
            newSubmission.bodyUrl = singleSubmission.permalink

            newSubmission.postUrl = singleSubmission.link_permalink

            comments.append(newSubmission)

    return submissions, comments
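
# A minimal usage sketch (assumes a praw.Reddit instance with working
# credentials; the limit and user_agent values are illustrative):
#
#   reddit = praw.Reddit(client_id='...', client_secret='...',
#                        username='...', password='...',
#                        user_agent='my_archiver')
#   savedPosts = list(reddit.user.me().saved(limit=100))
#   submissions, comments = getSubmissionsFromRedditList(savedPosts, 'saved')
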
def getTumblrUserLikedSubmissions(clientId,
                                  clientSecret,
                                  tokenId,
                                  tokenSecret,
                                  likeRequestLimit=100,
                                  requestOnlyNewCache=None):
    tumblrClient = pytumblr.TumblrRestClient(clientId, clientSecret, tokenId,
                                             tokenSecret)

    # An annoying cap the Tumblr API seems to impose on posts per page
    POSTS_PER_PAGE = 50

    oldestPageTimestamp = 0
    totalRequests = 0
    submissions = []

    foundOldSubmission = False

    while totalRequests < likeRequestLimit:
        if oldestPageTimestamp:
            # oldestPageTimestamp doubles as an "already fetched a page"
            # flag; later pages are requested by offset
            tumblrLikes = tumblrClient.likes(limit=POSTS_PER_PAGE,
                                             offset=totalRequests)
        else:
            tumblrLikes = tumblrClient.likes(limit=POSTS_PER_PAGE)

        numPostsThisPage = len(tumblrLikes['liked_posts'])

        if not numPostsThisPage:
            break

        logger.log(
            str(numPostsThisPage) + ' Tumblr likes requested. Total likes: ' +
            str(tumblrLikes['liked_count']))

        for post in reversed(tumblrLikes['liked_posts']):
            if 'photos' in post:
                for photoIndex, photo in enumerate(post['photos']):
                    newSubmission = Submission()

                    newSubmission.source = u'Tumblr'

                    # Tumblr submissions don't have titles, so make one
                    # This'll look ugly in the file browser, unfortunately
                    newSubmission.title = str(
                        signedCrc32(post['short_url'].encode()))
                    if len(post['photos']) > 1:
                        newSubmission.title += u'_' + str(photoIndex)
                    """logger.log(post)
					return"""
                    newSubmission.author = post['blog_name']

                    newSubmission.subreddit = post['short_url']
                    newSubmission.subredditTitle = post['blog_name'] + '_Tumblr'

                    newSubmission.body = post['caption']
                    newSubmission.bodyUrl = photo['original_size']['url']

                    newSubmission.postUrl = post['short_url']

                    submissions.append(newSubmission)

                    if (requestOnlyNewCache and requestOnlyNewCache[0]
                            and newSubmission.postUrl
                            == requestOnlyNewCache[0].postUrl):
                        logger.log(
                            'Found early out point after ' +
                            str(len(submissions)) + ' new submissions.'
                            ' If you e.g. changed your total requests value and want to go deeper, set'
                            ' Tumblr_Try_Request_Only_New to False in your settings.txt'
                        )
                        foundOldSubmission = True
                        break

            else:
                logger.log('Skipped ' + post['short_url'] +
                           ' (does not have images)')

            if foundOldSubmission:
                break

        if foundOldSubmission:
            break

        oldestPageTimestamp = tumblrLikes['liked_posts'][-1]['liked_timestamp']

        # If we didn't get close to a full page's worth of posts, we're on
        # the last page; pages sometimes come back a little under
        # POSTS_PER_PAGE, hence the tolerance
        RANDOM_PAGE_TOLERANCE = 10
        if numPostsThisPage < POSTS_PER_PAGE - RANDOM_PAGE_TOLERANCE:
            break

        # Despite the name, this counts posts fetched rather than requests
        totalRequests += numPostsThisPage

    newEarlyOut = submissions[0] if submissions else None
    return submissions, newEarlyOut
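
# A minimal usage sketch (all four OAuth values are placeholders from the
# Tumblr API console):
#
#   submissions, earlyOut = getTumblrUserLikedSubmissions(
#       'consumer_key', 'consumer_secret',
#       'oauth_token', 'oauth_secret',
#       likeRequestLimit=100)
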
def getPinterestUserPinnedSubmissions(email, username, password,
                                      cacheFileName):

    submissions = []

    lastIds = loadPinterestCache(cacheFileName) if cacheFileName else {}
    # Note: this is an alias, not a copy, so ids added below also land in
    # lastIds, which dedupes pins that appear in more than one batch
    updatedLastIds = lastIds

    pinterest = Pinterest(email=email,
                          password=password,
                          username=username,
                          cred_root='pinterest_creds')

    logger.log("Logging in to Pinterest...")
    pinterest.login()

    boards = pinterest.boards(username=username)

    for board in boards:
        # Get all pins for the board
        board_pins = []
        pin_batch = pinterest.board_feed(board_id=board['id'])

        # py3-pinterest paginates internally: each board_feed call returns
        # the next batch, and an empty list once the board is exhausted
        while pin_batch:
            for pin in pin_batch:
                if pin['id'] not in lastIds:
                    # Only using the dict for its key lookup
                    updatedLastIds[pin['id']] = 1
                    board_pins.append(pin)

            pin_batch = pinterest.board_feed(board_id=board['id'])

        for pin in board_pins:

            # I'm not sure how important it is to support these
            if pin['type'] == 'story':
                continue

            newSubmission = Submission()
            newSubmission.source = u'Pinterest'
            # While pins do have titles, 90% of the time they seem useless
            newSubmission.title = pin['id']
            # There is probably a way to figure out who the original pinner is, but oh well
            newSubmission.author = 'N/A'
            newSubmission.subreddit = board['url']
            newSubmission.subredditTitle = board['name'] + '_Pinterest'
            # Default the body so it's set even when a pin has no rich summary
            newSubmission.body = 'N/A'
            if 'rich_summary' in pin and pin['rich_summary']:
                if 'display_description' in pin['rich_summary']:
                    newSubmission.body = pin['rich_summary'][
                        'display_description']
                newSubmission.postUrl = pin['rich_summary']['url']

            # What is actually downloaded
            newSubmission.bodyUrl = pin['images']['orig']['url']
            submissions.append(newSubmission)

    if cacheFileName:
        savePinterestCache(cacheFileName, updatedLastIds)

    logger.log("Found {} new Pinterest submissions".format(len(submissions)))
    return submissions
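
# A minimal usage sketch (credentials and the cache file name are
# placeholders; the cache lets later runs skip already-seen pins):
#
#   newPins = getPinterestUserPinnedSubmissions(
#       'me@example.com', 'my_username', 'hunter2',
#       'pinterest_cache.json')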