Example #1
def get_tone_for_user(username, subreddit=None, comments_limit=1):
    """ Compute the Watson tone of user "username" by looking at their most
    recent comments. The "subreddit" argument is accepted but not applied here. """

    comments = [x.body for x in get_comments(username, comments_limit)]

    comment_to_tone = []
    for comment in comments:
        # one Watson request per comment; parse() extracts the tone result
        json_data = query_watson(comment)
        tone = parse(json_data)
        comment_to_tone.append((comment, tone))

    return comment_to_tone
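The helpers this example calls (get_comments, query_watson, parse) aren't shown. Below is a minimal sketch of what they might look like, assuming PRAW for the Reddit side and a plain HTTP POST to the Watson Tone Analyzer; the endpoint URL, credentials, and response handling are illustrative assumptions, not the original author's code.

import praw
import requests

WATSON_URL = "https://example.watsonplatform.net/tone-analyzer/api/v3/tone"  # placeholder endpoint
WATSON_AUTH = ("apikey", "YOUR_API_KEY")  # placeholder credentials

reddit = praw.Reddit(client_id="...", client_secret="...",
                     user_agent="tone-example")  # placeholder app credentials

def get_comments(username, limit):
    # PRAW 4+: newest comments for a redditor; each item has a .body attribute
    return reddit.redditor(username).comments.new(limit=limit)

def query_watson(text):
    # one HTTP request per comment; returns the decoded JSON response
    resp = requests.post(WATSON_URL, auth=WATSON_AUTH,
                         params={"version": "2017-09-21"},
                         json={"text": text})
    resp.raise_for_status()
    return resp.json()

def parse(json_data):
    # assumes the Tone Analyzer v3 response shape; returns the id of the
    # strongest document-level tone, or None when no tone was detected
    tones = json_data.get("document_tone", {}).get("tones", [])
    return max(tones, key=lambda t: t["score"])["tone_id"] if tones else None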
Example #2
import sqlite3
from datetime import datetime, timedelta

# assumes local helpers: a youtube module (channels, get_videos, get_comments,
# video_url), a reddit module (get_threads, get_comments), and store_comment()

def fetch_store_comments(conn):
  two_weeks_ago = datetime.now() - timedelta(days=14)

  for channel in youtube.channels:
    print("Channel:", channel)

    for video in youtube.get_videos(channel):
      # Video data
      video_id = video['resourceId']['videoId']
      video_url = youtube.video_url(video_id)
      # the dict literal on the right is evaluated against the raw API 'video'
      # before the name is rebound to this normalized row
      video = {
        'id': video_id,
        'channel': video['channelTitle'],
        'date': video['publishedAt'],
        'title': video['title'],
        'url': video_url,
      }
      video_date = datetime.strptime(video['date'], "%Y-%m-%dT%H:%M:%S.%fZ")

      # uploads are returned newest-first, so stop once we pass the cutoff
      if video_date < two_weeks_ago:
        break

      print("Video:", video['title'])

      try:
        conn.execute("""INSERT INTO video
        VALUES (?, ?, ?, ?, ?)
        """, (video['id'], video['channel'], video['date'],
              video['title'], video['url']))
      except sqlite3.IntegrityError:
        pass  # video row already exists; skip the duplicate insert

      # Youtube comment data
      print("  Downloading Youtube comments")
      num_comments = 0
      for comment in youtube.get_comments(video_id):
        num_comments += 1
        comment = {
          'id': comment['id'],
          'site': 'youtube',
          'video': video_id,
          'date': comment['updatedAt'],
          'author': comment['authorDisplayName'],
          'comment': comment['textDisplay'],
          'url': comment['authorGoogleplusProfileUrl'],
          'up_votes': comment['likeCount'],
          'down_votes': 0,
        }

        new_comment = store_comment(comment)
        if not new_comment:
          break

      print("  Finished downloading ({}) Youtube comments".format(num_comments))


      # Reddit comment data
      print("  Downloading Reddit comments")
      num_comments = 0
      for thread in reddit.get_threads(video_url):
        for comment in reddit.get_comments(thread['id']):
          num_comments += 1
          date = (datetime.utcfromtimestamp(comment['created_utc']).isoformat()
                  if 'created_utc' in comment else None)
          if 'body' in comment:
            comment = {
              'id': comment['id'],
              'site': 'reddit',
              'video': video_id,
              'date': date,
              'author': comment['author'],
              'comment': comment['body'],
              'url': 'https://reddit.com/r/{}/comments/{}/{}/{}'.format(
                  comment['subreddit'], thread['id'], comment['author'], comment['id']),
              'up_votes': comment['ups'],
              'down_votes': comment['downs'],
            }

            new_comment = store_comment(comment)
            if not new_comment:
              break

      print("  Finished downloading ({}) Reddit comments".format(num_comments))

      conn.commit()
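The function expects an open sqlite3 connection and an existing video table; the schema isn't shown in the original, so the five-column layout below is inferred from the INSERT statement. A possible driver (the PRIMARY KEY on id is what makes the IntegrityError fire for duplicate videos):

import sqlite3

conn = sqlite3.connect("comments.db")
conn.execute("""CREATE TABLE IF NOT EXISTS video
                (id TEXT PRIMARY KEY, channel TEXT, date TEXT,
                 title TEXT, url TEXT)""")
fetch_store_comments(conn)
conn.close()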
Example #3
    def grab_data(self, thread_id):
        """
        Gets posts, inserts/updates the database, inserts history entry
        :return: nothing
        """

        # empty tblHistory dict
        history = dict()

        # get created datetime for tblHistory
        history['created'] = datetime.utcnow()

        # make sure oauth tokens are good, since grabbing threads can take a while
        self.o.refresh()

        # get post data from reddit
        retdata = get_post_data(self.r, thread_id)

        # the skip logic is stubbed out for now: it would fetch the stored
        # record (get_post_data_from_db) and compare it with the reddit data
        # (thread id, comment count, archived flag), skipping unchanged posts;
        # posts with no database record bypass the skip logic entirely

        # get comments for post from reddit
        data = get_comments(self.r, thread_id)

        # if data['status'] == 'C' then the retrieval was successful, so proceed
        if data['status'] == 'C':

            # query the database to see if the post already exists, will either get ID or None
            post_id = check_post_table(self.Session, thread_id)

            # if the post does not exist (no id returned) then insert the post data into db
            # you can do bulk_comment_insert on everything because this is all new data
            if not post_id:
                post_id = insert_post_data(self.Session, retdata)
                bulk_comment_insert(self.Session, data['comments'], post_id)
            else:
                # go through all comments and insert into database
                for comment in data['comments']:
                    insert_comment_data(self.Session, comment, post_id)

            # get finished time for tblHistory
            history['finished'] = datetime.utcnow()

            # build tblhistory entry
            history['message'] = 'Fetched post ID {0} with {1} comments'.format(
                retdata['id'], len(data['comments']))
            print(history['message'])

            # set status for now, until I'm able to implement error handling
            history['status'] = 'C'

        elif data['status'] == 'F':
            # retrieval failed; keep the 'created' timestamp set above and
            # record the failure details
            history['status'] = data['status']
            history['finished'] = datetime.utcnow()
            history['message'] = '{0} failed due to {1}'.format(
                data['thread'], data['errormsg'])

        # insert message
        insert_history(self.Session, history)

        print("\n")
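insert_history and the tblHistory model aren't shown. One way the helper might look, assuming SQLAlchemy sessions and a hypothetical History class mapped to tblHistory with the created/finished/status/message columns used above:

def insert_history(Session, history):
    # 'History' is a hypothetical declarative class mapped to tblHistory
    session = Session()
    try:
        session.add(History(**history))
        session.commit()
    except Exception:
        session.rollback()
        raise
    finally:
        session.close()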