Ejemplo n.º 1
0
def rebuild_tweets():
  """Re-encode the 'urls' and 'hashtags' fields of every stored tweet.

  Each field holds a Python-literal string; it is re-serialised as JSON,
  or blanked out when it cannot be parsed.  The result is written to the
  'twitter_tweets_cleaned' table.
  """
  def _to_json(raw):
    # Parse a Python literal and re-dump it as JSON; '' on failure.
    try:
      return json.dumps(ast.literal_eval(raw))
    except Exception:
      traceback.print_exc()
      return ''

  rebuilt = []
  for tweet in list(datastore.load('twitter_tweets')):
    if tweet['urls'] is not None and tweet['urls'] != '':
      tweet['urls'] = _to_json(tweet['urls'])
    if tweet['hashtags'] is not None and tweet['hashtags'] != '':
      tweet['hashtags'] = _to_json(tweet['hashtags'])
    rebuilt.append(tweet)

  datastore.store(rebuilt, 'twitter_tweets_cleaned', '')
Ejemplo n.º 2
0
def get_followings():
    users = list(datastore.load('khiem.twitter_users'))
    user_ids = []
    followings = []
    for i in range(0, len(users) - 1):
        user_ids.append(users[i].id)

    for i in range(500, 751):
        user = users[i]
        #friend_ids = api.GetFriendIDs()
        #for friend_id in friend_ids:
        # if friend_id in user_ids:
        #   followings.append({"user_id": user["id"], "friend_id": friend_id})
        try:
            follower_ids = api.GetFollowerIDs(userid=user["id"])["ids"]
            print str(i) + ": Found " + str(
                len(follower_ids)) + " followers of " + user.name
            for follower_id in follower_ids:
                if follower_id in user_ids:
                    followings.append({
                        "user_id": follower_id,
                        "friend_id": user.id
                    })
        except Exception as e:
            print e
            continue

    datastore.store(followings, 'twitter_followings_500_750',
                    'Twitter followings')
    print "Successfully persist followings: " + str(len(followings)) + " items"
Ejemplo n.º 3
0
def rebuild_tweets():
    """Re-encode the 'urls' and 'hashtags' fields of every stored tweet.

    Each field holds a Python-literal string; it is re-serialised as
    JSON, or blanked out when it cannot be parsed.  The cleaned rows go
    to the 'twitter_tweets_cleaned' table.
    """
    source_rows = list(datastore.load('twitter_tweets'))
    cleaned = []
    for row in source_rows:
        # urls first, then hashtags.
        for field in ('urls', 'hashtags'):
            raw = row[field]
            if raw is None or raw == '':
                continue
            try:
                row[field] = json.dumps(ast.literal_eval(raw))
            except Exception:
                traceback.print_exc()
                row[field] = ''
        cleaned.append(row)

    datastore.store(cleaned, 'twitter_tweets_cleaned', '')
Ejemplo n.º 4
0
def get_followings():
  users = list(datastore.load('khiem.twitter_users'))
  user_ids = []
  followings = []
  for i in range(0,len(users)-1):
    user_ids.append(users[i].id)
  
  for i in range(500, 751):
    user = users[i]
    #friend_ids = api.GetFriendIDs()
    #for friend_id in friend_ids:
     # if friend_id in user_ids:
     #   followings.append({"user_id": user["id"], "friend_id": friend_id})
    try: 
      follower_ids = api.GetFollowerIDs(userid=user["id"])["ids"]
      print str(i) + ": Found " + str(len(follower_ids)) + " followers of " + user.name
      for follower_id in follower_ids:
        if follower_id in user_ids:
          followings.append({"user_id": follower_id, "friend_id": user.id})
    except Exception as e:
      print e
      continue
        
  datastore.store(followings, 'twitter_followings_500_750', 'Twitter followings')
  print "Successfully persist followings: " + str(len(followings)) + " items"
Ejemplo n.º 5
0
def getTweets(start, stop):
    tweet_entities = [
        "id", "truncated", "hashtags", "urls", "user_mentions", "text",
        "source", "retweet_count", "created_at", "user", "favorited"
    ]
    users = list(datastore.load('twitter_users_cleaned'))
    total_users = len(users)
    if (stop - start > 300):
        print "Max (stop-start) is 300"
        return
    users = users[start:stop]
    data = []
    index = start
    try:
        for u in users:
            print str(index) + ": Getting tweets for user " + u['name']
            try:
                tweets = getTopTweets(userid=int(u['id']))
                if (tweets is not None):
                    print "Fetched " + str(
                        len(tweets)) + " tweets of user " + u['name']
                    for t in tweets:
                        st = t.AsDict()
                        obj = {}
                        for prop in tweet_entities:
                            if prop not in st:
                                if prop == "retweet_count" or prop == "favorited" or prop == "truncated":
                                    obj[prop] = 0
                                else:
                                    obj[prop] = ''
                            elif prop == "user":
                                user_obj = dict(st["user"])
                                obj['user_id'] = user_obj['id']
                                obj['user_screen_name'] = user_obj[
                                    'screen_name']
                                obj['user_name'] = user_obj['name']
                            else:
                                obj[prop] = st[prop]
                        data.append(obj)
                else:
                    print "Empty tweets for user " + u['name']
            except Exception as e:
                print 'Exception occurs when fetch tweets for user ' + u['name']
                traceback.print_exc()
            index = index + 1
    except Exception as e:
        traceback.print_exc()
        print str(datetime.now())
        return

    datastore.store(
        data, 'twitter_tweets_cleaned_' + str(start) + '_' + str(stop - 1), '')

    print str(
        datetime.now()) + ' Result at segment ' + str(start) + ' -> ' + str(
            stop - 1) + '/' + str(total_users) + ': Total ' + str(
                len(data)) + ' tweets'
def grabContent(start, stop):
    if stop < 0 or start < 0 or stop < start:
        print "Invalid arguments"
        return

    articles = []
    tweets = list(datastore.load('twitter_tweets', start, stop - start))
    #tweets = tweets[start:stop]
    print "Loaded " + str(
        len(tweets)) + " tweets from `twitter_tweets` table, " + str(
            start) + " -> " + str(stop - 1)
    index = start
    url_counter = 0
    for tweet in tweets:
        tweet_id = tweet['id']
        user_id = tweet['user_id']
        user_screen_name = tweet['user_screen_name']
        text = tweet['text']
        urls = tweet['urls']
        print str(index) + ": Processing tweet " + str(tweet_id)
        if urls is not None and urls.strip() != '':
            #urls = dict(json.loads(urls, encoding="utf-8"))
            try:
                urls = ast.literal_eval(urls)
            except Exception as e:
                continue
            print "Found " + str(len(urls)) + " urls"
            for url in urls:
                url_counter = url_counter + 1
                display_url = urls[url]
                print "Grabbing content from " + url + " ..."
                content = None
                try:
                    content = contentdetector.upgradeLink(url)
                except Exception as e:
                    print "Exception occurs when trying to grab content from " + url
                    traceback.print_exc()
                if content is not None and content.strip() != '':
                    articles.append({
                        "tweet_id": tweet_id,
                        "user_id": user_id,
                        "user_screen_name": user_screen_name,
                        "text": text,
                        "url": url,
                        "display_url": display_url,
                        "content": content
                    })
        index = index + 1
    print "Processed tweets " + str(start) + " -> " + str(stop - 1)
    print "Total urls processed: " + str(url_counter)
    print "Total articles grabbed: " + str(len(articles))

    datastore.store(articles,
                    'twitter_articles_' + str(start) + '_' + str(stop - 1), '')
Ejemplo n.º 7
0
def rebuild_users_level1():
    users = list(datastore.load('khiem.twitter_users_level1'))
    users = users[3:]
    user_ids = []
    for u in users:
        user_ids.append(u["id"])

    users = api.UsersLookup(user_id=user_ids)
    user_data = []
    for u in users:
        user_data.append(u.AsDict())
    print "Successfully retrieve " + str(len(users)) + " users info"
    datastore.store(user_data, 'twitter_users_level1_rebuild', '')
Ejemplo n.º 8
0
def rebuild_users_level1():
  users = list(datastore.load('khiem.twitter_users_level1'))
  users = users[3:]
  user_ids = []
  for u in users:
    user_ids.append(u["id"])
  
  users = api.UsersLookup(user_id=user_ids)
  user_data = []
  for u in users:
    user_data.append(u.AsDict())
  print "Successfully retrieve " + str(len(users)) + " users info"
  datastore.store(user_data, 'twitter_users_level1_rebuild', '')
Ejemplo n.º 9
0
def getTweets(start, stop):
  """Fetch top tweets for users[start:stop) and store a flattened copy.

  Each tweet is reduced to the fields in `tweet_entities`; missing
  count/flag fields default to 0, other missing fields to ''.  The
  nested "user" object is flattened into user_id / user_screen_name /
  user_name columns.  Results go to a table named after the segment.

  Args:
    start: index of the first user to process (also used in the output
      table name).
    stop: one past the last user index; (stop - start) must be <= 300.
  """
  tweet_entities = ["id", "truncated", "hashtags", "urls", "user_mentions", "text", "source", "retweet_count", "created_at", "user", "favorited"]
  users = list(datastore.load('twitter_users_cleaned'))
  total_users = len(users)
  if (stop - start > 300):
    print "Max (stop-start) is 300"
    return
  users = users[start:stop]
  data = []
  index = start  # absolute user index, for progress messages
  try:
    for u in users:
      print str(index) + ": Getting tweets for user " + u['name']
      try:
        tweets = getTopTweets(userid=int(u['id']))
        if (tweets is not None):
          print "Fetched " + str(len(tweets)) + " tweets of user " + u['name']
          for t in tweets:
            st = t.AsDict()
            obj = {}
            for prop in tweet_entities:
              if prop not in st:
                # Absent fields: count/flag columns default to 0,
                # everything else to the empty string.
                if prop == "retweet_count" or prop == "favorited" or prop == "truncated":
                  obj[prop] = 0
                else:
                  obj[prop] = ''
              elif prop == "user":
                # Flatten the nested user object into three columns.
                user_obj = dict(st["user"])
                obj['user_id'] = user_obj['id']
                obj['user_screen_name'] = user_obj['screen_name']
                obj['user_name'] = user_obj['name']
              else:
                obj[prop] = st[prop]
            data.append(obj)
        else:
          print "Empty tweets for user " + u['name']
      except Exception as e:
        # Per-user failures are logged and skipped; the batch continues.
        print 'Exception occurs when fetch tweets for user ' + u['name']
        traceback.print_exc()  
      index = index + 1
  except Exception as e:
    # Unexpected batch-level failure: log, timestamp, and abort without
    # storing partial results.
    traceback.print_exc()
    print str(datetime.now())
    return
  
  
  
  datastore.store(data, 'twitter_tweets_cleaned_' + str(start) + '_' + str(stop-1), '')
  
  print str(datetime.now()) +  ' Result at segment ' + str(start) + ' -> ' + str(stop-1) + '/' + str(total_users) + ': Total ' + str(len(data)) + ' tweets'
Ejemplo n.º 10
0
def build_graph_level3(start, stop):
  """Expand the follow graph to level 3 for level-2 users[start:stop).

  For each user in the slice, collect top friends (outgoing edges) and
  top followers (incoming edges); users not seen before become level-3
  candidates.  Users and edges are stored in per-segment tables.

  Args:
    start: first level-2 user index; (stop - start) must be <= 15.
    stop: one past the last index (also used in the output table names).
  """
  if (stop - start > 15):
    print "The [start, stop) range must not greater than 15, or (stop-start) <= 15"
    return

  users_l2 = list(datastore.load('khiem.twitter_users_level2'))
  users_l2 = users_l2[start:stop]
  user_ids = []  # ids already seen, to de-duplicate level-3 users
   
  followings = []  # edge dicts: {"user_id": follower, "friend_id": followed}
  users_l3 = []  # newly discovered level-3 user records
  index = start  # absolute index, for progress messages
  try:
    for user in users_l2:
      print str(index) + ": Find top friends for user " + user['name']
      try:
        friends = findTopFriends(user["id"])
        for friend in friends:
          followings.append({"user_id": user["id"], "friend_id": friend.id})
          if friend.id not in user_ids:
            user_ids.append(friend.id)
            users_l3.append(friend.AsDict())
      except Exception as e:
        # Friend lookup failed: log and still try the follower side.
        print "Exception occurs when find top friends of user " + user['name']
        traceback.print_exc()
      
      try:    
        print str(index) + ": Find top followers for user " + user['name']
        followers = findTopFollowers(user['id'])
        for follower in followers:
          followings.append({"user_id": follower.id, "friend_id": user['id']})
          if follower.id not in user_ids:
            user_ids.append(follower.id)
            users_l3.append(follower.AsDict())
      except Exception as e:
        # Follower lookup failed: log and move on to the next user.
        print "Exception occurs when find top followers of user " + user['name']
        traceback.print_exc()
      index = index + 1
  except Exception as e:
    # Unexpected batch failure: log, timestamp, abort without storing.
    traceback.print_exc()
    print str(datetime.now())
    return
  
  datastore.store(users_l3, "twitter_users_level3_" + str(start) + "_" + str(stop-1), "")
  datastore.store(followings, "twitter_followings_level3_" + str(start) + "_" + str(stop-1), "")
  
  print str(datetime.now()) + ": Result at level 3, segment " + str(start) + "->" + str(stop-1) + ": " + str(len(users_l3)) + " users, " + str(len(followings)) + " followings"
def main():
  """Fetch public calendar feeds concurrently, merge them with blog
  posts, and persist everything to the local datastore.
  """
  gcal_template = (
      "http://www.google.com/calendar/feeds/%s/public/basic?"
      "showdeleted=true&updated-min=2011-08-01T01:00:00-08:00&max-results=1000")
  threads = []
  for cal in config.calendar_ids:
    threads.append(FetchThread(gcal_template % cal.calendar_id))
  for thread in threads:
    thread.start()  # Do work.
  for thread in threads:
    thread.join()  # Wait for work to end.

  events = []
  for thread in threads:
    events.extend(gcal.parse_feed(thread.document))

  posts = blogger.GetPosts()

  store = datastore.load("database.db")
  store.update(events, posts)
  store.save()
  store.close()
Ejemplo n.º 12
0
def grabContent(start, stop):
  if stop < 0 or start < 0 or stop < start:
    print "Invalid arguments"
    return
  
  articles = []
  tweets = list(datastore.load('twitter_tweets', start, stop-start))
  #tweets = tweets[start:stop]
  print "Loaded " + str(len(tweets)) + " tweets from `twitter_tweets` table, " + str(start) + " -> " + str(stop-1)
  index = start
  url_counter = 0
  for tweet in tweets:
    tweet_id = tweet['id']
    user_id = tweet['user_id']
    user_screen_name = tweet['user_screen_name']
    text = tweet['text']
    urls = tweet['urls']
    print str(index) + ": Processing tweet " + str(tweet_id)
    if urls is not None and urls.strip() != '':
      #urls = dict(json.loads(urls, encoding="utf-8"))
      try:
        urls = ast.literal_eval(urls)
      except Exception as e:
        continue
      print "Found " + str(len(urls)) + " urls"
      for url in urls:
        url_counter = url_counter + 1
        display_url = urls[url]
        print "Grabbing content from " + url + " ..."
        content = None
        try:
          content = contentdetector.upgradeLink(url)
        except Exception as e:
          print "Exception occurs when trying to grab content from " + url
          traceback.print_exc()
        if content is not None and content.strip() != '':
          articles.append({ "tweet_id": tweet_id, "user_id": user_id, "user_screen_name": user_screen_name,
                          "text": text, "url": url, "display_url": display_url, "content": content  })
    index = index + 1
  print "Processed tweets " + str(start) + " -> " + str(stop-1)
  print "Total urls processed: " + str(url_counter)
  print "Total articles grabbed: " + str(len(articles))
  
  datastore.store(articles, 'twitter_articles_' + str(start) + '_' + str(stop-1), '')
Ejemplo n.º 13
0
def _get_tweets():
  tweets = []
  users = list(datastore.load('khiem.twitter_users'))
  
  for i in range(300,501):
    try:
      print str(i) + ': Getting tweets from user ' + users[i]['name']
      statuses = api.GetUserTimeline(screen_name=users[i]['screen_name'], count=200)
      for st in statuses:
        tweets.append(st.AsDict())
      print 'Processed ' + str(len(statuses)) + ' tweets for user ' + users[i]['name']
    except Exception as e:
      print e
      print 'Exception occurs when process user ' + users[i]['name'] + '. Skip.'
      continue
  
  print 'Storing tweets for ' + str(i) + ' users'
  # store all tweets
  datastore.store(tweets, 'twitter_tweets_300_500', 'Tweets')
Ejemplo n.º 14
0
def build_graph_level2():
  # get users from level1, except the top 3 users from level0
  users_level1 = list(datastore.load('khiem.twitter_users_level1'))
  users_level1 = users_level1[3:]

  users_l2 = []
  followings = []
  user_ids = []
  # for each user find top friends
  for user in users_level1:
    print "Find top friends of user " + user["name"]
    friends = findTopFriends(user["id"])
    for friend in friends:
      followings.append({"user_id": user["id"], "friend_id": friend.id})
      if friend.id not in user_ids:
        user_ids.append(friend.id)
        users_l2.append(friend.AsDict())
  
  datastore.store(users_l2, "twitter_users_level2", "")
  datastore.store(followings, "twitter_followings_level2", "")
Ejemplo n.º 15
0
def build_graph_level2():
    # get users from level1, except the top 3 users from level0
    users_level1 = list(datastore.load('khiem.twitter_users_level1'))
    users_level1 = users_level1[3:]

    users_l2 = []
    followings = []
    user_ids = []
    # for each user find top friends
    for user in users_level1:
        print "Find top friends of user " + user["name"]
        friends = findTopFriends(user["id"])
        for friend in friends:
            followings.append({"user_id": user["id"], "friend_id": friend.id})
            if friend.id not in user_ids:
                user_ids.append(friend.id)
                users_l2.append(friend.AsDict())

    datastore.store(users_l2, "twitter_users_level2", "")
    datastore.store(followings, "twitter_followings_level2", "")
Ejemplo n.º 16
0
def _get_tweets():
    """Fetch up to 200 recent tweets for each of users[300:501] and
    store them all in the 'twitter_tweets_300_500' table.
    """
    tweets = []
    users = list(datastore.load('khiem.twitter_users'))

    for i in range(300, 501):
        try:
            print str(i) + ': Getting tweets from user ' + users[i]['name']
            statuses = api.GetUserTimeline(screen_name=users[i]['screen_name'],
                                           count=200)
            for st in statuses:
                tweets.append(st.AsDict())
            print 'Processed ' + str(
                len(statuses)) + ' tweets for user ' + users[i]['name']
        except Exception as e:
            # Best-effort: log and skip this user.
            print e
            print 'Exception occurs when process user ' + users[i][
                'name'] + '. Skip.'
            continue

    # NOTE(review): `i` here is the last loop index (500), not the number
    # of users actually processed — the message likely overstates it.
    print 'Storing tweets for ' + str(i) + ' users'
    # store all tweets
    datastore.store(tweets, 'twitter_tweets_300_500', 'Tweets')
def main(pretend=False, now=None):
  """Regenerate the site's static pages from calendar and blog data.

  Fetches fresh calendar entries and Tumblr posts, merges them into the
  datastore, then renders the upcoming-events page, the what's-new page,
  and one page per post/event from their templates.

  Args:
    pretend: when True, show a diff of the generated files instead of
      committing them.
    now: reference time for the upcoming / what's-new queries; defaults
      to the current time.  (Bug fix: the old default
      `datetime.datetime.now()` was evaluated once at import time, so a
      long-lived process always rendered against a stale clock.)
  """
  if now is None:
    now = datetime.datetime.now()

  fm = filemanager.FileManager()
  # Move the templates aside so generated output can take their paths.
  fm.moveouttheway(UPCOMING_EVENTS_TEMPLATE)
  fm.moveouttheway(WHATS_NEW_TEMPLATE)
  fm.moveouttheway(POST_TEMPLATE)
  fm.moveouttheway(EVENT_TEMPLATE)

  new_cal_entries = gcal.get_calendar_entries(GCAL_URL)
  new_blog_entries = tumblr.get_blog_entries(TUMBLR_URL)
  store = datastore.load(DATASTORE_FILE)
  store.update(new_cal_entries, new_blog_entries)
  store.save()
  # NOTE(review): the store is queried below after close() — presumably
  # close() only flushes the file; verify against the datastore module.
  store.close()

  upcoming_events_tpl = template.parse(fm.read(UPCOMING_EVENTS_TEMPLATE))
  upcoming_events = store.get_upcoming_events(now)
  upcoming_events_output = upcoming_events_tpl.render(upcoming_events)
  fm.save(UPCOMING_EVENTS_PAGE, upcoming_events_output)

  whats_new_tpl = template.parse(fm.read(WHATS_NEW_TEMPLATE))
  whats_new = store.get_whats_new(now)
  whats_new_output = whats_new_tpl.render(whats_new)
  fm.save(WHATS_NEW_PAGE, whats_new_output)

  post_tpl = template.parse(fm.read(POST_TEMPLATE))
  for post in store.get_blog_posts():
    post_output = post_tpl.render(post)
    fm.save(POST_FILE % post.title, post_output)

  event_tpl = template.parse(fm.read(EVENT_TEMPLATE))
  for event in store.get_events():
    event_output = event_tpl.render(event)
    fm.save(EVENT_FILE % event.title, event_output)

  if pretend:
    fm.show_diff()
  else:
    fm.commit()
Ejemplo n.º 18
0
def build_graph_level3(start, stop):
    if (stop - start > 15):
        print "The [start, stop) range must not greater than 15, or (stop-start) <= 15"
        return

    users_l2 = list(datastore.load('khiem.twitter_users_level2'))
    users_l2 = users_l2[start:stop]
    user_ids = []

    followings = []
    users_l3 = []
    index = start
    try:
        for user in users_l2:
            print str(index) + ": Find top friends for user " + user['name']
            try:
                friends = findTopFriends(user["id"])
                for friend in friends:
                    followings.append({
                        "user_id": user["id"],
                        "friend_id": friend.id
                    })
                    if friend.id not in user_ids:
                        user_ids.append(friend.id)
                        users_l3.append(friend.AsDict())
            except Exception as e:
                print "Exception occurs when find top friends of user " + user[
                    'name']
                traceback.print_exc()

            try:
                print str(
                    index) + ": Find top followers for user " + user['name']
                followers = findTopFollowers(user['id'])
                for follower in followers:
                    followings.append({
                        "user_id": follower.id,
                        "friend_id": user['id']
                    })
                    if follower.id not in user_ids:
                        user_ids.append(follower.id)
                        users_l3.append(follower.AsDict())
            except Exception as e:
                print "Exception occurs when find top followers of user " + user[
                    'name']
                traceback.print_exc()
            index = index + 1
    except Exception as e:
        traceback.print_exc()
        print str(datetime.now())
        return

    datastore.store(users_l3,
                    "twitter_users_level3_" + str(start) + "_" + str(stop - 1),
                    "")
    datastore.store(
        followings,
        "twitter_followings_level3_" + str(start) + "_" + str(stop - 1), "")

    print str(datetime.now()) + ": Result at level 3, segment " + str(
        start) + "->" + str(stop - 1) + ": " + str(
            len(users_l3)) + " users, " + str(len(followings)) + " followings"