Esempio n. 1
0
def pull_quoted(db, api, twitterapi):
    """Queue quoted tweets that are not stored locally for fetching.

    Scans ``db.tweets`` for documents with a positive ``quoted_status_id``
    whose ``quote_pulled`` field is unset.  When the quoted tweet is already
    present in ``db.tweets`` the quoting document is flagged
    ``quote_pulled``; otherwise the quoted id is collected and handed to
    ``add100`` in batches of at most 100 ids.

    Args:
        db: database handle exposing a ``tweets`` collection.
        api: forwarded unchanged to ``add100``.
        twitterapi: forwarded unchanged to ``add100``.
    """
    tweets = db.tweets.find(
        {
            'quoted_status_id': {
                '$gt': 0
            },
            'quote_pulled': None
        }, {'quoted_status_id': 1})
    if verbose():
        tweets = Bar("Processing:",
                     max=tweets.count(),
                     suffix='%(index)d/%(max)d - %(eta_td)s').iter(tweets)
    idlist = []
    for t in tweets:
        twid = t['quoted_status_id']
        if twid is None:
            # Defensive only: the query above asks for quoted_status_id > 0,
            # so a None value is not expected here.
            db.tweets.update(t, {'$unset': {'quoted_status_id': 1}})
            print("point 1: this should never? be reached, i think")
            continue
        if db.tweets.find_one({'id': twid}):
            # Quoted tweet already stored; just mark this one as handled.
            db.tweets.update(t, {'$set': {'quote_pulled': True}})
            continue
        # Avoid queueing the same quoted id twice within one batch.
        if twid not in idlist:
            idlist.append(twid)
        if verbose():
            # One pre-formatted argument so the output is the same whether
            # print is the Python 2 statement or the Python 3 function.
            print("  %s" % (twid,))
        if len(idlist) == 100:
            add100(db, api, twitterapi, idlist)
            idlist = []
    # Flush the final, partially filled batch.
    if idlist:
        add100(db, api, twitterapi, idlist)
Esempio n. 2
0
def pull_deleted(db, api, twitterapi, uid):
    """Hand the ids of tweets flagged ``deleted`` to ``add100`` in batches.

    Args:
        db: database handle exposing a ``tweets`` collection.
        api: forwarded unchanged to ``add100``.
        twitterapi: forwarded unchanged to ``add100``.
        uid: when truthy, only tweets whose ``user.id`` equals this value
            are considered; otherwise every deleted tweet is.
    """
    query = {'deleted': True}
    if uid:
        query['user.id'] = uid
    cursor = db.tweets.find(query)

    if verbose():
        progress = Bar("Processing:",
                       max=cursor.count(),
                       suffix='%(index)d/%(max)d - %(eta_td)s')
        cursor = progress.iter(cursor)

    batch = []
    for doc in cursor:
        batch.append(doc['id'])
        if len(batch) < 100:
            continue
        # Batch is full: submit it and start a fresh one.
        add100(db, api, twitterapi, batch)
        batch = []

    # Submit whatever is left over after the last full batch.
    if batch:
        add100(db, api, twitterapi, batch)
Esempio n. 3
0
def pull_replied(db, api, twitterapi):
    """Queue replied-to tweets that are not stored locally for fetching.

    Scans ``db.tweets`` for documents with a positive
    ``in_reply_to_status_id`` whose ``reply_pulled`` field is unset.  If the
    replied-to tweet is already stored, the reply is flagged
    ``reply_pulled``; when that stored tweet is marked ``deleted`` and its
    ``user.id`` is missing or differs from the reply's
    ``in_reply_to_user_id``, ``user.id`` is overwritten with the latter.
    Ids of tweets that are not stored are passed to ``add100`` in batches
    of 100.

    Args:
        db: database handle exposing a ``tweets`` collection.
        api: forwarded unchanged to ``add100``.
        twitterapi: forwarded unchanged to ``add100``.
    """
    #TODO: refine this to not be a full scan. see $lookup.
    criteria = {
        'in_reply_to_status_id': {'$gt': 0},
        'reply_pulled': None,
    }
    wanted_fields = {'in_reply_to_status_id': 1, 'in_reply_to_user_id': 1}
    cursor = db.tweets.find(criteria, wanted_fields)

    if verbose():
        progress = Bar("Processing:",
                       max=cursor.count(),
                       suffix='%(index)d/%(max)d - %(eta_td)s')
        cursor = progress.iter(cursor)

    batch = []
    for reply in cursor:
        parent_id = reply['in_reply_to_status_id']
        if parent_id is None:
            # Not expected given the > 0 filter in the query above.
            db.tweets.update(reply, {'$unset': {'in_reply_to_status_id': 1}})
            print("point 1: this should never? be reached, i think")
            continue

        parent = db.tweets.find_one({'id': parent_id})
        if not parent:
            # Replied-to tweet is not stored: queue it for fetching.
            batch.append(parent_id)
            if verbose():
                print(" ", parent_id)
            if len(batch) == 100:
                add100(db, api, twitterapi, batch)
                batch = []
            continue

        db.tweets.update(reply, {'$set': {'reply_pulled': True}})
        if not parent.get('deleted', False):
            continue

        # For a deleted parent, record the author id taken from the reply
        # when the stored one is absent or disagrees.
        author = parent.get('user')
        author_id = None if author is None else author.get('id')
        if author_id is None or reply['in_reply_to_user_id'] != author_id:
            db.tweets.update(
                {'id': parent_id},
                {'$set': {'user.id': reply['in_reply_to_user_id']}})

    # Submit whatever is left over after the last full batch.
    if batch:
        add100(db, api, twitterapi, batch)
Esempio n. 4
0
def pull_favorited(db, api, twitterapi):
    """Queue favorited tweets that are not stored locally for fetching.

    Iterates ``db.favorites`` documents whose ``pulled`` field is unset.
    When the favorited tweet already exists in ``db.tweets`` the favorite is
    flagged ``pulled``; otherwise its ``tweet_id`` is collected and handed
    to ``add100`` in batches of 100 ids.

    Args:
        db: database handle exposing ``favorites`` and ``tweets``
            collections.
        api: forwarded unchanged to ``add100``.
        twitterapi: forwarded unchanged to ``add100``.
    """
    favs = db.favorites.find({'pulled': None}).batch_size(100)
    idlist = []
    if verbose():
        favs = Bar("Processing:",
                   max=favs.count(),
                   suffix='%(index)d/%(max)d - %(eta_td)s').iter(favs)
    for f in favs:
        twid = f['tweet_id']
        if db.tweets.find_one({'id': twid}) is not None:
            # Tweet already stored; just mark the favorite as handled.
            db.favorites.update(f, {'$set': {'pulled': True}})
            continue
        idlist.append(twid)
        if verbose():
            # One pre-formatted argument so the output is the same whether
            # print is the Python 2 statement or the Python 3 function.
            print("  %s" % (twid,))
        if len(idlist) == 100:
            add100(db, api, twitterapi, idlist)
            idlist = []
    # Flush the final, partially filled batch.
    if idlist:
        add100(db, api, twitterapi, idlist)
Esempio n. 5
0
  auth = tweepy.OAuthHandler(config.consumer_key, config.consumer_secret)
  auth.set_access_token(config.access_token, config.access_token_secret)
  api = tweepy.API(auth)

  for user in args:
    uid = long(user) if options.ids else None
    uname = None if options.ids else user
    u = lookup_user(db, uid, uname)
    if u is None:
      print uid, uname, "not found"
    if options.scan:
      tweets = db.tweets.find({'user.id': u['id'], 'deleted': None}).sort('created_at', 1)
      idlist = []
      for t in tweets:
        idlist.append(t['id'])
        if len(idlist) == 100:
          idlist = add100(db, api, twitterapi, idlist)
          print u'found {} deleted'.format(len(idlist))
          idlist = []
      idlist = add100(db, api, twitterapi, idlist)
      print u'found {} deleted'.format(len(idlist))
      idlist = []

    tweets = db.tweets.find({'deleted': True, 'user.id': u['id']}).sort('created_at', 1)
    if verbose():
      tweets = Bar("Processing:", max=tweets.count(), suffix = '%(index)d/%(max)d - %(eta_td)s').iter(tweets)
    for t in tweets:
      if options.nort and 'retweeted_status' in t: continue
      print u'{} {} {}: {}'.format(t.get('id', '-'), t.get('created_at', None), u['screen_name_lower'], t.get('text', '<not found>')).encode('utf-8')