Beispiel #1
0
def train_multinomial_bayes(request):
    prof = request.user.get_profile()
    tp = TwitterUserProfile.objects.get(user=prof)
    tp.active_classifier = 'MultinomialBayesClassifier'
    tp.classifier_version = '0.1'
    tp.save()
    force_train(tp)
    return {'success': 'works?'}
Beispiel #2
0
def train_classifier(request):
    prof = request.user.get_profile()
    tp = TwitterUserProfile.objects.get(user=prof)
    force_train(tp)
    return {'success': 'works?'}
Beispiel #3
0
def cache_timeline_backfill(tp, twitter_tokens, statuses):
    """ Backfill cached timeline from the oldest tweet in statuses to
    the cached_time in TwitterUserProfile or 72 hours, whichever is sooner"""
    api = get_authorized_twython(twitter_tokens)
    cutoff_time = datetime.utcnow()-timedelta(hours=72)

    if not statuses:
        statuses = Status.construct_from_dicts(api.getFriendsTimeline(include_rts=True))

    backfill_maxid = min(statuses, key=lambda x: x.id).id
    backfill_newestid = max(statuses, key=lambda x: x.id).id

    # Maxid and minid indicate contiguous cached status ids
    minid = getattr(tp, 'cached_minid', 0)
    maxid = getattr(tp, 'cached_maxid', 0)

    # No new tweets at all
    if backfill_newestid == maxid: return

    # Only one page of new tweets - just cache these
    if backfill_maxid < maxid:
        cache_statuses(statuses, tp)
        return

    # Cache as far back as 800 tweets or 72 hours worth
    num_apicalls = 1
    finished = False
    total_num_statuses = len(statuses)
    while not finished:
        print "backfill minid: " + str(maxid)
        print "backfill maxid: " + str(backfill_maxid)


        recieved_statuses = Status.construct_from_dicts(
            api.getFriendsTimeline(count=200, include_rts=True,
                                   max_id=backfill_maxid, min_id=maxid))
        num_apicalls += 1
        total_num_statuses += len(recieved_statuses)

        statuses.extend(recieved_statuses)

        oldest_status = min(recieved_statuses, key=lambda x: x.id)
        if (oldest_status.created_at < cutoff_time or
            oldest_status.id <= maxid or
            num_apicalls >= 5): finished = True
        else: backfill_maxid = oldest_status.id

    # Set new minid, maxid for contiguous cached statuses
    if oldest_status.id <= maxid:
        tp.cached_minid = minid
    else:
        tp.cached_minid = oldest_status.id
    tp.cached_maxid = backfill_newestid
    tp.save()

    # If less than 50 rated statuses: force train user classifier
    if Rating.objects.filter(user=tp).count() < 50:
        force_train(tp)

#    print "num apicalls: " + str(num_apicalls)
    cache_statuses(statuses, tp)