def update_comment_status(self, comment, post):
        """Create or refresh the FBComment matching a raw Facebook comment.

        The comment is looked up by ``comment["id"]``. When no row exists, an
        empty FBComment attached to ``post`` is saved first; the row is then
        filled in through ``update_from_facebook``. An IntegrityError triggers
        one more lookup-and-update attempt; every other failure is logged and
        swallowed.

        Returns whatever FBComment instance was obtained along the way, or
        None when none could be fetched or created.
        """
        if debugging:
            dLogger.log("<ThreadComment#%s>::update_comment_status()" %
                        self.ident)

        fbcomment = None
        try:
            comment_fid = comment["id"]
            try:
                fbcomment = FBComment.objects.get(fid=comment_fid)
            except ObjectDoesNotExist:
                # Unknown comment: persist a placeholder before filling it in.
                fbcomment = FBComment(post=post)
                fbcomment.save()
            fbcomment.update_from_facebook(comment, post)
        except IntegrityError:
            # Second chance: fetch the row again and redo the update.
            try:
                fbcomment = FBComment.objects.get(fid=comment_fid)
                fbcomment.update_from_facebook(comment, post)
            except ObjectDoesNotExist:
                post_fid = post.fid or "0"
                report = u"ERROR! Comments already exist but not found%s for %s" % (
                    unicode(comment), post_fid)
                logger.exception(report)
                if debugging:
                    dLogger.exception(report)
        except:
            post_fid = post.fid or "0"
            report = u"<p style='red'>Cannot update comment %s for %s</p>" % (
                unicode(comment), post_fid)
            logger.exception(report)
            if debugging:
                dLogger.exception(report)
        return fbcomment
def get_search_status_chart(request, harvester_id, search_term):
    """Serve the number of statuses per day for a Twitter search as gviz JSON.

    The chart spans every calendar day from the oldest to the newest status
    of the search that has a ``created_at`` value; when there is none, it
    covers today only.

    Args:
        request: the incoming HTTP request (unused here).
        harvester_id: unused here; part of the URL signature.
        search_term: term of the TWSearch whose statuses are charted.

    Returns:
        An HttpResponse holding the DataTable JSON. When an unexpected error
        occurs it is logged and None is returned, as before.

    Raises:
        Http404: when no TWSearch matches ``search_term``.
    """
    # Imported locally so the 404 raised by get_list_or_404 can be re-raised
    # without relying on the module's import list.
    from django.http import Http404

    try:
        search = get_list_or_404(TWSearch, term=search_term)[0]

        # Only statuses that carry a timestamp can bound the chart; rows with
        # a null created_at are ignored when looking for the first/last day.
        dated = search.status_list.filter(created_at__isnull=False)
        oldest = list(dated.order_by(u"created_at")[:1])
        if oldest:
            base = oldest[0].created_at
            to = dated.order_by(u"-created_at")[0].created_at
        else:
            base = to = dt.datetime.now()

        logger.debug("to: %s"%to)
        logger.debug("base: %s"%base)

        # Compare calendar days, not elapsed 24h periods, so the last day is
        # never dropped when `to` falls earlier in the day than `base`.
        first_day = base.replace(hour=0, minute=0, second=0, microsecond=0)
        last_day = to.replace(hour=0, minute=0, second=0, microsecond=0)
        days = (last_day - first_day).days + 1
        dateList = [first_day + dt.timedelta(days=x) for x in range(days)]

        description = {"date_val": ("date", "Date"),
                       "status_count": ("number", "Status count"),
                      }
        data = []
        for date in dateList:
            c = search.status_list.filter(created_at__year=date.year,
                                          created_at__month=date.month,
                                          created_at__day=date.day).count()
            data.append({"date_val": date, "status_count": c})

        data_table = gviz_api.DataTable(description)
        data_table.LoadData(data)
        payload = data_table.ToJSon()
        logger.debug(payload)
        # NOTE(review): `mimetype` is the pre-Django-1.7 spelling of
        # `content_type`; kept to match the Django version this file targets.
        return HttpResponse(payload, mimetype='application/javascript')
    except Http404:
        raise
    except Exception:
        # Falls through and returns None, exactly like the original handler.
        dLogger.exception("AN ERROR HAS OCCURED WHILE RENDERING STATUS CHART: SEARCH_TERM: %s"%search_term)
def get_fb_harvester_comment_list(request, call_type, harvester_id):
    """Return the datatables records of Facebook comments for a harvester.

    ``harvester_id == '0'`` selects every FBComment; any other value selects
    the distinct comments on posts whose user is handled by that
    FacebookHarvester. The resulting queryset (None when the lookup raised
    ObjectDoesNotExist) is handed to ``get_datatables_records``.
    """
    # Position in this tuple == datatables column index; the mapping is
    # required for correct sorting behavior.
    sortable_columns = (
        u'created_time',
        u'ffrom__username',
        u'post__ffrom__name',
        u'post__fid',
        u'message',
        u'likes',
        u'user_likes',
        u'ftype',
        u'ffrom__name',
        u'ffrom__fid',
        u'post__ffrom__fid',
    )
    columnIndexNameMap = dict(enumerate(sortable_columns))

    querySet = None
    try:
        if harvester_id != '0':
            harvester = get_list_or_404(FacebookHarvester, pk=harvester_id)[0]
            by_harvester = FBComment.objects.filter(
                post__user__harvester_in_charge=harvester)
            querySet = by_harvester.distinct()
        else:
            querySet = FBComment.objects.all()
    except ObjectDoesNotExist:
        dLogger.exception('ERROR OCCURED IN get_fb_harvester_comment_list:')

    # Delegate paging/sorting/serialisation to the generic helper from utils.
    return get_datatables_records(request, querySet, columnIndexNameMap,
                                  call_type)
Esempio n. 4
0
def update_statuses(harvester, snh_search, status_id_list):
    """Fetch the given tweets and store them, their authors and the search link.

    Args:
        harvester: harvester whose credentials and ``keep_raw_statuses``
            setting are used.
        snh_search: search object the statuses are attached to through
            ``update_search``; its ``latest_status_harvested`` is set to the
            last status processed.
        status_id_list: sequence of status ids to look up. An empty sequence
            is a no-op.
    """
    if debugging:
        dLogger.log('update_statuses()')
        dLogger.log('    snh_search: %s' % snh_search)
        dLogger.log('    status_id_list: %s' % status_id_list)

    if not status_id_list:
        # Nothing to look up (the original raised IndexError here).
        return

    # '%s' keeps this working for ids given as ints as well as strings.
    statuses_ids = ','.join('%s' % status_id for status_id in status_id_list)
    statuses = api_statuses_lookup(harvester,
                                   statuses_ids,
                                   include_entities=True)

    # Stays None when the lookup returned nothing, so the search is not
    # updated with an unbound name afterwards.
    snh_status = None

    # NOTE(review): 'created_at' looks like Twitter's textual date, in which
    # case this ordering is lexicographic rather than chronological - confirm.
    for tw_status in sorted(statuses,
                            key=lambda x: x['created_at'],
                            reverse=True):
        tw_user = tw_status['user']
        snh_user, new = TWUser.objects.get_or_create(fid=tw_user['id'])
        if new:
            snh_user.screen_name = tw_user['screen_name']
            if debugging: dLogger.log('    new user created: %s' % snh_user)
            logger.info('New user created from search: %s' % snh_user)
            snh_user.harvester = harvester
            try:
                snh_user.save()
            except Exception:
                # Saving failed: move the statuses of the existing user with
                # the same screen name to the new user, drop the old one and
                # retry the save below.
                dLogger.log('    GODDAMNIT')
                user = TWUser.objects.get(screen_name=tw_user['screen_name'])
                for post in user.postedStatuses.all():
                    post.user = snh_user
                    post.save()
                user.delete()
            snh_user.save()

        try:
            snh_status = TWStatus.objects.get(fid=tw_status['id_str'])
        except Exception:
            snh_status = TWStatus.objects.create(fid=tw_status['id_str'],
                                                 user=snh_user)
            if debugging:
                dLogger.log('    new status created: %s' % snh_status)
            logger.debug('New status created from search: %s' % snh_status)

        try:
            snh_status.update_from_rawtwitter(tw_status, snh_user,
                                              harvester.keep_raw_statuses)
            update_search(snh_search, snh_status)
        except Exception as e:
            if debugging: dLogger.exception(e)
            logger.exception('AN ERROR HAS OCCURED WHILE SAVING TWEET TO DB:')

    if snh_status is not None:
        snh_search.latest_status_harvested = snh_status
        snh_search.save()
Esempio n. 5
0
def manage_exception(retry_count, harvester, user):
    """Log a harvesting failure and bump the retry counter.

    Returns a ``(new_retry_count, give_up)`` tuple, where ``give_up`` is True
    once the new count exceeds ``harvester.max_retry_on_fail``.
    """
    if debugging:
        trace_args = (retry_count, harvester, user)
        dLogger.log(
            "manage_exception(retry_count: %s, harvester: %s, user: %s)" %
            trace_args)

    failure_report = u"Exception for the harvester %s for %s. Retry:%d" % (
        harvester, unicode(user), retry_count)
    logger.exception(failure_report)
    if debugging:
        dLogger.exception(failure_report)

    attempts = retry_count + 1
    give_up = attempts > harvester.max_retry_on_fail
    return (attempts, give_up)
Esempio n. 6
0
    def update_from_youtube(self, snh_video, snh_user, yt_comment):
        """Synchronise this comment with a raw YouTube comment resource.

        Copies the id, video, user, published/updated timestamps, escaped
        text and like count onto the model and saves only when at least one
        of them differs from the stored value. Errors raised by ``save()``
        are logged through dLogger and not propagated.

        Args:
            snh_video: video object this comment belongs to.
            snh_user: user object that wrote the comment.
            yt_comment: dict with an ``id`` key and a ``snippet`` sub-dict
                holding ``publishedAt``, ``updatedAt``, ``textDisplay`` and
                ``likeCount``.
        """
        if debugging:
            dLogger.log("<YTComment: '%s'>::update_from_youtube()" % self)

        model_changed = False

        fid = yt_comment['id']
        if self.fid != fid:
            self.fid = fid
            model_changed = True

        snippet = yt_comment['snippet']

        if self.video != snh_video:
            self.video = snh_video
            model_changed = True

        if self.user != snh_user:
            self.user = snh_user
            model_changed = True

        # The last five characters of each timestamp are dropped before
        # parsing (presumably a ".000Z" suffix - TODO confirm).
        timestamp_format = '%Y-%m-%dT%H:%M:%S'

        yt_published = snippet['publishedAt']
        date_val = datetime.strptime(yt_published[:-5], timestamp_format)
        if self.published != date_val:
            self.published = date_val
            model_changed = True

        yt_updated = snippet['updatedAt']
        date_val = datetime.strptime(yt_updated[:-5], timestamp_format)
        if self.updated != date_val:
            self.updated = date_val
            model_changed = True

        # Store the text in escaped form.
        # NOTE(review): the pattern requires two literal backslashes before
        # "x", so ordinary single-backslash "\xNN" escapes are kept - confirm
        # that this is the intent.
        content = snippet['textDisplay'].encode('unicode_escape')
        content = re.sub(r'\\\\x..', '', content)
        if self.message != content:
            self.message = content
            model_changed = True

        like_count = snippet['likeCount']
        if self.like_count != like_count:
            self.like_count = like_count
            model_changed = True

        if model_changed:
            self.model_update_date = datetime.utcnow()
            try:
                self.save()
            except Exception as e:
                # `as` form: same spelling as the other handlers in this file
                # and valid on Python 2.6+ and 3.
                dLogger.log('    Error while saving:')
                dLogger.exception(e)
                dLogger.pretty(str(yt_comment).encode('unicode_escape'))
Esempio n. 7
0
def run_harvester_search(harvester):
    """Run every Twitter search configured on ``harvester``.

    Logs the harvester and its stats, then passes all of its
    ``twsearch_to_harvest`` entries to ``search_all_terms``. A
    ``twitter.TwitterError`` is logged and swallowed; other exceptions
    propagate to the caller.
    """
    if debugging:
        dLogger.log("run_harvester_search(harvester: %s)" % (harvester))

    logger.info(u"START SEARCH: %s Stats:%s" %
                (harvester, unicode(harvester.get_stats())))
    try:
        all_twsearch = harvester.twsearch_to_harvest.all()
        search_all_terms(harvester, all_twsearch)
    except twitter.TwitterError:
        # The exception object itself was never used; logger.exception
        # already records the active traceback.
        msg = u"ERROR for %s" % harvester
        logger.exception(msg)
        if debugging: dLogger.exception(msg)
Esempio n. 8
0
    def handle(self, *args, **options):
        """Entry point of the Twitter cron command: run the harvesters once.

        Any exception escaping ``run_twitter_harvester`` is reported on
        stdout and in both logs; the closing log line is always written.
        """
        # The instance is kept in a local for the whole call
        # (presumably it holds the "crontw" single-instance lock - confirm).
        me = singleton.SingleInstance(flavor_id="crontw")

        try:
            logger.info("Will run the Twitter harvesters.")
            twitterch.run_twitter_harvester()
        except:
            print("Global failure. exception logged in 'twitter.log'")
            top_level_report = u"Highest exception for the twitter cron. Not good."
            logger.exception(top_level_report)
            dLogger.exception('TOP LEVEL ERROR:')

        trailing_padding = "     " * 200
        logger.info("The harvest has end for the Twitter harvesters." +
                    trailing_padding)
Esempio n. 9
0
def update_user_batch(harvester, user_batch):
    """Refresh a batch of Twitter users from one 'UsersLookup' API call.

    The screen names of ``user_batch`` are sent in a single lookup; each
    returned model is applied to the matching user through
    ``update_from_twitter``. One lookup hit is deducted from
    ``harvester.remaining_user_lookup_hits`` and the harvester is saved.

    Errors are never raised to the caller: they are logged through dLogger,
    and only when ``debugging`` is set.
    """
    if debugging:
        dLogger.log("update_user_batch(%d items)" %
                    (len(user_batch) if user_batch else 0))
    userList = []
    # Users of the batch that already have a fid, keyed by that fid.
    userObjects = {}
    try:
        for user in user_batch:
            userList.append(user.screen_name)
            if user.fid:
                userObjects[user.fid] = user
        #dLogger.log('    userObjects: %s'%userObjects)
        twModels = harvester.api_call('UsersLookup', {
            'screen_name': userList,
            'include_entities': True
        })
        harvester.remaining_user_lookup_hits -= 1
        harvester.save()

        for twModel in twModels:
            #dLogger.pretty(twModel.AsDict())
            try:
                userObjects[twModel.id].update_from_twitter(twModel)
            except KeyError:
                # The returned id is not a key of userObjects: look the user
                # up in the database, first by fid, then by screen name.
                try:
                    snh_user = TWUser.objects.get(fid=twModel.id)
                except:
                    dLogger.log('    GETTING USER BY SCREEN_NAME')
                    snh_user = TWUser.objects.get(
                        screen_name=twModel.screen_name)
                try:
                    snh_user.update_from_twitter(twModel)
                except:
                    # The update failed: reassign the statuses of the user
                    # found by screen name to snh_user, delete that user,
                    # then retry the update.
                    # NOTE(review): when snh_user itself was fetched by screen
                    # name above, `user` may be the same row - confirm.
                    dLogger.log('    NEED TO TRANSFER USER!')
                    user = TWUser.objects.get(screen_name=twModel.screen_name)
                    for status in user.postedStatuses.all():
                        status.user = snh_user
                        status.save()
                    user.delete()
                    snh_user.update_from_twitter(twModel)

            except:
                # Any other failure for this model is logged (debug mode
                # only) and the loop moves on to the next model.
                if debugging:
                    dLogger.exception("ERROR UPDATING FROM TWITTER: %s" %
                                      twModel.screen_name)
                pass
    except:
        # NOTE(review): outside debug mode a failure of the whole batch
        # leaves no trace at all.
        if debugging: dLogger.exception("ERROR WHILE UPDATING USER BATCH:")
def update_user_status_from_batch(harvester, snhuser, status):
    """Queue a raw Facebook post as an FBResult row of type "FBPost".

    Args:
        harvester: harvester recorded on the result.
        snhuser: user whose ``fid`` becomes the result's ``parent``.
        status: raw post; stored as ``result``, with ``status["id"]`` used
            as the result's ``fid``.

    Failures are logged and swallowed; nothing is returned.
    """
    try:
        res = FBResult()
        res.harvester = harvester
        res.result = status
        res.ftype = "FBPost"
        res.fid = status["id"]
        res.parent = snhuser.fid
        res.save()
    except Exception:
        if debugging:
            dLogger.exception('ERROR WHILE CREATING A NEW FBRESULT<FBPOST>:')
            dLogger.log('    snhuser: %s' % snhuser)
            dLogger.log('    status: %s' % status)
        # The placeholder used to be logged verbatim because no value was
        # ever supplied for it.
        logger.debug('Error while adding %s\'s status' % snhuser)
def api_one_zero(request, command):
    """Dispatch an API command.

    ``authent`` is always served. Any other command requires the ``oauth``
    GET parameter to equal DEFAULT_OAUTH_KEY, otherwise an
    'UnauthentifiedError' response is returned. Unexpected exceptions are
    logged and turned into an 'UnknownServerError' response.
    """
    try:
        dLogger.log(command)

        if command == 'authent':
            return authent(request)

        token_is_valid = ('oauth' in request.GET
                          and request.GET['oauth'] == DEFAULT_OAUTH_KEY)
        if token_is_valid:
            return command_management(command, request)

        return error(
            'UnauthentifiedError',
            'You must use an authentification token to use AspirAPI',
            command)
    except Exception:
        dLogger.exception('Error occured in an API view')
        dLogger.log(request)
        return error('UnknownServerError',
                     'An error has occured while proceeding the request',
                     command)
def run_harvester_v3(harvester):
    """Run one full harvest cycle for ``harvester``.

    Sequence: compute results, update the user batch, update the users'
    statuses (skipped for the harvester named 'FBUser Updater'), compute
    results again. Any exception is logged and swallowed; the harvest is
    always closed and its stats logged afterwards.
    """
    if debugging:
        dLogger.log("run_harvester_v3()")

    harvester.start_new_harvest()
    try:
        compute_results(harvester)
        update_user_batch(harvester)
        if not (harvester.harvester_name == 'FBUser Updater'):
            update_user_statuses_batch(harvester)
        compute_results(harvester)
    except:
        failure = u"EXCEPTION: %s" % harvester
        logger.exception(failure)
        if debugging:
            dLogger.exception(failure)
    finally:
        harvester.end_current_harvest()
        stats_text = unicode(harvester.get_stats())
        logger.info(u"End: %s Stats:%s" % (harvester, stats_text))
    def update_user_status(self, fbstatus, user):
        """Create or refresh the FBPost for a raw Facebook status and its likes.

        The post is looked up by ``fbstatus["id"]`` (an empty one owned by
        ``user`` is saved first when missing) and updated from the raw data.
        All FBResult rows of type "FBPost.likes" whose parent is that id are
        then merged into one list, applied to the post and deleted.

        Returns the FBPost obtained along the way, or None when none could be
        fetched or created. Failures are logged, not raised.
        """
        if debugging:
            dLogger.log("<ThreadStatus#%s>::update_user_status()" % self.ident)
            dLogger.log("    id: %s" % fbstatus["id"])

        snh_status = None
        try:
            try:
                snh_status = FBPost.objects.get(fid=fbstatus["id"])
            except ObjectDoesNotExist:
                snh_status = FBPost(user=user)
                snh_status.save()
                if debugging:
                    dLogger.log(
                        "    New empty status created, to be processed.")
            snh_status.update_from_facebook(fbstatus, user)
            likes_list = FBResult.objects.filter(ftype="FBPost.likes").filter(
                parent=fbstatus["id"])
            all_likes = []
            if debugging:
                dLogger.log('    likes_list: %s, parent: %s' %
                            (likes_list, fbstatus["id"]))
            for likes in likes_list:
                # NOTE(review): eval() executes whatever text is stored in
                # FBResult.result; only safe while that column holds trusted
                # data written by the harvester itself.
                all_likes += eval(likes.result)
            snh_status.update_likes_from_facebook(all_likes)
            # The pending like results are removed only after they were
            # applied to the post.
            likes_list.delete()
            #if debugging: dLogger.log("    deleted likes_List %s"%likes_list)
        except IntegrityError:
            # Second chance: fetch the post again and redo the update
            # (the likes are not re-processed on this path).
            try:
                snh_status = FBPost.objects.get(fid=fbstatus["id"])
                snh_status.update_from_facebook(fbstatus, user)
            except ObjectDoesNotExist:
                msg = u"<p style='red'>ERROR! Post already exist but not found %s for %s</p>" % (
                    unicode(fbstatus), user.fid if user.fid else "0")
                logger.exception(msg)
                if debugging: dLogger.exception(msg)
        except:
            # Only the first 100 characters of the raw status are logged.
            msg = u"<p style='red'>Cannot update status %s for %s</p>" % (
                unicode(fbstatus)[:100], user.fid if user.fid else "0")
            logger.exception(msg)
            if debugging: dLogger.exception(msg)
        return snh_status
def fill_db():
    """Feed FBResult rows to ``create_post`` while printing a progress bar.

    Python 2 only: the progress display relies on ``print`` statements with a
    trailing comma (no newline) and on carriage returns to redraw the line.
    Any exception is logged through dLogger and ends the run.
    """
    try:
        dLogger.log('fill_db()')
        all_queries = FBResult.objects.all()
        count = FBResult.objects.count()
        BAR_LENGTH = 50
        print 'db_count: %i\n' % count,
        print 'Progress:[%s]0%%' % (' ' * BAR_LENGTH),
        # NOTE(review): the range stops at count - 2, so the row at index
        # count - 1 is never processed - confirm whether that is intended.
        for i in range(0, count - 1):
            # Redraw the bar every 100 rows.
            if i % 100 == 0:
                print '\r',
                print 'Progress:[%s%s]%s/%s' % (
                    '#' * (i * BAR_LENGTH // count), ' ' *
                    (BAR_LENGTH - i * BAR_LENGTH // count), i, count),
                # Every 1000 rows, also print the current table size.
                if i % 1000 == 0:
                    print '\r\r',
                    print 'db_count: %i\n' % FBResult.objects.count(),
            # NOTE(review): indexing the queryset by position assumes the
            # table does not shrink while looping; verify what create_post
            # does to the FBResult rows.
            create_post(all_queries[i])
    except:
        dLogger.exception('EXCEPTION:')
Esempio n. 15
0
def api_statuses_lookup(harvester, ids, include_entities=False):
    """Call Twitter's statuses/lookup endpoint with the harvester's credentials.

    Args:
        harvester: provides the OAuth1 keys; on success its
            ``remaining_search_hits`` is decremented and it is saved.
        ids: value placed verbatim in the ``id`` query parameter.
        include_entities: when true, ``include_entities=1`` is appended.

    Returns:
        The decoded JSON body, or an empty list when the request or the
        decoding failed (the error is logged through dLogger).
    """
    if debugging:
        dLogger.log('api_statuses_lookup()')

    lookup_url = 'https://api.twitter.com/1.1/statuses/lookup.json?id=%s' % ids
    if include_entities:
        lookup_url = lookup_url + '&include_entities=1'

    credentials = OAuth1(harvester.consumer_key, harvester.consumer_secret,
                         harvester.access_token_key,
                         harvester.access_token_secret)

    payload = []
    try:
        payload = requests.get(lookup_url, auth=credentials).json()
        # Account for the hit only once a body was decoded.
        harvester.remaining_search_hits -= 1
        harvester.save()
    except Exception as lookup_error:
        dLogger.exception(lookup_error)

    return payload
    def run(self):
        """Worker loop: turn queued FBPost results into FBPost rows.

        For each fid taken from ``self.queue`` the matching FBResult of type
        "FBPost" is loaded, its owner is resolved through the result's
        ``parent``, the post is stored with ``update_user_status`` and the
        FBResult row is deleted. ``task_done()`` is signalled after every
        iteration, whatever its outcome.
        """
        if debugging:
            dLogger.log("<ThreadStatus#%s>::run()" % self.ident)

        logger.info(u"ThreadStatus %s. Start." % self)
        while True:
            try:
                # NOTE(review): a blocking get() without timeout does not
                # raise Queue.Empty, so the Queue.Empty branch below looks
                # unreachable - confirm how these threads are meant to stop.
                fid = self.queue.get()
                fbpost = FBResult.objects.filter(fid=fid).filter(
                    ftype="FBPost")[0]
                user = FBUser.objects.get(fid=fbpost.parent)
                # NOTE(review): eval() executes whatever text is stored in
                # FBResult.result; only safe while that column holds trusted
                # data written by the harvester itself.
                rez = eval(fbpost.result)
                self.update_user_status(rez, user)
                fbpost.delete()
                qsize = self.queue.qsize()
                if debugging: dLogger.log("    %s Posts left in queue" % qsize)
                if qsize % 100 == 0:
                    logger.info("    less than %s posts left in queue" %
                                qsize)
            except ObjectDoesNotExist:
                # `msg` was previously used here without being defined, which
                # raised NameError in debug mode.
                msg = "DEVED %s %s" % (fbpost.parent, fbpost.ftype)
                logger.exception(msg)
                if debugging: dLogger.exception(msg)
            except Queue.Empty:
                logger.info(u"ThreadStatus %s. Queue is empty." % self)
                break
            except:
                msg = u"ThreadStatus %s. Error" % self
                logger.exception(msg)
                if debugging: dLogger.exception(msg)
                self._Thread__stop()
            finally:
                # Signals to the queue that the job is done.
                self.queue.task_done()
        logger.info(u"ThreadStatus %s. End." % self)
        if debugging: dLogger.log("    <ThreadStatus#%s> ended" % self.ident)
    def run(self):
        """Worker loop: turn queued comment results into FBComment rows.

        For each non-empty fid taken from ``self.queue`` the first FBResult
        with that fid is loaded, its parent FBPost is fetched, the comment is
        stored with ``update_comment_status`` and the FBResult row is
        deleted. ``task_done()`` is signalled after every iteration.
        """
        if debugging:
            dLogger.log("<ThreadComment#%s>::run()" % self.ident)

        logger.info(u"ThreadComment %s. Start." % self)
        while True:
            try:
                fid = self.queue.get()
                if not fid:
                    logger.error(u"ThreadComment %s. fid is none! %s." %
                                 (self, fid))
                else:
                    pending = FBResult.objects.filter(fid=fid)[0]
                    parent_post = FBPost.objects.get(fid=pending.parent)
                    # The stored result is evaluated back into a Python
                    # object before being applied.
                    raw_comment = eval(pending.result)
                    self.update_comment_status(raw_comment, parent_post)

                    pending.delete()
                    remaining = self.queue.qsize()
                    if debugging:
                        dLogger.log("    %s Comments left in queue" %
                                    remaining)
                    if remaining % 10000 == 0:
                        logger.info("    less than %s comments left in queue" %
                                    remaining)
            except Queue.Empty:
                logger.info(u"ThreadComment %s. Queue is empty." % self)
                break
            except:
                failure = u"<p style='red'>ThreadComment %s. Error.</p>" % self
                logger.exception(failure)
                if debugging:
                    dLogger.exception(failure)
            finally:
                # Signals to the queue that the job is done.
                self.queue.task_done()

        logger.info(u"ThreadComment %s. End." % self)
        if debugging:
            dLogger.log("    <ThreadComment#%s> ended" % self.ident)
def get_fb_harvester_post_list(request, call_type, harvester_id):
    """Return the datatables records of Facebook posts for a harvester.

    ``harvester_id == '0'`` selects every FBPost; any other value selects the
    posts whose user is handled by that FacebookHarvester.

    Raises:
        Http404: when ``harvester_id`` matches no FacebookHarvester. The 404
            used to be swallowed by a bare ``except``, after which a None
            queryset was passed on to the datatables helper.
    """
    # Imported locally so the 404 raised by get_list_or_404 can be re-raised
    # without relying on the module's import list.
    from django.http import Http404

    querySet = None

    #columnIndexNameMap is required for correct sorting behavior
    columnIndexNameMap = {
        0: u'created_time',
        1: u'fid',
        2: u'ffrom__username',
        3: u'name',
        4: u'description',
        5: u'caption',
        6: u'message',
        7: u'link__original_url',
        8: u'ftype',
        9: u'likes_count',
        10: u'shares_count',
        11: u'comments_count',
        12: u'application_raw',
        13: u'updated_time',
        14: u'story',
        15: u'ffrom__name',
        16: u'ffrom__fid',
    }
    try:
        if harvester_id == '0':
            querySet = FBPost.objects.all()
        else:
            harvester = get_list_or_404(FacebookHarvester, pk=harvester_id)[0]
            querySet = FBPost.objects.filter(
                user__harvester_in_charge=harvester)
    except Http404:
        raise
    except Exception:
        # Other failures are still only logged; querySet stays None.
        dLogger.exception("EXCEPTION OCCURED IN get_fb_harvester_post_list")
    #call to generic function from utils
    return get_datatables_records(request, querySet, columnIndexNameMap,
                                  call_type)
Esempio n. 19
0
def manage_twitter_exception(retry_count, harvester, user, tex):
    """Decide how to continue after a Twitter error raised while harvesting ``user``.

    Must be called from inside an ``except`` block: the rate-limit branch
    re-raises the exception currently being handled.

    Args:
        retry_count: number of retries so far; it is incremented here.
        harvester: provides ``max_retry_on_fail`` and ``update_client_stats``.
        user: user being harvested; flagged with ``error_triggered`` when
            the page does not exist or the error is unrecognised.
        tex: the Twitter exception; only its text form is inspected.

    Returns:
        ``(retry_count, need_a_break)``. ``need_a_break`` is True when the
        retry budget is exhausted, or when the error makes retrying pointless
        (missing user, invalid query, not authorized).
    """
    if debugging: dLogger.log("manage_twitter_exception()")

    retry_count += 1
    need_a_break = retry_count > harvester.max_retry_on_fail
    reason = unicode(tex)

    # Substring test: the previous `.find(...)` used as a boolean was true
    # for -1 (text absent), so nearly every error was handled as a missing
    # user and the branches below were almost never reached.
    if u"Sorry, that page does not exist." in reason:
        user.error_triggered = True
        user.save()
        need_a_break = True
        msg = u"Exception for the harvester %s for %s. Retry:%d. The user does not exists!" % (
            harvester, unicode(user), retry_count)
        logger.exception(msg)
        if debugging: dLogger.exception(msg)
    elif reason == u"Capacity Error":
        logger.debug(u"%s:%s. Capacity Error. Retrying." %
                     (harvester, unicode(user)))
    elif reason.startswith(u"Rate limit exceeded"):
        harvester.update_client_stats()
        msg = u"Exception for the harvester %s for %s. Retry:%d." % (
            harvester, unicode(user), retry_count)
        logger.exception(msg)
        if debugging: dLogger.exception(msg)
        # Re-raise the exception being handled by the caller.
        raise
    elif reason == u"{u'error': u'Invalid query'}" or reason == u"Invalid query":
        logger.debug(u"%s:%s. Invalid query. Breaking." %
                     (harvester, unicode(user)))
        need_a_break = True
    elif reason == u"Not authorized":
        logger.debug(
            u"Error occured in %s:%s, the user has disabled scrapping." %
            (harvester, unicode(user)))
        need_a_break = True
    else:
        msg = u"Exception for the harvester %s for %s. Retry:%d. %s" % (
            harvester, unicode(user), retry_count, tex)
        logger.exception(msg)
        if debugging: dLogger.exception(msg)
        user.error_triggered = True
        user.save()

    return (retry_count, need_a_break)
    def update_from_rawtwitter(self,
                               twitter_model,
                               user,
                               keepRaw,
                               twython=False):
        """Update this status from a raw (dict-like) Twitter status.

        Scalar fields and ``created_at`` are copied when they differ, then
        the hashtags, URLs and user mentions found under ``entities`` are
        linked to the status, creating Tag/URL/TWUser rows as needed. The
        status is saved only when something changed.

        Args:
            twitter_model: raw status; read with ``in`` and ``[...]``.
            user: author; always assigned to ``self.user``.
            keepRaw: when true and the model changed, the raw status is also
                stored in (or written over) the first related raw-response
                row.
            twython: forwarded to ``TWUser.update_from_rawtwitter`` for the
                mentioned users; it currently selects the same date format
                as the default here.
        """
        #if debugging:
        #dLogger.log("%s::update_from_rawtwitter()"%self)
        #dLogger.pretty(twitter_model)

        model_changed = False
        # Model attribute name -> key in the raw Twitter status.
        props_to_check = {
            u"fid": u"id",
            u"favorited": u"favorited",
            u"retweet_count": u"retweet_count",
            u"retweeted": u"retweeted",
            u"source": u"source",
            u"text": u"text",
            u"truncated": u"truncated",
        }

        date_to_check = ["created_at"]

        self.user = user

        # Values are read and written through self.__dict__ directly.
        for prop in props_to_check:
            prop_name = props_to_check[prop]
            if prop_name in twitter_model:
                tw_prop_val = twitter_model[prop_name]
                if self.__dict__[prop] != tw_prop_val:
                    self.__dict__[prop] = tw_prop_val
                    model_changed = True
                    #if debugging: dLogger.log('    %s has changed: %s'%(prop, self.__dict__[prop]))

        for prop in date_to_check:
            if prop in twitter_model:
                tw_prop_val = twitter_model[prop]
                # NOTE(review): both branches use the same pattern, so the
                # `twython` flag has no effect here; `format` also shadows
                # the builtin of the same name.
                format = '%a %b %d %H:%M:%S +0000 %Y'
                if twython:
                    format = '%a %b %d %H:%M:%S +0000 %Y'
                date_val = datetime.strptime(tw_prop_val, format)
                if self.__dict__[prop] != date_val:
                    self.__dict__[prop] = date_val
                    model_changed = True

        if "entities" in twitter_model:
            entities = twitter_model["entities"]
            if "hashtags" in entities:
                tw_prop_val = entities["hashtags"]
                for twtag in tw_prop_val:
                    tag = None
                    try:
                        tag = Tag.objects.get(text__exact=twtag["text"])
                    except:
                        # Any lookup failure is treated as "tag not found".
                        pass

                    if tag is None:
                        tag = Tag(text=twtag["text"])
                        try:
                            tag.save()
                        except:
                            # Saving the text as-is failed: retry with the
                            # unicode-escaped form of the text.
                            tag = Tag(
                                text=twtag["text"].encode('unicode-escape'))
                            tag.save()
                        self.hash_tags.add(tag)
                        model_changed = True
                    else:

                        if tag not in self.hash_tags.all():
                            self.hash_tags.add(tag)
                            model_changed = True

            if "urls" in entities:
                tw_prop_val = entities["urls"]
                for twurl in tw_prop_val:
                    url = None
                    try:
                        url = URL.objects.get(original_url__exact=twurl['url'])
                    except:
                        # Any lookup failure is treated as "URL not found".
                        pass

                    if url is None:
                        url = URL(original_url=twurl['url'])
                        url.save()
                        self.text_urls.add(url)
                        model_changed = True
                    elif url not in self.text_urls.all():
                        self.text_urls.add(url)
                        model_changed = True

            if "user_mentions" in entities:
                tw_prop_val = entities["user_mentions"]
                for tw_mention in tw_prop_val:
                    usermention = None
                    try:
                        # Resolve the mentioned user by fid, then by screen
                        # name, and build a new TWUser when both fail.
                        usermention = self.get_existing_user(
                            {"fid": tw_mention['id']})
                        if not usermention:
                            usermention = self.get_existing_user(
                                {'screen_name': tw_mention['screen_name']})
                        #if debugging: dLogger.log("    usermention: %s"%usermention)
                        if not usermention:
                            usermention = TWUser(
                                fid=tw_mention['id'],
                                screen_name=tw_mention['screen_name'],
                                harvester=user.harvester)
                        usermention.update_from_rawtwitter(tw_mention, twython)
                        usermention.save()
                        #if debugging: dLogger.log("    user created from user mention: %s"%usermention)
                    except:
                        # Failures are logged in debug mode only; usermention
                        # keeps whatever value it had when the error occurred.
                        if debugging:
                            dLogger.exception(
                                "Exception occured while saving user:")

                    if usermention is None:
                        # Fallback: create the user from the fid alone.
                        usermention = TWUser(fid=tw_mention['id'],
                                             harvester=user.harvester)
                        usermention.update_from_rawtwitter(tw_mention, twython)
                        usermention.save()
                        self.user_mentions.add(usermention)
                        model_changed = True
                    else:
                        if usermention not in self.user_mentions.all():
                            self.user_mentions.add(usermention)
                            model_changed = True

        if model_changed:
            self.model_update_date = datetime.utcnow()
            self.error_on_update = False

            if keepRaw:
                # Write over the first stored raw response, or create one.
                raw_data = self.raw_twitter_response.all()
                if len(raw_data) > 0:
                    raw_data[0].data = twitter_model
                    raw_data[0].save()
                else:
                    raw_data = TWStatusRaw.objects.create(snh_status=self,
                                                          data=twitter_model)

            try:
                self.save()
            except:
                # Saving failed: retry with unicode-escaped text and source.
                self.text = self.text.encode('unicode-escape')
                self.source = self.source.encode('unicode-escape')
                self.save()
    def update_from_twitter(self, twitter_model, user, keepRaw):
        #if debugging:
        #dLogger.log("update_from_twitter()")
        #dLogger.log("    twitter_model: %s"%twitter_model)

        model_changed = False
        props_to_check = {
            u"fid": u"id",
            u"favorited": u"favorited",
            u"retweet_count": u"retweet_count",
            u"retweeted": u"retweeted",
            u"source": u"source",
            u"text": u"text",
            u"truncated": u"truncated",
        }

        date_to_check = ["created_at"]

        self.user = user

        for prop in props_to_check:
            prop_name = "_" + props_to_check[prop]
            if prop_name in twitter_model.__dict__:
                tw_prop_val = twitter_model.__dict__[prop_name]
                if self.__dict__[prop] != tw_prop_val:
                    self.__dict__[prop] = tw_prop_val
                    model_changed = True
                    #if debugging: dLogger.log('    %s has changed: %s'%(prop, self.__dict__[prop]))

        for prop in date_to_check:
            prop_name = "_" + prop
            if prop_name in twitter_model.__dict__:
                tw_prop_val = twitter_model.__dict__[prop_name]
                date_val = datetime.strptime(tw_prop_val,
                                             '%a %b %d %H:%M:%S +0000 %Y')
                if self.__dict__[prop] != date_val:
                    self.__dict__[prop] = date_val
                    model_changed = True

        if "hashtags" in twitter_model.__dict__:
            tw_prop_val = twitter_model.__dict__["hashtags"]
            for twtag in tw_prop_val:
                tag = None
                try:
                    tag = Tag.objects.filter(text=twtag.text)[0]
                except:
                    pass

                if tag is None:
                    try:
                        tag = Tag(text=twtag.text)
                        tag.save()
                    except:
                        tag = Tag(text=twtag.text.encode('unicode-escape'))
                        tag.save()
                    self.hash_tags.add(tag)
                    model_changed = True
                else:

                    if tag not in self.hash_tags.all():
                        self.hash_tags.add(tag)
                        model_changed = True

        if "urls" in twitter_model.__dict__:
            tw_prop_val = twitter_model.__dict__["urls"]
            for twurl in tw_prop_val:
                url = None
                try:
                    url = URL.objects.filter(original_url=twurl.url)[0]
                except:
                    pass

                if url is None:
                    url = URL(original_url=twurl.url)
                    url.save()
                    self.text_urls.add(url)
                    model_changed = True
                else:

                    if url not in self.text_urls.all():
                        self.text_urls.add(url)
                        model_changed = True

        if "user_mentions" in twitter_model.__dict__:
            tw_prop_val = twitter_model.__dict__["user_mentions"]
            for tw_mention in tw_prop_val:
                usermention = None
                try:
                    usermention = self.get_existing_user(
                        {"fid": tw_mention.id})
                    #if debugging: dLogger.log("    usermention: %s"%usermention)
                    if not usermention:
                        usermention = self.get_existing_user(
                            {"screen_name": tw_mention.screen_name})
                    if not usermention:
                        usermention = TWUser(
                            fid=tw_mention.id,
                            screen_name=tw_mention.screen_name,
                            harvester=user.harvester)
                        usermention.update_from_twitter(tw_mention)
                        usermention.save()
                        if debugging:
                            dLogger.log(
                                "    user created from user mention: %s" %
                                usermention)
                except:
                    if debugging:
                        dLogger.exception(
                            "AN EXCEPTION OCCURED WHILE CREATING NEW USER:"******"    Status %s has changed. Updated" % self)