Beispiel #1
0
def user_search(user, days_ago=None, nosmoothing=False):
    """Collect a user's search-related activity logs.

    Decodes the JSON payload of every 'search' activity log, keeps only
    non-empty queries, and — unless ``nosmoothing`` is true — drops
    events that fall within 10 seconds of the previously kept event.
    'clear-search' events are recorded with an empty query string.
    Results are memoized in the module-level ``search_cache`` and
    ``search_query_cache`` dicts keyed by ``user.id``.

    Returns:
        (searches, freqdist): the list of annotated activity-log dicts
        and an ``nltk.FreqDist`` counting the query strings.
    """
    from jv3.study.content_analysis import activity_logs_for_user
    global search_cache
    global search_query_cache

    # Collapse runs of repeated log entries before scanning.
    alogs = wuw.reduceRepeatLogsValues(
        activity_logs_for_user(user, None, days_ago))

    searches = []
    queries = []
    last_time = 0
    smoothing_ms = 10 * 1000  # 10 second smoothing window
    for al in alogs:
        if al["action"] == 'search':
            try:
                query = JSONDecoder().decode(al["search"])
            except:  # malformed/absent JSON payload -- skip this entry
                continue
            if type(query) == dict:
                key = None
                if 'search' in query: key = 'search'
                if 'query' in query: key = 'query'
                if key is not None:
                    # no empty searches pls
                    # BUGFIX: parenthesize the `or` group.  Previously
                    # `A and B or C` parsed as `(A and B) or C`, so an
                    # empty query was still accepted once the smoothing
                    # window elapsed.  Now it mirrors the clear-search
                    # branch below: non-empty AND (nosmoothing OR window).
                    if len(query[key].strip()) > 0 and (
                            nosmoothing or
                            long(al['when']) - long(last_time) > smoothing_ms):
                        queries.append(query[key])
                        al['query'] = query[key]
                        al['hits'] = query.get('hits', [])
                        searches.append(al)
                        last_time = al['when']
        elif al["action"] == 'clear-search' and (
                nosmoothing or
                long(al['when']) - long(last_time) > smoothing_ms):
            al['query'] = ''  # clear-search carries no query text
            searches.append(al)
            last_time = al["when"]

    search_cache[user.id] = searches
    search_query_cache[user.id] = queries

    return searches, nltk.FreqDist(queries)
Beispiel #2
0
def user_search(user,days_ago=None,nosmoothing=False):
    """Collect a user's search-related activity logs.

    Decodes each 'search' log's JSON payload, keeps non-empty queries,
    and (unless ``nosmoothing``) discards events within 10 seconds of
    the previously kept one; 'clear-search' events are kept with an
    empty query.  Memoizes into the module-level ``search_cache`` /
    ``search_query_cache`` dicts keyed by ``user.id``.

    Returns (searches, nltk.FreqDist of query strings).
    """
    from jv3.study.content_analysis import activity_logs_for_user
    global search_cache
    global search_query_cache

    # collapse repeated log entries before scanning
    alogs = wuw.reduceRepeatLogsValues(activity_logs_for_user(user,None,days_ago))

    searches = []
    queries = []
    last_time = 0
    for al in alogs:
        if al["action"] == 'search':
            try:
                query = JSONDecoder().decode(al["search"])
            except:  # malformed JSON payload -- skip
                continue
            if type(query) == dict:
                key = None
                if 'search' in query: key = 'search'
                if 'query' in query: key = 'query'
                if key is not None:
                    # no empty searches pls
                    # BUGFIX: parenthesized the `or` group -- previously
                    # `(non-empty and nosmoothing) or window-elapsed` let
                    # empty queries through; now matches the clear-search
                    # branch: non-empty AND (nosmoothing OR window-elapsed).
                    if len(query[key].strip()) > 0 and (nosmoothing or long(al['when'])-long(last_time) > (10*1000)):  # 10 second smoothing
                        queries.append(query[key])
                        al['query'] = query[key]
                        al['hits'] = query.get('hits',[])
                        searches.append(al)
                        last_time = al['when']
        elif al["action"] == 'clear-search' and (nosmoothing or long(al['when'])-long(last_time) > (10*1000)):
            al['query'] = ''  # clear-search carries no query text
            searches.append(al)
            last_time = al["when"]

    search_cache[user.id] = searches
    search_query_cache[user.id] = queries

    return searches,nltk.FreqDist(queries)
Beispiel #3
0
def note_ss(note,filter_top=False):
    """Return significant-scroll viewing stats for *note*.

    Scans the note owner's activity logs for 'significant-scroll'
    events, extracts per-note (entryTime, exitTime) intervals, and
    memoizes them in the module-level __sigscroll_startend_cache_flat
    dict (keyed by nid).  Returns
    {'sigscroll_counts': <number of intervals>,
     'sigscroll_duration': <summed interval length, or -1 if none>}.

    If *filter_top* is true, notes already visible right after a
    'sidebar-open' event are skipped -- presumably to discount passive
    exposure at the top of the list; TODO confirm intent.
    """
    from jv3.study.content_analysis import activity_logs_for_user
    from jv3.study.ca_load import jid2nidforuser
    global __sigscroll_startend_cache_flat
    SSCF = __sigscroll_startend_cache_flat

    def compute_duration(note):
        # Sum lengths of all cached intervals for this note's id.
        def dur(send):
            # An element is either an (entry, exit) tuple or a number
            # (reduce feeds its accumulated sum back through here).
            if type(send) == tuple: return send[1]-send[0]
            return send        
        xd = SSCF.get(note["id"],[])
        if len(xd) > 1:
            return reduce(lambda x,y: dur(x)+dur(y),xd)
        elif len(xd) == 1:
            return dur(xd[0])
        return -1  # no recorded views
    
    # Cache hit: intervals were computed by an earlier call for this user.
    if note["id"] in SSCF :
        return {'sigscroll_counts': len(SSCF.get(note["id"],[])), 'sigscroll_duration': compute_duration(note) }

    ## populate for this user
    alogs = activity_logs_for_user(note["owner_id"],None)
    
    # if len(alogs) == 0:
    #     ## means we have no activitylogs for that user
    #     from jv3.study.content_analysis import _notes_to_features
    #     SSCF.update( [ (n["id"],[]) for n in [_notes_to_values(x) for x in Note.objects.filter(owner=n["owner_id"])] ] )
    #     return

    debug__all_nids = []
    
    # next_is_top marks that the next significant-scroll after a
    # 'sidebar-open' shows the default (top-of-list) set of notes.
    next_is_top = True    
    toplist_jids = [] # things to block
    alogs.sort(key=lambda x: x["when"])  # process logs chronologically
    print "activity logs", len(alogs)
    for al_i in range(len(alogs)):
        print al_i
        al = alogs[al_i]        
        if al["action"] == 'sidebar-open':
            next_is_top = True
            continue
        if not al["action"] == "significant-scroll":
            continue
        if al["search"] is None: 
            print "skipping"
            continue
        # NOTE: mutates the log entry in place -- payload is decoded once.
        al["search"] = json.loads(al["search"])        
        if next_is_top:
            # First scroll after sidebar-open: record the top-of-list jids.
            toplist_jids = [long(nv["id"]) for nv in al["search"]["note_visibilities"]]
            ##print "TOPLIST :: %s " % repr(toplist_jids)
            next_is_top = False 
        for nv in al["search"]["note_visibilities"]: 
            try :
                jid = int(nv["id"]) ## this returns the _jid_ not id!
                debug__all_nids.append(jid)
                ## omit notes that are at the top of the list
                if filter_top and jid in toplist_jids:
                    print "filter top and jid in toplist continuing"
                    continue                
                nid = jid2nidforuser(al["owner"],jid)  ## convert to NID (guaranteed unique)
                if nv.has_key("exitTime") and nv.has_key("entryTime"):
                    ap = SSCF.get(nid,[])
                    if nv["entryTime"] == nv["exitTime"]:
                        print " case 1 ",jid
                        ## this is to get around the bug in 0.4.5-7 which
                        ## results in (start,start) for no-scroll open-close, and search/idle
                        ap.append( (nv["entryTime"],long(al["when"])) )
                    else:
                        print "case 2 "
                        ap.append( (nv["entryTime"],nv["exitTime"]) )
                    SSCF[nid] = ap
            except:
                # Best-effort: any failure (bad id, missing owner mapping)
                # skips this visibility record without aborting the scan.
                print "noncritical warn %s " % repr(sys.exc_info())
                pass
            
        ## filter all the newdudes
    print SSCF
    # Merge adjacent/overlapping intervals for every cached note.
    SSCF.update( dict([ (nid,adjacent_filtered(views)) for nid,views in SSCF.iteritems() ]) ) # if (nid in new_dudes) ] ) )

    print "all debug__all_nids",len(set(debug__all_nids))
    return {'sigscroll_counts': len(SSCF.get(note["id"],[])),'sigscroll_duration': compute_duration(note) }
Beispiel #4
0
def note_ss(note, filter_top=False):
    """Return significant-scroll viewing stats for *note*.

    Scans the note owner's activity logs for 'significant-scroll'
    events, extracts per-note (entryTime, exitTime) intervals, and
    memoizes them in the module-level __sigscroll_startend_cache_flat
    dict (keyed by nid).  Returns
    {'sigscroll_counts': <number of intervals>,
     'sigscroll_duration': <summed interval length, or -1 if none>}.

    If *filter_top* is true, notes already visible right after a
    'sidebar-open' event are skipped -- presumably to discount passive
    exposure at the top of the list; TODO confirm intent.
    """
    from jv3.study.content_analysis import activity_logs_for_user
    from jv3.study.ca_load import jid2nidforuser
    global __sigscroll_startend_cache_flat
    SSCF = __sigscroll_startend_cache_flat

    def compute_duration(note):
        # Sum lengths of all cached intervals for this note's id.
        def dur(send):
            # An element is either an (entry, exit) tuple or a number
            # (reduce feeds its accumulated sum back through here).
            if type(send) == tuple: return send[1] - send[0]
            return send

        xd = SSCF.get(note["id"], [])
        if len(xd) > 1:
            return reduce(lambda x, y: dur(x) + dur(y), xd)
        elif len(xd) == 1:
            return dur(xd[0])
        return -1  # no recorded views

    # Cache hit: intervals were computed by an earlier call for this user.
    if note["id"] in SSCF:
        return {
            'sigscroll_counts': len(SSCF.get(note["id"], [])),
            'sigscroll_duration': compute_duration(note)
        }

    ## populate for this user
    alogs = activity_logs_for_user(note["owner_id"], None)

    # if len(alogs) == 0:
    #     ## means we have no activitylogs for that user
    #     from jv3.study.content_analysis import _notes_to_features
    #     SSCF.update( [ (n["id"],[]) for n in [_notes_to_values(x) for x in Note.objects.filter(owner=n["owner_id"])] ] )
    #     return

    debug__all_nids = []

    # next_is_top marks that the next significant-scroll after a
    # 'sidebar-open' shows the default (top-of-list) set of notes.
    next_is_top = True
    toplist_jids = []  # things to block
    alogs.sort(key=lambda x: x["when"])  # process logs chronologically
    print "activity logs", len(alogs)
    for al_i in range(len(alogs)):
        print al_i
        al = alogs[al_i]
        if al["action"] == 'sidebar-open':
            next_is_top = True
            continue
        if not al["action"] == "significant-scroll":
            continue
        if al["search"] is None:
            print "skipping"
            continue
        # NOTE: mutates the log entry in place -- payload is decoded once.
        al["search"] = json.loads(al["search"])
        if next_is_top:
            # First scroll after sidebar-open: record the top-of-list jids.
            toplist_jids = [
                long(nv["id"]) for nv in al["search"]["note_visibilities"]
            ]
            ##print "TOPLIST :: %s " % repr(toplist_jids)
            next_is_top = False
        for nv in al["search"]["note_visibilities"]:
            try:
                jid = int(nv["id"])  ## this returns the _jid_ not id!
                debug__all_nids.append(jid)
                ## omit notes that are at the top of the list
                if filter_top and jid in toplist_jids:
                    print "filter top and jid in toplist continuing"
                    continue
                nid = jid2nidforuser(
                    al["owner"], jid)  ## convert to NID (guaranteed unique)
                if nv.has_key("exitTime") and nv.has_key("entryTime"):
                    ap = SSCF.get(nid, [])
                    if nv["entryTime"] == nv["exitTime"]:
                        print " case 1 ", jid
                        ## this is to get around the bug in 0.4.5-7 which
                        ## results in (start,start) for no-scroll open-close, and search/idle
                        ap.append((nv["entryTime"], long(al["when"])))
                    else:
                        print "case 2 "
                        ap.append((nv["entryTime"], nv["exitTime"]))
                    SSCF[nid] = ap
            except:
                # Best-effort: any failure (bad id, missing owner mapping)
                # skips this visibility record without aborting the scan.
                print "noncritical warn %s " % repr(sys.exc_info())
                pass

        ## filter all the newdudes
    print SSCF
    # Merge adjacent/overlapping intervals for every cached note.
    SSCF.update(
        dict([
            (nid, adjacent_filtered(views)) for nid, views in SSCF.iteritems()
        ]))  # if (nid in new_dudes) ] ) )

    print "all debug__all_nids", len(set(debug__all_nids))
    return {
        'sigscroll_counts': len(SSCF.get(note["id"], [])),
        'sigscroll_duration': compute_duration(note)
    }