Example #1
0
def anova_by_user_feature(krows,feature_name):
    """Fit `feature_name ~ owner_type` over the owners of `krows`.

    krows        -- rows; those for which get_category() returns None are
                    discarded before anything else is computed.
    feature_name -- name of a callable attribute of `wuw`.  It is called
                    with an owner id and the first element of the
                    result's .values() is taken as the feature value.

    Returns a 4-tuple: the formula built by aov.make_fmla(), the result of
    the 'aov' function obtained through `r`, the result of 'TukeyHSD'
    applied to it, and the printed text of r.summary() of the latter.
    """
    def has_category(row):
        return get_category(row) is not None

    # Only rows that carry a category take part in the analysis.
    rows = filter(has_category, krows)
    owner_ids = _filter_user_ids_for_researchers(_get_owners_of_rows(rows))

    # owner id (as long) -> category; if an owner is returned for several
    # categories, the category visited last in `cats` wins.
    category_of_owner = {}
    for cat in cats:
        for owner in get_userids_with_cat(rows, cat):
            category_of_owner[long(owner)] = cat

    def owner_type_of(owner_id):
        return category_of_owner.get(long(owner_id))

    def feature_of(owner_id):
        return getattr(wuw, feature_name)(owner_id).values()[0]

    extractors = {
        "owner_type" : owner_type_of,
        feature_name : feature_of,
    }

    model = aov.make_fmla(
        '%s ~ owner_type' % feature_name,
        owner_ids,
        extractors,
        ['owner_type'])

    aov.compute_averages(owner_ids, extractors, feature_name, 'owner_type')

    fit = r('aov')(model)
    tukey = r('TukeyHSD')(fit)

    # Capture the printed form of the summary as a string.
    buf = StringIO.StringIO()
    print >>buf, r.summary(tukey)
    return model, fit, tukey, buf.getvalue()
Example #2
0
def aov(arows, feature_name, formula = "%s ~ primary + owner_id"):
    """Build the formula object for `feature_name` over the notes in `arows`.

    arows        -- rows whose element 0 is a note id; the element at index
                    aci('owner_id') is read as that note's owner.
    feature_name -- passed to get_feature_for_note() for every note and
                    substituted into `formula`.
    formula      -- template with one %s slot that receives `feature_name`.

    Returns the value produced by au.make_fmla_repeat_when_lists().
    au.compute_averages() is called on the same inputs before returning;
    its return value is not used.
    """
    # Not referenced below; left in place in case the import itself matters
    # -- TODO confirm and drop if it does not.
    import jv3.study.keeping_labels

    def owner_of_note(nid):
        # Uses the first row whose note id equals nid.  This is a linear
        # scan of arows per call and raises IndexError when nothing matches.
        matching = [row for row in arows if row[0] == nid]
        return matching[0][aci('owner_id')]

    def feature_of_note(note_id):
        return get_feature_for_note(note_id, feature_name)

    def role_of_note(nid):
        return get_note_role(arows, nid)

    feature_functions = {
        feature_name : feature_of_note,
        'primary' : role_of_note,
        'owner_id' : owner_of_note,
    }

    # Distinct note ids, in set iteration order.
    note_ids = list(set(row[0] for row in arows))

    fmla = au.make_fmla_repeat_when_lists(
        formula % feature_name,
        note_ids,
        feature_functions,
        ['primary', 'owner_id'])
    au.compute_averages(note_ids, feature_functions, feature_name, 'primary')
    return fmla
Example #3
0
def anova_by_note_feature(krows,feature_name):
    owners = _get_owners_of_rows(krows)
    owners = _filter_user_ids_for_researchers(owners)

    owner_types = {}
    # populate owner -> cat mapping 
    for cat in cats: [ owner_types.update({long(owner) : cat}) for owner in get_userids_with_cat(krows,cat) ]

    # gets rid of dudes who have no category
    owners = list(owner_types.keys())
    print "SOURCE OWNER SET : ", len(owners)
    notes_of_owners = _get_all_notes_owned(owners)                                                  

    def compfeat(n,name):
        #print "COMPFEAT ", name, " ", n["id"]
        #if intent.get_feature_for_note(n["id"],feature_name) < 0:
        #print "getting feature for note ", n["id"], intent.get_feature_for_note(n["id"],feature_name), feature_name, n["contents"][:10], 
        return intent.get_feature_for_note(n["id"],name)

    feature_functions =  {
        "owner_type" : lambda n : owner_types.get(n["owner_id"],None),
        "owner_id" : lambda n: n["owner_id"],
        feature_name : lambda n: compfeat(n,feature_name) # lambda n: intent.get_feature_for_note(n["id"],feature_name)
        }

    print "notes of owners ", len(notes_of_owners)
    fmla = aov.make_fmla(
        '%s ~ owner_type + owner_id' % feature_name,
        notes_of_owners,
        feature_functions,
        ['owner_id','owner_type'])

    aov.compute_averages(notes_of_owners,feature_functions,feature_name,'owner_type')
    
    aovres = r('aov')(fmla)
    tsd = r('TukeyHSD')(aovres)
    ios = StringIO.StringIO()
    print >>ios,r.summary(tsd)
    return fmla,aovres,tsd,ios.getvalue()