Beispiel #1
0
def get_nls_for_note(nid, nlread=None):
    """Return the label-field values of the note whose id equals ``str(nid)``.

    Args:
        nid: Note id. It is converted with ``str`` before being compared
            against each note's ``"id"`` entry.
        nlread: Optional, already loaded result of ``nl.read()``. It must
            provide a ``"notes"`` iterable of dict-like records and a
            ``"label_fields"`` iterable of keys. When ``None``,
            ``nl.read()`` is called to obtain it.

    Returns:
        A list holding the first matching note's value for every key in
        ``nlread["label_fields"]``, or ``None`` when no note has that id
        (a message is printed in that case).
    """
    if nlread is None:
        nlread = nl.read()

    # Convert once instead of once per note, and stop at the first match.
    # next() over a generator also avoids len(filter(...)), which only works
    # where filter() returns a list (Python 2).
    wanted = str(nid)
    note = next((n_ for n_ in nlread["notes"] if n_["id"] == wanted), None)
    if note is None:
        # Single pre-formatted argument: emits the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("nothing found for  %s" % (nid,))
        return None
    return [note[field] for field in nlread["label_fields"]]
Beispiel #2
0
def get_feature_for_note(nid,feature_name,coerce_fn=lambda x: float(x)):
    """Return one feature value for a note, computing it when it is not stored.

    The data returned by ``nl.read()`` is searched for the first note whose
    ``"id"`` equals ``str(nid)``. If such a note exists and ``feature_name``
    is listed in ``note_fields``, ``feature_fields`` or ``label_fields``,
    the stored value is passed through ``coerce_fn`` and returned.
    Otherwise the value is computed with ``nl.compute_feature_named`` from
    the first row of ``Note.objects.filter(id=nid).values()``.

    Args:
        nid: Note id; compared as ``str(nid)`` against the stored ids and
            passed unchanged to the ``Note`` query in the fallback path.
        feature_name: Name of the field / feature to look up.
        coerce_fn: Callable applied to the stored value (defaults to a
            ``float`` conversion). It is NOT applied to computed values.

    Returns:
        ``coerce_fn(stored_value)`` when the value is stored, else whatever
        ``nl.compute_feature_named`` returns.

    NOTE(review): the fallback indexes ``[0]`` on the query result, so it
    presumably fails when no ``Note`` row has this id -- confirm that
    callers only pass existing ids.
    """
    nlread = nl.read()

    # Convert once and stop at the first match. This also avoids
    # len(filter(...)), which only works where filter() returns a list.
    wanted = str(nid)
    note = next((n_ for n_ in nlread["notes"] if n_["id"] == wanted), None)

    if note is not None:
        # The field lists are only consulted when the note exists.
        field_groups = (
            nlread['note_fields'],
            nlread['feature_fields'],
            nlread['label_fields'],
        )
        if any(feature_name in group for group in field_groups):
            return coerce_fn(note[feature_name])

    # Unknown note or unknown feature name: compute the feature instead.
    return nl.compute_feature_named(feature_name,Note.objects.filter(id=nid).values()[0])
Beispiel #3
0
def get_nl_dist_for_cats(arows):
    """Build and print a label frequency distribution for every category.

    For each category in the module-level ``cats``, the notes returned by
    ``get_notes_of_cat(arows, cat)`` are looked up with
    ``get_nls_for_note`` and their label values are accumulated into one
    ``nltk.FreqDist`` per category. Afterwards each category's counts are
    printed in descending order, together with the counts divided by the
    number of notes in that category.

    Args:
        arows: Passed through unchanged to ``get_notes_of_cat``.

    Returns:
        A tuple ``(freqs, last_total)``. ``freqs`` maps each category to
        its ``nltk.FreqDist``. ``last_total`` is the note count of the LAST
        category in ``cats``, or ``None`` when ``cats`` is empty.

    NOTE(review): returning only the last category's total looks
    accidental (the full per-category mapping is available as ``ntotals``
    inside this function). The return shape is kept as-is so existing
    callers keep working -- confirm what callers actually need.
    """
    nlread = nl.read()  # loaded once and shared by every per-note lookup
    freqs = dict((cat, nltk.FreqDist()) for cat in cats)
    ntotals = dict((cat, 0) for cat in cats)
    for cat in cats:
        # Fetch the category's notes once; they are both iterated and counted.
        notes = get_notes_of_cat(arows, cat)
        for n in notes:
            freqs[cat] = freqs[cat] + nltk.FreqDist(get_nls_for_note(n, nlread))
        ntotals[cat] = ntotals[cat] + len(notes)

    # Reporting pass. It runs after all counting so its output is not
    # interleaved with messages printed during the note lookups.
    last_total = None
    for cat in cats:
        total = ntotals[cat]
        # Highest count first; sorted() is stable, so ties keep the order in
        # which the distribution yields them.
        xx = sorted(freqs[cat].items(), key=lambda tagfreq: -tagfreq[1])
        percentages = [(t, y / (1.0 * total)) for t, y in xx]
        # Single pre-formatted argument: emits the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("%s --- %s \nPERCENTAGES %s \n\n" % (cat, xx, percentages))
        last_total = total

    # Explicit variable instead of the leaked loop variable, which raised
    # NameError when ``cats`` was empty.
    return freqs, last_total
Beispiel #4
0
def _get_all_note_ids():
    """Return the ``"id"`` of every note in ``nl.read()["notes"]`` as ints."""
    note_ids = []
    for note in nl.read()["notes"]:
        note_ids.append(int(note["id"]))
    return note_ids