def juxta_basicstats(u):
    ## Per-user summary statistics over all of the user's notes,
    ## rendered as an HTML span for the juxtaposition view.
    ns = u.note_owner
    nvals = u.note_owner.values()
    stats = (
        ("N", ns.count()),  ## number of notes
        ("keep/del", "%s/%s" % (ns.filter(deleted=0).count(),
                                ns.filter(deleted=1).count())),
        ("s:", exporter.makedate_usec(min([float(x["created"]) for x in nvals]))),
        ("l:", exporter.makedate_usec(max([float(x["edited"]) for x in nvals] +
                                          [float(x["created"]) for x in nvals]))),
        ("|words|", mean([ca.note_words(nvs)[1] for nvs in nvals])),
        ("|chars|", mean([len(nvs["contents"].strip()) for nvs in nvals])),
        ("|urls%|", sum([1 for nvs in nvals if ca.note_urls(nvs)[1] > 0]) /
                    (1.0 * ns.count()) * 100),
        ("|#vers|", mean([float(nvs["version"]) for nvs in nvals])),
    )
    return '<span class="userstats">%s</span>' % "; ".join(
        ['%s: <span class="val">%s</span>' % (k, v) for k, v in stats])
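## `mean` is used above but not defined in this file. A minimal sketch of
## the presumed helper (plain arithmetic mean; this definition is an
## assumption, not the project's actual utility):
def mean(xs):
    xs = list(xs)
    return sum(xs) / (1.0 * len(xs)) if xs else 0.0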
## Action-word position features: does an action word appear among the
## first one, two, or three tokens of the note?
first_word_action = lambda notevals, words: (
    "first_word_symbol", words[0] in actionWords)
first_words_action = lambda notevals, words: (
    "first_2words_action",
    words[0] in actionWords or
    (len(words) > 1 and words[1] in actionWords))  #['WDT','WP','WRB','WDT','VBZ']
first_3words_action = lambda notevals, words: (
    "first_3words_action",
    words[0] in actionWords or
    (len(words) > 1 and words[1] in actionWords) or
    (len(words) > 2 and words[2] in actionWords))

word_features = [first_word_action, first_words_action,
                 first_word_stop, first_3words_action]

#count_verbs = lambda notevals, words: ("count_verbs", ca.note_verbs(notevals)['note_verbs'])
#count_urls = lambda notevals, words: ("count_urls", ca.note_urls(notevals)['note_urls'])
#count_numbers = lambda notevals, words: ("count_numbers", ca.numbers(notevals)['numbers'])
#count_todos = lambda notevals, words: ("count_todos", ca.note_todos(notevals)['note_todos'])
#count_names = lambda notevals, words: ("count_names", ca.note_names(notevals)["names"])

# Counts seem to be overfitting majorly, but also increasing overall accuracy...
count_features = []  ## Not using counts does better!
#count_features = [count_verbs, count_urls, count_numbers, count_todos, count_names]

## Boolean presence/threshold features over URLs, POS tags, and line counts.
contains_url = lambda notevals, words: (
    "contains_url", ca.note_urls(notevals)['note_urls'] > 0)
contains_verbs = lambda notevals, words: (
    "3+_verbs", ca.note_verbs(notevals)['note_verbs'] >= 3)
contains_dets = lambda notevals, words: (
    "1+_det", count_pos(words, ['DT']) > 0)
contains_3_dets = lambda notevals, words: (
    "3+_det", count_pos(words, ['DT']) >= 3)  ## 6 / 17 split
contains_adj = lambda notevals, words: (
    "1+_adj", count_pos(words, ['JJ', 'JJR', 'JJS']) > 0)
contains_adv = lambda notevals, words: (
    "1+_adv", count_pos(words, ['RB', 'RBR', 'RBS']) > 0)
#contains_linesZ = lambda notevals, words: ("contains_1_line", notevals['contents'].count('\n') == 0)
contains_linesA = lambda notevals, words: (
    "contains_2+_lines", notevals['contents'].count('\n') >= 1)
contains_linesB = lambda notevals, words: (
    "3+_lines", notevals['contents'].count('\n') >= 2)
#contains_pronoun = lambda notevals, words: ("1+_pronouns", count_pos(words, ['PRP', 'PRP$']) >= 1)

## Per-feature accuracies:
# contains_verbs   .28
# contains_dets    .5
# contains_3_dets  .2187
# contains_adj     .52
# contains_adv     .49
# contains_linesB  .35
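## `count_pos` and `first_word_stop` are referenced above but defined
## elsewhere. A minimal sketch of the presumed `count_pos` behavior,
## counting tokens whose Penn Treebank tag falls in `tags` (assumes nltk
## and its tagger models are available; this is an assumption, not the
## module's actual implementation):
import nltk

def count_pos(words, tags):
    return sum(1 for _, tag in nltk.pos_tag(words) if tag in tags)

## For context, a hedged sketch of how these (name, value) feature lambdas
## might be folded into a feature dict for an NLTK-style classifier. The
## `extract_features` name and the whitespace tokenizer are illustrative
## assumptions:
def extract_features(notevals, feature_fns):
    words = notevals['contents'].split()  ## assumed simple tokenizer
    return dict(fn(notevals, words) for fn in feature_fns)

## e.g. extract_features(note, word_features + [contains_url, contains_adj])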
def one_or_no_url_redblk(note):
    ## Color a note red if it contains at least one URL, black otherwise.
    ## ca.note_urls may return either a dict or an indexable pair here,
    ## so handle both shapes.
    note = n2vals(note)
    urls = ca.note_urls(note)
    if isinstance(urls, dict):
        return 'red' if urls['note_urls'] > 0 else 'black'
    return 'red' if urls[1] > 0 else 'black'
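## Illustrative use only: one way the red/black flag could be applied when
## rendering a note. `render_note_row` and the HTML shape are hypothetical,
## not code from this module:
def render_note_row(note):
    color = one_or_no_url_redblk(note)
    return '<span style="color: %s">%s</span>' % (color, n2vals(note)['contents'])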