Example #1
0
def juxta_basicstats(u):
    """Render a one-line HTML summary of basic statistics for a user's notes.

    Args:
        u: object whose ``note_owner`` attribute provides ``count()``,
            ``filter(deleted=...)`` and ``values()`` (presumably a Django
            related manager over the user's notes -- confirm against caller).

    Returns:
        A ``<span class="userstats">`` string holding ``label: value`` pairs
        joined by "; ". When the user has no notes only the "N" and
        "keep/del" counts are reported, because the remaining statistics
        (min/max timestamps, means, URL percentage) are undefined for an
        empty set and previously raised ValueError / ZeroDivisionError.
    """
    ns = u.note_owner
    total = ns.count()  # queried once and reused below
    nvals = list(ns.values())
    stats = [
        ("N", total),  ## number of notes
        ("keep/del", "%s/%s" % (ns.filter(deleted=0).count(), ns.filter(deleted=1).count())),
    ]
    if nvals and total:
        created = [float(x["created"]) for x in nvals]
        edited = [float(x["edited"]) for x in nvals]
        # Notes for which element 1 of ca.note_urls(...) is positive.
        notes_with_urls = sum(1 for nvs in nvals if ca.note_urls(nvs)[1] > 0)
        stats += [
            # Earliest creation timestamp.
            ("s:", exporter.makedate_usec(min(created))),
            # Latest timestamp over both edits and creations.
            ("l:", exporter.makedate_usec(max(edited + created))),
            ("|words|", mean([ca.note_words(nvs)[1] for nvs in nvals])),
            ("|chars|", mean([len(nvs["contents"].strip()) for nvs in nvals])),
            ("|urls%|", notes_with_urls / (1.0 * total) * 100),
            ("|#vers|", mean([float(nvs["version"]) for nvs in nvals])),
        ]
    return '<span class="userstats">%s</span>' % "; ".join(
        ['%s: <span class="val">%s</span>' % (k, v) for k, v in stats]
    )
Example #2
0
def juxta_basicstats(u):
    """Build the HTML "userstats" summary line for one user's notes.

    Args:
        u: object exposing ``note_owner`` with ``count()``,
            ``filter(deleted=...)`` and ``values()`` (presumably a Django
            related manager -- confirm against caller).

    Returns:
        ``<span class="userstats">...</span>`` containing "; "-separated
        ``label: value`` entries. For a user without notes only "N" and
        "keep/del" are emitted: the other statistics are undefined on an
        empty set and used to fail with ValueError (``min([])``) or
        ZeroDivisionError (urls% denominator).
    """
    ns = u.note_owner
    total = ns.count()  # single query, reused for N and the urls% ratio
    nvals = list(ns.values())
    stats = [
        ("N", total),  ## number of notes
        ("keep/del", "%s/%s" %
         (ns.filter(deleted=0).count(), ns.filter(deleted=1).count())),
    ]
    if nvals and total:
        created = [float(x['created']) for x in nvals]
        edited = [float(x['edited']) for x in nvals]
        # Notes for which element 1 of ca.note_urls(...) is positive.
        notes_with_urls = sum(1 for nvs in nvals
                              if ca.note_urls(nvs)[1] > 0)
        stats += [
            # Earliest creation timestamp.
            ("s:", exporter.makedate_usec(min(created))),
            # Latest timestamp over both edits and creations.
            ("l:", exporter.makedate_usec(max(edited + created))),
            ("|words|", mean([ca.note_words(nvs)[1] for nvs in nvals])),
            ("|chars|", mean([len(nvs['contents'].strip())
                              for nvs in nvals])),
            ("|urls%|", notes_with_urls / (1.0 * total) * 100),
            ("|#vers|", mean([float(nvs['version']) for nvs in nvals])),
        ]
    return '<span class="userstats">%s</span>' % "; ".join(
        ['%s: <span class="val">%s</span>' % (k, v) for k, v in stats])
def first_word_action(notevals, words):
    """Feature: is the first word in ``actionWords``?

    Returns a ``(label, bool)`` pair. An empty ``words`` yields False instead
    of raising IndexError. ``notevals`` is accepted for signature parity with
    the other feature functions and is not used here.
    """
    # NOTE(review): the label says "symbol" although the check is against
    # actionWords; it is left unchanged because it is a runtime string that
    # callers may depend on.
    return ("first_word_symbol", len(words) > 0 and words[0] in actionWords)


def first_words_action(notevals, words):
    """Feature: is either of the first two words in ``actionWords``?

    Returns a ``(label, bool)`` pair; False for an empty ``words``.
    """
    # NOTE(review): the original line carried a trailing commented-out tag
    # list ['WDT','WP','WRB','WDT','VBZ'] -- purpose unclear.
    return ("first_2words_action", any(w in actionWords for w in words[:2]))


def first_3words_action(notevals, words):
    """Feature: is any of the first three words in ``actionWords``?

    Returns a ``(label, bool)`` pair; False for an empty ``words``.
    """
    return ("first_3words_action", any(w in actionWords for w in words[:3]))


word_features = [first_word_action, first_words_action, first_word_stop, first_3words_action]

# Count-valued feature extractors, currently disabled (kept for reference):
#count_verbs = lambda notevals, words: ("count_verbs", ca.note_verbs(notevals)['note_verbs'])
#count_urls = lambda notevals, words: ("count_urls", ca.note_urls(notevals)['note_urls'])
#count_numbers = lambda notevals, words: ("count_numbers", ca.numbers(notevals)['numbers'])
#count_todos = lambda notevals, words: ("count_todos", ca.note_todos(notevals)['note_todos'])
#count_names = lambda notevals, words: ("count_names",  ca.note_names(notevals)["names"])
# Author's note: counts seem to overfit heavily, yet they also raise overall accuracy...
count_features = []  ## Left empty on purpose: not using counts does better!
#count_features = [count_verbs,count_urls,count_numbers,count_todos,count_names]

def contains_url(notevals, words):
    """Feature ("contains_url", bool): ca.note_urls(notevals)['note_urls'] is positive."""
    url_count = ca.note_urls(notevals)['note_urls']
    return ("contains_url", url_count > 0)


def contains_verbs(notevals, words):
    """Feature ("3+_verbs", bool): ca.note_verbs(notevals)['note_verbs'] is at least 3."""
    verb_count = ca.note_verbs(notevals)['note_verbs']
    return ("3+_verbs", verb_count >= 3)


def contains_dets(notevals, words):
    """Feature ("1+_det", bool): count_pos finds at least one 'DT' in ``words``."""
    det_count = count_pos(words, ['DT'])
    return ("1+_det", det_count > 0)


def contains_3_dets(notevals, words):
    """Feature ("3+_det", bool): count_pos finds three or more 'DT' in ``words``."""
    det_count = count_pos(words, ['DT'])
    return ("3+_det", det_count >= 3)  ## 6 / 17 split


def contains_adj(notevals, words):
    """Feature ("1+_adj", bool): count_pos finds at least one 'JJ'/'JJR'/'JJS'."""
    adj_count = count_pos(words, ['JJ', 'JJR', 'JJS'])
    return ("1+_adj", adj_count > 0)


def contains_adv(notevals, words):
    """Feature ("1+_adv", bool): count_pos finds at least one 'RB'/'RBR'/'RBS'."""
    adv_count = count_pos(words, ['RB', 'RBR', 'RBS'])
    return ("1+_adv", adv_count > 0)


#contains_linesZ = lambda notevals, words: ("contains_1_line", notevals['contents'].count('\n') == 0)
def contains_linesA(notevals, words):
    """Feature ("contains_2+_lines", bool): the contents hold at least one newline."""
    newline_count = notevals['contents'].count('\n')
    return ("contains_2+_lines", newline_count >= 1)


def contains_linesB(notevals, words):
    """Feature ("3+_lines", bool): the contents hold at least two newlines."""
    newline_count = notevals['contents'].count('\n')
    return ("3+_lines", newline_count >= 2)
#contains_pronoun = lambda notevals, words: ("1+_pronouns", count_pos(words, ['PRP', 'PRP$']) >= 1)

# contains_verbs .28 # contains_dets .5 # contains_3_dets .2187 # contains_adj .52 # contains_adv .49 # contains_linesB .35

# Revision 1
Example #4
0
    (len(words) > 2 and words[2] in actionWords))

# Feature extractors that look at the leading words of a note. Each entry is
# expected to be callable as f(notevals, words) and to return a
# (label, value) pair -- TODO confirm for first_word_stop.
word_features = [
    first_word_action, first_words_action, first_word_stop, first_3words_action
]

# Count-valued feature extractors, currently disabled (kept for reference):
#count_verbs = lambda notevals, words: ("count_verbs", ca.note_verbs(notevals)['note_verbs'])
#count_urls = lambda notevals, words: ("count_urls", ca.note_urls(notevals)['note_urls'])
#count_numbers = lambda notevals, words: ("count_numbers", ca.numbers(notevals)['numbers'])
#count_todos = lambda notevals, words: ("count_todos", ca.note_todos(notevals)['note_todos'])
#count_names = lambda notevals, words: ("count_names",  ca.note_names(notevals)["names"])
# Author's note: counts seem to overfit heavily, yet they also raise overall accuracy...
count_features = []  ## Left empty on purpose: not using counts does better!
#count_features = [count_verbs,count_urls,count_numbers,count_todos,count_names]

def contains_url(notevals, words):
    """Feature ("contains_url", bool): ca.note_urls(notevals)['note_urls'] is positive."""
    url_count = ca.note_urls(notevals)['note_urls']
    return ("contains_url", url_count > 0)


def contains_verbs(notevals, words):
    """Feature ("3+_verbs", bool): ca.note_verbs(notevals)['note_verbs'] is at least 3."""
    verb_count = ca.note_verbs(notevals)['note_verbs']
    return ("3+_verbs", verb_count >= 3)


def contains_dets(notevals, words):
    """Feature ("1+_det", bool): count_pos finds at least one 'DT' in ``words``."""
    det_count = count_pos(words, ['DT'])
    return ("1+_det", det_count > 0)


def contains_3_dets(notevals, words):
    """Feature ("3+_det", bool): count_pos finds three or more 'DT' in ``words``."""
    det_count = count_pos(words, ['DT'])
    return ("3+_det", det_count >= 3)  ## 6 / 17 split


def contains_adj(notevals, words):
    """Feature ("1+_adj", bool): count_pos finds at least one 'JJ'/'JJR'/'JJS'."""
    adj_count = count_pos(words, ['JJ', 'JJR', 'JJS'])
    return ("1+_adj", adj_count > 0)


def contains_adv(notevals, words):
    """Feature ("1+_adv", bool): count_pos finds at least one 'RB'/'RBR'/'RBS'."""
    adv_count = count_pos(words, ['RB', 'RBR', 'RBS'])
    return ("1+_adv", adv_count > 0)


#contains_linesZ = lambda notevals, words: ("contains_1_line", notevals['contents'].count('\n') == 0)
def contains_linesA(notevals, words):
    """Feature ("contains_2+_lines", bool): the contents hold at least one newline."""
    newline_count = notevals['contents'].count('\n')
    return ("contains_2+_lines", newline_count >= 1)
Example #5
0
def one_or_no_url_redblk(note):
  """Return 'red' when the note's URL count is positive, otherwise 'black'.

  The note is first converted with ``n2vals``. The result of
  ``ca.note_urls`` is accepted in two shapes: a mapping with a 'note_urls'
  entry, or an indexable whose element 1 is the count.
  """
  note = n2vals(note)
  urls = ca.note_urls(note)
  # isinstance rather than an exact type() match, so dict subclasses take the
  # keyed path instead of falling through to urls[1].
  url_count = urls['note_urls'] if isinstance(urls, dict) else urls[1]
  return 'red' if url_count > 0 else 'black'
Example #6
0
def one_or_no_url_redblk(note):
    """Return 'red' when the note's URL count is positive, otherwise 'black'.

    The note is first converted with ``n2vals``. The result of
    ``ca.note_urls`` is accepted in two shapes: a mapping with a 'note_urls'
    entry, or an indexable whose element 1 is the count.
    """
    note = n2vals(note)
    urls = ca.note_urls(note)
    # isinstance rather than an exact type() match, so dict subclasses take
    # the keyed path instead of falling through to urls[1].
    url_count = urls['note_urls'] if isinstance(urls, dict) else urls[1]
    return 'red' if url_count > 0 else 'black'