def one_or_more_verbs_redblk(note):
    """Classify a note as 'red' when it contains at least one verb, else 'black'.

    The note is first normalized via ``n2vals`` and then passed to
    ``ca.note_verbs``, whose return shape varies by caller: either a dict
    with a ``'note_verbs'`` count, or a sequence whose index 1 holds the
    count.

    Parameters
    ----------
    note : whatever ``n2vals`` accepts (raw note object/record).

    Returns
    -------
    str
        ``'red'`` if the verb count is positive, ``'black'`` otherwise.
    """
    note = n2vals(note)
    lines = ca.note_verbs(note)
    # Fix: use isinstance() for the type check instead of `type(lines) == dict`
    # (idiomatic, and also accepts dict subclasses).
    if isinstance(lines, dict):
        val = lines['note_verbs']
    else:
        # Sequence form: the verb count lives at index 1.
        val = lines[1]
    return 'red' if val > 0 else 'black'
def first_3words_action(notevals, words):
    """Feature: any of the first three words is an action word."""
    hit = (words[0] in actionWords
           or (len(words) > 1 and words[1] in actionWords)
           or (len(words) > 2 and words[2] in actionWords))
    return ("first_3words_action", hit)


word_features = [
    first_word_action,
    first_words_action,
    first_word_stop,
    first_3words_action,
]

# Count-valued features (disabled). Counts seem to be overfitting majorly,
# but also increasing overall accuracy...
#count_verbs = lambda notevals, words: ("count_verbs", ca.note_verbs(notevals)['note_verbs'])
#count_urls = lambda notevals, words: ("count_urls", ca.note_urls(notevals)['note_urls'])
#count_numbers = lambda notevals, words: ("count_numbers", ca.numbers(notevals)['numbers'])
#count_todos = lambda notevals, words: ("count_todos", ca.note_todos(notevals)['note_todos'])
#count_names = lambda notevals, words: ("count_names", ca.note_names(notevals)["names"])

count_features = []  # Not using counts does better!
#count_features = [count_verbs,count_urls,count_numbers,count_todos,count_names]


def contains_url(notevals, words):
    """Feature: the note contains at least one URL."""
    return ("contains_url", ca.note_urls(notevals)['note_urls'] > 0)


def contains_verbs(notevals, words):
    """Feature: the note contains three or more verbs."""
    return ("3+_verbs", ca.note_verbs(notevals)['note_verbs'] >= 3)


def contains_dets(notevals, words):
    """Feature: at least one determiner (DT) appears."""
    return ("1+_det", count_pos(words, ['DT']) > 0)


def contains_3_dets(notevals, words):
    """Feature: three or more determiners appear."""  ## 6 / 17 split
    return ("3+_det", count_pos(words, ['DT']) >= 3)


def contains_adj(notevals, words):
    """Feature: at least one adjective (JJ/JJR/JJS) appears."""
    return ("1+_adj", count_pos(words, ['JJ', 'JJR', 'JJS']) > 0)


def contains_adv(notevals, words):
    """Feature: at least one adverb (RB/RBR/RBS) appears."""
    return ("1+_adv", count_pos(words, ['RB', 'RBR', 'RBS']) > 0)


#contains_linesZ = lambda notevals, words: ("contains_1_line", notevals['contents'].count('\n') == 0)


def contains_linesA(notevals, words):
    """Feature: the note body spans two or more lines."""
    return ("contains_2+_lines", notevals['contents'].count('\n') >= 1)


def contains_linesB(notevals, words):
    """Feature: the note body spans three or more lines."""
    return ("3+_lines", notevals['contents'].count('\n') >= 2)


#contains_pronoun = lambda notevals, words: ("1+_pronouns", count_pos(words, ['PRP', 'PRP$']) >= 1)

# Per-feature accuracy notes:
#   contains_verbs .28   contains_dets .5   contains_3_dets .2187
#   contains_adj .52     contains_adv .49   contains_linesB .35
# Revision 1
#contains_features = [contains_verbs, contains_dets, contains_3_dets, contains_adj, contains_adv]#,contains_linesB]#,contains_linesZ,contains_linesA,contains_linesB]#, contains_tabs]
contains_features = [contains_verbs, contains_adj]  #, contains_url] ##
# NOTE(review): this line appears to be a second, auto-formatter-reflowed copy
# of the feature definitions above (same lambdas, same comments), and it begins
# mid-list -- the opening `word_features = [` is not visible here. Presumably a
# merge/paste artifact duplicating the earlier block; confirm against version
# control and remove one of the two copies.
first_word_action, first_words_action, first_word_stop, first_3words_action ] #count_verbs = lambda notevals, words: ("count_verbs", ca.note_verbs(notevals)['note_verbs']) #count_urls = lambda notevals, words: ("count_urls", ca.note_urls(notevals)['note_urls']) #count_numbers = lambda notevals, words: ("count_numbers", ca.numbers(notevals)['numbers']) #count_todos = lambda notevals, words: ("count_todos", ca.note_todos(notevals)['note_todos']) #count_names = lambda notevals, words: ("count_names", ca.note_names(notevals)["names"]) # Counts seem to be overfitting majorly, but also increasing overall accuracy... count_features = [] ## Not using counts does better! #count_features = [count_verbs,count_urls,count_numbers,count_todos,count_names] contains_url = lambda notevals, words: ("contains_url", ca.note_urls(notevals)[ 'note_urls'] > 0) contains_verbs = lambda notevals, words: ("3+_verbs", ca.note_verbs(notevals)[ 'note_verbs'] >= 3) contains_dets = lambda notevals, words: ("1+_det", count_pos(words, ['DT']) > 0 ) contains_3_dets = lambda notevals, words: ("3+_det", count_pos(words, ['DT']) >= 3) ## 6 / 17 split contains_adj = lambda notevals, words: ( "1+_adj", count_pos(words, ['JJ', 'JJR', 'JJS']) > 0) contains_adv = lambda notevals, words: ( "1+_adv", count_pos(words, ['RB', 'RBR', 'RBS']) > 0) #contains_linesZ = lambda notevals, words: ("contains_1_line", notevals['contents'].count('\n') == 0) contains_linesA = lambda notevals, words: ("contains_2+_lines", notevals[ 'contents'].count('\n') >= 1) contains_linesB = lambda notevals, words: ("3+_lines", notevals['contents']. count('\n') >= 2) #contains_pronoun = lambda notevals, words: ("1+_pronouns", count_pos(words, ['PRP', 'PRP$']) >= 1)