def note_names(note):
    """Feature: how many known personal names appear in the note's contents."""
    from content_analysis import count_regex_matches
    import content_analysis
    global _names
    global name_stop_list
    # Lazily build the lowercased name list, dropping very short names and
    # anything on the stop list.
    if _names is None:
        _names = list(set(x.lower() for x in names.read()
                          if len(x) > 2 and x.lower() not in name_stop_list))
    # Whole-word regexes, built only for names that occur as substrings of the note.
    rnames = [r"(^|\W+)%s($|\W+)" % nhit
              for nhit in [name for name in _names if name in note["contents"]]]
    if rnames:
        # Debug: per-name counts.
        # hits = [(n, count_regex_matches(n, note["contents"])) for n in rnames]
        # print(hits)
        return content_analysis.make_feature(
            "names",
            sum(count_regex_matches(n, note["contents"]) for n in rnames))
    return content_analysis.make_feature("names", 0)
def note_date_count(s):
    """Feature: number of date-like strings found in the note's contents."""
    import content_analysis
    return content_analysis.make_feature(
        "note_date_count", len(date_matches(s["contents"])))
    ## reduce(plus, [count_regex_matches(f, s) for f in all_])
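# The helpers used above come from the project's content_analysis module, which
# is not shown in this section. The definitions below are a minimal, hypothetical
# sketch of what count_regex_matches, date_matches, and make_feature might look
# like, for illustration only; the module's real implementations may differ.
import re

def count_regex_matches(pattern, text):
    # Number of non-overlapping matches of `pattern` in `text`.
    return len(re.findall(pattern, text))

def date_matches(text):
    # Rough matcher for numeric dates such as 3/14/2015 or 2015-03-14.
    return re.findall(r"\b(?:\d{1,2}/\d{1,2}/\d{2,4}|\d{4}-\d{2}-\d{2})\b", text)

def make_feature(name, value):
    # Package a named feature value as a (name, value) pair.
    return (name, value)

# Quick check of the stand-ins themselves:
#   date_matches("Met on 2015-03-14 and again 3/20/2015")  # -> two matches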