예제 #1
0
def note_names(note):
    """Build the "names" feature: how often known names occur in a note.

    The module-level ``_names`` cache is filled on first use from
    ``names.read()``: entries are lower-cased, entries of two characters or
    fewer and entries present in ``name_stop_list`` are dropped, and
    duplicates are removed.

    Args:
        note: mapping whose ``"contents"`` entry holds the note text.

    Returns:
        ``content_analysis.make_feature("names", total)`` where ``total`` is
        the sum of ``count_regex_matches`` over one pattern per cached name
        that occurs as a substring of the text (0 when no name occurs).
    """
    import re

    from content_analysis import count_regex_matches
    import content_analysis
    global _names
    global name_stop_list
    if _names is None:
        _names = list(set(
            x.lower() for x in names.read()
            if len(x) > 2 and x.lower() not in name_stop_list
        ))
    contents = note["contents"]
    # NOTE(review): cached names are lower-cased while ``contents`` is used
    # as-is, so this substring pre-filter is case-sensitive -- confirm that
    # callers pass lower-cased text.
    # The name is escaped so it is matched literally, exactly as the
    # substring pre-filter treats it; the raw string keeps ``\W`` intact.
    patterns = [
        r"(^|\W+)%s($|\W+)" % re.escape(name)
        for name in _names if name in contents
    ]
    # ``sum`` replaces the builtin ``reduce`` (gone in Python 3) and yields 0
    # for an empty pattern list, so no separate "no hits" branch is needed.
    total = sum(count_regex_matches(p, contents) for p in patterns)
    return content_analysis.make_feature("names", total)
예제 #2
0
def note_names(note):
    """Build the "names" feature: how often known names occur in a note.

    On first use the module-level ``_names`` cache is populated from
    ``names.read()``; each entry is lower-cased, and entries of two
    characters or fewer or listed in ``name_stop_list`` are discarded
    (duplicates collapse into one).

    Args:
        note: mapping whose ``"contents"`` entry holds the note text.

    Returns:
        ``content_analysis.make_feature("names", total)`` where ``total`` is
        the sum of ``count_regex_matches`` over one pattern per cached name
        that occurs as a substring of the text (0 when no name occurs).
    """
    import re

    from content_analysis import count_regex_matches
    import content_analysis
    global _names
    global name_stop_list
    if _names is None:
        _names = list(
            set(
                x.lower() for x in names.read()
                if len(x) > 2 and (x.lower() not in name_stop_list)
            ))
    contents = note["contents"]
    # NOTE(review): cached names are lower-cased while ``contents`` is used
    # as-is, so this substring pre-filter is case-sensitive -- confirm that
    # callers pass lower-cased text.
    candidates = [name for name in _names if name in contents]
    # Escape each name so regex metacharacters in it are matched literally,
    # consistent with the literal substring test above. The raw string keeps
    # ``\W`` from being read as a (invalid) string escape.
    rnames = [r"(^|\W+)%s($|\W+)" % re.escape(nhit) for nhit in candidates]
    # ``sum`` replaces the builtin ``reduce`` (removed in Python 3) and
    # returns 0 for an empty list, covering the "no names found" case too.
    total = sum(count_regex_matches(n, contents) for n in rnames)
    return content_analysis.make_feature("names", total)
예제 #3
0
def note_date_count(s):
    """Return the "note_date_count" feature for the note ``s``.

    The feature value is the length of whatever ``date_matches`` returns for
    ``s["contents"]``, wrapped by ``content_analysis.make_feature``.
    """
    import content_analysis
    found_dates = date_matches(s["contents"])
    return content_analysis.make_feature("note_date_count", len(found_dates))
예제 #4
0
def note_date_count(s):
    """Wrap the number of date matches in a note as a feature.

    ``date_matches`` is applied to ``s["contents"]`` and the length of its
    result is passed to ``content_analysis.make_feature`` under the name
    "note_date_count".
    """
    import content_analysis
    text = s["contents"]
    n_dates = len(date_matches(text))
    return content_analysis.make_feature("note_date_count", n_dates)