Ejemplo n.º 1
0
def analyze(s):
    """Turn one serialized search-result record into a list of feature keys.

    ``s`` is the source text of a Python dict expression providing the keys
    ``name``, ``electoral_district_type``, ``electoral_district_name``,
    ``state`` and ``sitetext``.  The lower-cased, UTF-8-decoded ``sitetext``
    is scanned for every term in the module-level ``vocabulary`` and for a
    fixed list of candidate / office / location phrases.  Each occurrence
    found contributes one copy of the matching
    ``conv.search_to_feature_key(...)`` value.

    Returns ``basic_analyze(text) + special_keys``: the generic features
    followed by the phrase-derived feature keys, one entry per occurrence.

    Raises ``KeyError`` when a required field is missing from the record or
    when the district type is not present in ``conv.district_type_dict``.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code, so this is only
    # safe while `s` comes from a trusted source.  If records are plain
    # literals, ast.literal_eval would be the safe replacement -- confirm
    # against the producer of `s` before switching.
    d = eval(s)
    raw_name = d['name']
    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name']
    state_lc = d['state'].lower()
    text = d['sitetext'].lower().decode('utf-8')

    name, last, first = conv.clean_name(raw_name)
    name_lc, last_lc, first_lc = name.lower(), last.lower(), first.lower()

    def hits(feature, phrase):
        # One copy of the feature key per non-overlapping occurrence.
        return [conv.search_to_feature_key(feature)] * text.count(phrase)

    special_keys = []
    for term in vocabulary:
        special_keys += hits(term, term.lower())

    # NOTE(review): earlier revisions called text.replace(...) after the
    # vocabulary scan and after the name patterns but discarded the result
    # (str.replace returns a new string), so nothing was ever removed from
    # `text`.  The dead calls are dropped to keep the output unchanged;
    # actually stripping the matches would alter every later count.

    # Order is significant only for the order of the returned keys.  The
    # patterns overlap (e.g. 'elect x' also occurs inside 'reelect x'), so a
    # single occurrence in the text may contribute to several features.
    phrase_features = (
        ('name', name_lc),
        ('last', last_lc),
        ('first', first_lc),
        ('lastfor', last_lc + ' for'),
        ('lastfor', last_lc + 'for'),
        ('lastfor', last_lc + '4'),
        ('votelast', 'vote' + last_lc),
        ('forstate', 'for ' + state_lc),
        ('reelectlast', 'reelect ' + name_lc),
        ('reelectlast', 'reelect ' + last_lc),
        ('reelectlast', 're elect ' + name_lc),
        ('reelectlast', 're elect ' + last_lc),
        ('reelectlast', 're-elect ' + name_lc),
        ('reelectlast', 're-elect ' + last_lc),
        ('electlast', 'elect ' + name_lc),
        ('electlast', 'elect ' + last_lc),
        ('votelast', 'vote ' + last_lc),
        ('voteforlast', 'vote for ' + last_lc),
        ('voteforlast', 'votefor' + last_lc),
        ('voteforlast', 'vote4' + last_lc),
    )
    for feature, phrase in phrase_features:
        special_keys += hits(feature, phrase)

    # These two features aggregate the hits of a whole list of synonyms.
    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [
        conv.search_to_feature_key('electoral_district_type')
    ] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [conv.search_to_feature_key('officename')] * office_hits

    special_keys += hits('electoral_district_name',
                         electoral_district_name.lower())
    special_keys += hits('state', state_lc)

    return basic_analyze(text) + special_keys
def analyze(s):
    """Turn one serialized search-result record into a list of feature keys.

    ``s`` is the source text of a Python dict expression providing the keys
    ``name``, ``electoral_district_type``, ``electoral_district_name``,
    ``state`` and ``sitetext``.  The lower-cased, UTF-8-decoded ``sitetext``
    is scanned for every term in the module-level ``vocabulary`` and for a
    fixed list of candidate / office / location phrases.  Each occurrence
    found contributes one copy of the matching
    ``conv.search_to_feature_key(...)`` value.

    Returns ``basic_analyze(text) + special_keys``: the generic features
    followed by the phrase-derived feature keys, one entry per occurrence.

    Raises ``KeyError`` when a required field is missing from the record or
    when the district type is not present in ``conv.district_type_dict``.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code, so this is only
    # safe while `s` comes from a trusted source.  If records are plain
    # literals, ast.literal_eval would be the safe replacement -- confirm
    # against the producer of `s` before switching.
    d = eval(s)
    raw_name = d['name']
    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name']
    state_lc = d['state'].lower()
    text = d['sitetext'].lower().decode('utf-8')

    name, last, first = conv.clean_name(raw_name)
    name_lc, last_lc, first_lc = name.lower(), last.lower(), first.lower()

    def hits(feature, phrase):
        # One copy of the feature key per non-overlapping occurrence.
        return [conv.search_to_feature_key(feature)] * text.count(phrase)

    special_keys = []
    for term in vocabulary:
        special_keys += hits(term, term.lower())

    # NOTE(review): earlier revisions called text.replace(...) after the
    # vocabulary scan and after the name patterns but discarded the result
    # (str.replace returns a new string), so nothing was ever removed from
    # `text`.  The dead calls are dropped to keep the output unchanged;
    # actually stripping the matches would alter every later count.

    # Order is significant only for the order of the returned keys.  The
    # patterns overlap (e.g. 'elect x' also occurs inside 'reelect x'), so a
    # single occurrence in the text may contribute to several features.
    phrase_features = (
        ('name', name_lc),
        ('last', last_lc),
        ('first', first_lc),
        ('lastfor', last_lc + ' for'),
        ('lastfor', last_lc + 'for'),
        ('lastfor', last_lc + '4'),
        ('votelast', 'vote' + last_lc),
        ('forstate', 'for ' + state_lc),
        ('reelectlast', 'reelect ' + name_lc),
        ('reelectlast', 'reelect ' + last_lc),
        ('reelectlast', 're elect ' + name_lc),
        ('reelectlast', 're elect ' + last_lc),
        ('reelectlast', 're-elect ' + name_lc),
        ('reelectlast', 're-elect ' + last_lc),
        ('electlast', 'elect ' + name_lc),
        ('electlast', 'elect ' + last_lc),
        ('votelast', 'vote ' + last_lc),
        ('voteforlast', 'vote for ' + last_lc),
        ('voteforlast', 'votefor' + last_lc),
        ('voteforlast', 'vote4' + last_lc),
    )
    for feature, phrase in phrase_features:
        special_keys += hits(feature, phrase)

    # These two features aggregate the hits of a whole list of synonyms.
    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [
        conv.search_to_feature_key('electoral_district_type')
    ] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [conv.search_to_feature_key('officename')] * office_hits

    special_keys += hits('electoral_district_name',
                         electoral_district_name.lower())
    special_keys += hits('state', state_lc)

    return basic_analyze(text) + special_keys
def analyze(s):
    """Turn one serialized search-result record into a list of feature keys.

    ``s`` is the source text of a Python dict expression providing the keys
    ``name``, ``electoral_district_type``, ``electoral_district_name``,
    ``state``, ``link`` and ``sitetext``.  The lower-cased, UTF-8-decoded
    ``sitetext`` is scanned for every term in the module-level
    ``vocabulary`` and for a fixed list of candidate / office / location
    phrases.  Each occurrence found contributes one copy of the matching
    ``conv.search_to_feature_key(...)`` value.

    When ``conv.strip_and_std(link)`` is a key of ``fb_page_data``, the
    result also gets one ``fbdata`` key, ``int(log(fans))`` further
    ``fbdata`` keys, and an ``fbauthentic`` key if the page's ``authentic``
    field equals ``'Authentic'``.

    Returns ``basic_analyze(text) + special_keys``: the generic features
    followed by the phrase-derived feature keys, one entry per occurrence.

    Raises ``KeyError`` when a required field is missing from the record or
    when the district type is not present in ``conv.district_type_dict``.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code, so this is only
    # safe while `s` comes from a trusted source.  If records are plain
    # literals, ast.literal_eval would be the safe replacement -- confirm
    # against the producer of `s` before switching.
    d = eval(s)
    raw_name = d['name']
    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name']
    state_lc = d['state'].lower()
    link = d['link']
    text = d['sitetext'].lower().decode('utf-8')

    name, last, first = conv.clean_name(raw_name)
    name_lc, last_lc, first_lc = name.lower(), last.lower(), first.lower()

    def hits(feature, phrase):
        # One copy of the feature key per non-overlapping occurrence.
        return [conv.search_to_feature_key(feature)] * text.count(phrase)

    special_keys = []
    for term in vocabulary:
        special_keys += hits(term, term.lower())

    # NOTE(review): earlier revisions called text.replace(...) after the
    # vocabulary scan and after the name patterns but discarded the result
    # (str.replace returns a new string), so nothing was ever removed from
    # `text`.  The dead calls are dropped to keep the output unchanged;
    # actually stripping the matches would alter every later count.

    # Order is significant only for the order of the returned keys.  The
    # patterns overlap (e.g. 'elect x' also occurs inside 'reelect x'), so a
    # single occurrence in the text may contribute to several features.
    phrase_features = (
        ('name', name_lc),
        ('last', last_lc),
        ('first', first_lc),
        ('lastfor', last_lc + ' for'),
        ('lastfor', last_lc + 'for'),
        ('lastfor', last_lc + '4'),
        ('votelast', 'vote' + last_lc),
        ('forstate', 'for ' + state_lc),
        ('reelectlast', 'reelect ' + name_lc),
        ('reelectlast', 'reelect ' + last_lc),
        ('reelectlast', 're elect ' + name_lc),
        ('reelectlast', 're elect ' + last_lc),
        ('reelectlast', 're-elect ' + name_lc),
        ('reelectlast', 're-elect ' + last_lc),
        ('electlast', 'elect ' + name_lc),
        ('electlast', 'elect ' + last_lc),
        ('votelast', 'vote ' + last_lc),
        ('voteforlast', 'vote for ' + last_lc),
        ('voteforlast', 'votefor' + last_lc),
        ('voteforlast', 'vote4' + last_lc),
    )
    for feature, phrase in phrase_features:
        special_keys += hits(feature, phrase)

    # Best-effort feature: a failure here must neither abort the analysis
    # nor drop a non-interactive run into the debugger, so it is logged and
    # the feature is skipped.
    # NOTE(review): this pattern uses `last` as returned by conv.clean_name
    # (not lower-cased) while `text` is lower-cased; every other pattern
    # lower-cases the name first -- confirm whether that is intended.
    try:
        public_figure_key = conv.search_to_feature_key(
            'politicianpublicfigure')
        pattern = r'{last}.{{1,50}}(?:public figure|politician)'.format(
            last=re.escape(last.encode('utf-8')))
        special_keys += [public_figure_key] * len(re.findall(pattern, text))
    except Exception:
        import logging
        logging.getLogger(__name__).exception(
            'politicianpublicfigure pattern failed for last name %r; '
            'feature skipped', last)

    # These two features aggregate the hits of a whole list of synonyms.
    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [
        conv.search_to_feature_key('electoral_district_type')
    ] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [conv.search_to_feature_key('officename')] * office_hits

    special_keys += hits('electoral_district_name',
                         electoral_district_name.lower())
    special_keys += hits('state', state_lc)

    fb_lookup_key = conv.strip_and_std(link)
    if fb_lookup_key in fb_page_data:
        fb_page_dict = fb_page_data[fb_lookup_key]
        fbdata_key = conv.search_to_feature_key('fbdata')
        special_keys.append(fbdata_key)
        fan_count = int(fb_page_dict['fans'])
        # math.log raises ValueError for 0 or negative input; such pages
        # simply get no additional copies of the key.
        extra_copies = int(math.log(fan_count)) if fan_count > 0 else 0
        special_keys += [fbdata_key] * extra_copies
        if fb_page_dict['authentic'] == 'Authentic':
            special_keys.append(conv.search_to_feature_key('fbauthentic'))

    return basic_analyze(text) + special_keys
def analyze(s):
    """Turn one serialized search-result record into a list of feature keys.

    ``s`` is the source text of a Python dict expression providing the keys
    ``name``, ``electoral_district_type``, ``electoral_district_name``,
    ``state``, ``link`` and ``sitetext``.  The lower-cased, UTF-8-decoded
    ``sitetext`` is scanned for every term in the module-level
    ``vocabulary`` and for a fixed list of candidate / office / location
    phrases.  Each occurrence found contributes one copy of the matching
    ``conv.search_to_feature_key(...)`` value.

    When ``conv.strip_and_std(link)`` is a key of ``fb_page_data``, the
    result also gets one ``fbdata`` key, ``int(log(fans))`` further
    ``fbdata`` keys, and an ``fbauthentic`` key if the page's ``authentic``
    field equals ``'Authentic'``.

    Returns ``basic_analyze(text) + special_keys``: the generic features
    followed by the phrase-derived feature keys, one entry per occurrence.

    Raises ``KeyError`` when a required field is missing from the record or
    when the district type is not present in ``conv.district_type_dict``.
    """
    # SECURITY NOTE(review): eval() executes arbitrary code, so this is only
    # safe while `s` comes from a trusted source.  If records are plain
    # literals, ast.literal_eval would be the safe replacement -- confirm
    # against the producer of `s` before switching.
    d = eval(s)
    raw_name = d['name']
    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name']
    state_lc = d['state'].lower()
    link = d['link']
    text = d['sitetext'].lower().decode('utf-8')

    name, last, first = conv.clean_name(raw_name)
    name_lc, last_lc, first_lc = name.lower(), last.lower(), first.lower()

    def hits(feature, phrase):
        # One copy of the feature key per non-overlapping occurrence.
        return [conv.search_to_feature_key(feature)] * text.count(phrase)

    special_keys = []
    for term in vocabulary:
        special_keys += hits(term, term.lower())

    # NOTE(review): earlier revisions called text.replace(...) after the
    # vocabulary scan and after the name patterns but discarded the result
    # (str.replace returns a new string), so nothing was ever removed from
    # `text`.  The dead calls are dropped to keep the output unchanged;
    # actually stripping the matches would alter every later count.

    # Order is significant only for the order of the returned keys.  The
    # patterns overlap (e.g. 'elect x' also occurs inside 'reelect x'), so a
    # single occurrence in the text may contribute to several features.
    phrase_features = (
        ('name', name_lc),
        ('last', last_lc),
        ('first', first_lc),
        ('lastfor', last_lc + ' for'),
        ('lastfor', last_lc + 'for'),
        ('lastfor', last_lc + '4'),
        ('votelast', 'vote' + last_lc),
        ('forstate', 'for ' + state_lc),
        ('reelectlast', 'reelect ' + name_lc),
        ('reelectlast', 'reelect ' + last_lc),
        ('reelectlast', 're elect ' + name_lc),
        ('reelectlast', 're elect ' + last_lc),
        ('reelectlast', 're-elect ' + name_lc),
        ('reelectlast', 're-elect ' + last_lc),
        ('electlast', 'elect ' + name_lc),
        ('electlast', 'elect ' + last_lc),
        ('votelast', 'vote ' + last_lc),
        ('voteforlast', 'vote for ' + last_lc),
        ('voteforlast', 'votefor' + last_lc),
        ('voteforlast', 'vote4' + last_lc),
    )
    for feature, phrase in phrase_features:
        special_keys += hits(feature, phrase)

    # Best-effort feature: a failure here must neither abort the analysis
    # nor drop a non-interactive run into the debugger, so it is logged and
    # the feature is skipped.
    # NOTE(review): this pattern uses `last` as returned by conv.clean_name
    # (not lower-cased) while `text` is lower-cased; every other pattern
    # lower-cases the name first -- confirm whether that is intended.
    try:
        public_figure_key = conv.search_to_feature_key(
            'politicianpublicfigure')
        pattern = r'{last}.{{1,50}}(?:public figure|politician)'.format(
            last=re.escape(last.encode('utf-8')))
        special_keys += [public_figure_key] * len(re.findall(pattern, text))
    except Exception:
        import logging
        logging.getLogger(__name__).exception(
            'politicianpublicfigure pattern failed for last name %r; '
            'feature skipped', last)

    # These two features aggregate the hits of a whole list of synonyms.
    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [
        conv.search_to_feature_key('electoral_district_type')
    ] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [conv.search_to_feature_key('officename')] * office_hits

    special_keys += hits('electoral_district_name',
                         electoral_district_name.lower())
    special_keys += hits('state', state_lc)

    fb_lookup_key = conv.strip_and_std(link)
    if fb_lookup_key in fb_page_data:
        fb_page_dict = fb_page_data[fb_lookup_key]
        fbdata_key = conv.search_to_feature_key('fbdata')
        special_keys.append(fbdata_key)
        fan_count = int(fb_page_dict['fans'])
        # math.log raises ValueError for 0 or negative input; such pages
        # simply get no additional copies of the key.
        extra_copies = int(math.log(fan_count)) if fan_count > 0 else 0
        special_keys += [fbdata_key] * extra_copies
        if fb_page_dict['authentic'] == 'Authentic':
            special_keys.append(conv.search_to_feature_key('fbauthentic'))

    return basic_analyze(text) + special_keys