def analyze(s):
    """Build the feature-key list for one serialized page record.

    ``s`` is evaluated as a Python expression and must produce a mapping
    with the keys ``name``, ``electoral_district_type``,
    ``electoral_district_name``, ``state`` and ``sitetext``.  The site text
    is lower-cased and decoded from UTF-8, the ``name`` field is split by
    ``conv.clean_name`` into (full name, last name, first name), and one
    feature key (produced by ``conv.search_to_feature_key``) is emitted for
    every occurrence in the text of:

    * each entry of the global ``vocabulary``;
    * the name parts and the phrases built from them and from the state
      (see ``phrase_features`` below);
    * the aliases in ``conv.district_type_dict[electoral_district_type]``,
      the entries of ``conv.office_names``, the district name and the state.

    Occurrences are counted with ``str.count`` and every phrase is counted
    independently, so a hit for ``'reelect ' + last`` is also a hit for
    ``'elect ' + last``.

    Returns the value of ``basic_analyze(text)`` with the special keys
    appended via ``+``.

    Raises ``KeyError`` when a required field (or the district type in
    ``conv.district_type_dict``) is missing.
    """
    # SECURITY: eval() executes whatever expression ``s`` contains. This is
    # only acceptable if every record comes from a trusted source. If the
    # records are plain literals, ast.literal_eval would be a safer parser
    # -- TODO confirm with the producer of ``s`` before switching.
    d = eval(s)

    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name'].lower()
    state = d['state'].lower()
    text = d['sitetext'].lower().decode('utf-8')
    # The text is lower-cased, so lower-case the name parts once up front.
    name, last, first = [part.lower() for part in conv.clean_name(d['name'])]

    key = conv.search_to_feature_key
    special_keys = []

    for v in vocabulary:
        special_keys += [key(v)] * text.count(v.lower())

    # (feature name, phrase searched for in the text), in emission order.
    phrase_features = [
        ('name', name),
        ('last', last),
        ('first', first),
        ('lastfor', last + ' for'),
        ('lastfor', last + 'for'),
        ('lastfor', last + '4'),
        ('votelast', 'vote' + last),
        ('forstate', 'for ' + state),
        ('reelectlast', 'reelect ' + name),
        ('reelectlast', 'reelect ' + last),
        ('reelectlast', 're elect ' + name),
        ('reelectlast', 're elect ' + last),
        ('reelectlast', 're-elect ' + name),
        ('reelectlast', 're-elect ' + last),
        ('electlast', 'elect ' + name),
        ('electlast', 'elect ' + last),
        ('votelast', 'vote ' + last),
        ('voteforlast', 'vote for ' + last),
        ('voteforlast', 'votefor' + last),
        ('voteforlast', 'vote4' + last),
    ]
    for feature, phrase in phrase_features:
        special_keys += [key(feature)] * text.count(phrase)

    # NOTE: earlier revisions called text.replace(<term>, '') here and after
    # each vocabulary term but discarded the result. str.replace returns a
    # new string, so ``text`` was never modified. Those no-op calls are
    # dropped; ``text`` reaches basic_analyze() unchanged, exactly as before.

    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [key('electoral_district_type')] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [key('officename')] * office_hits

    special_keys += ([key('electoral_district_name')]
                     * text.count(electoral_district_name))
    special_keys += [key('state')] * text.count(state)

    return basic_analyze(text) + special_keys
def analyze(s):
    """Build the feature-key list for one serialized page record.

    ``s`` is evaluated as a Python expression and must produce a mapping
    with the keys ``name``, ``electoral_district_type``,
    ``electoral_district_name``, ``state`` and ``sitetext``.  The site text
    is lower-cased and decoded from UTF-8, the ``name`` field is split by
    ``conv.clean_name`` into (full name, last name, first name), and one
    feature key (produced by ``conv.search_to_feature_key``) is emitted for
    every occurrence in the text of:

    * each entry of the global ``vocabulary``;
    * the name parts and the phrases built from them and from the state
      (see ``phrase_features`` below);
    * the aliases in ``conv.district_type_dict[electoral_district_type]``,
      the entries of ``conv.office_names``, the district name and the state.

    Occurrences are counted with ``str.count`` and every phrase is counted
    independently, so a hit for ``'reelect ' + last`` is also a hit for
    ``'elect ' + last``.

    Returns the value of ``basic_analyze(text)`` with the special keys
    appended via ``+``.

    Raises ``KeyError`` when a required field (or the district type in
    ``conv.district_type_dict``) is missing.
    """
    # SECURITY: eval() executes whatever expression ``s`` contains. This is
    # only acceptable if every record comes from a trusted source. If the
    # records are plain literals, ast.literal_eval would be a safer parser
    # -- TODO confirm with the producer of ``s`` before switching.
    d = eval(s)

    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name'].lower()
    state = d['state'].lower()
    text = d['sitetext'].lower().decode('utf-8')
    # The text is lower-cased, so lower-case the name parts once up front.
    name, last, first = [part.lower() for part in conv.clean_name(d['name'])]

    key = conv.search_to_feature_key
    special_keys = []

    for v in vocabulary:
        special_keys += [key(v)] * text.count(v.lower())

    # (feature name, phrase searched for in the text), in emission order.
    phrase_features = [
        ('name', name),
        ('last', last),
        ('first', first),
        ('lastfor', last + ' for'),
        ('lastfor', last + 'for'),
        ('lastfor', last + '4'),
        ('votelast', 'vote' + last),
        ('forstate', 'for ' + state),
        ('reelectlast', 'reelect ' + name),
        ('reelectlast', 'reelect ' + last),
        ('reelectlast', 're elect ' + name),
        ('reelectlast', 're elect ' + last),
        ('reelectlast', 're-elect ' + name),
        ('reelectlast', 're-elect ' + last),
        ('electlast', 'elect ' + name),
        ('electlast', 'elect ' + last),
        ('votelast', 'vote ' + last),
        ('voteforlast', 'vote for ' + last),
        ('voteforlast', 'votefor' + last),
        ('voteforlast', 'vote4' + last),
    ]
    for feature, phrase in phrase_features:
        special_keys += [key(feature)] * text.count(phrase)

    # NOTE: earlier revisions called text.replace(<term>, '') here and after
    # each vocabulary term but discarded the result. str.replace returns a
    # new string, so ``text`` was never modified. Those no-op calls are
    # dropped; ``text`` reaches basic_analyze() unchanged, exactly as before.

    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [key('electoral_district_type')] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [key('officename')] * office_hits

    special_keys += ([key('electoral_district_name')]
                     * text.count(electoral_district_name))
    special_keys += [key('state')] * text.count(state)

    return basic_analyze(text) + special_keys
def analyze(s):
    """Build the feature-key list for one serialized page record.

    ``s`` is evaluated as a Python expression and must produce a mapping
    with the keys ``name``, ``electoral_district_type``,
    ``electoral_district_name``, ``state``, ``link`` and ``sitetext``.  The
    site text is lower-cased and decoded from UTF-8, the ``name`` field is
    split by ``conv.clean_name`` into (full name, last name, first name),
    and one feature key (produced by ``conv.search_to_feature_key``) is
    emitted for every occurrence in the text of:

    * each entry of the global ``vocabulary``;
    * the name parts and the phrases built from them and from the state
      (see ``phrase_features`` below);
    * the last name followed within 1-50 characters by "public figure" or
      "politician" (feature ``politicianpublicfigure``);
    * the aliases in ``conv.district_type_dict[electoral_district_type]``,
      the entries of ``conv.office_names``, the district name and the state.

    When ``conv.strip_and_std(link)`` is a key of the global
    ``fb_page_data``, ``1 + int(log(fans))`` ``fbdata`` keys are added, plus
    one ``fbauthentic`` key if the entry's ``authentic`` field equals
    ``'Authentic'``.

    Occurrences are counted with ``str.count`` and every phrase is counted
    independently, so a hit for ``'reelect ' + last`` is also a hit for
    ``'elect ' + last``.

    Returns the value of ``basic_analyze(text)`` with the special keys
    appended via ``+``.

    Raises ``KeyError`` when a required field (or the district type in
    ``conv.district_type_dict``) is missing.
    """
    # SECURITY: eval() executes whatever expression ``s`` contains. This is
    # only acceptable if every record comes from a trusted source. If the
    # records are plain literals, ast.literal_eval would be a safer parser
    # -- TODO confirm with the producer of ``s`` before switching.
    d = eval(s)

    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name'].lower()
    state = d['state'].lower()
    link = d['link']
    text = d['sitetext'].lower().decode('utf-8')
    # The text is lower-cased, so lower-case the name parts once up front.
    name, last, first = [part.lower() for part in conv.clean_name(d['name'])]

    key = conv.search_to_feature_key
    special_keys = []

    for v in vocabulary:
        special_keys += [key(v)] * text.count(v.lower())

    # (feature name, phrase searched for in the text), in emission order.
    phrase_features = [
        ('name', name),
        ('last', last),
        ('first', first),
        ('lastfor', last + ' for'),
        ('lastfor', last + 'for'),
        ('lastfor', last + '4'),
        ('votelast', 'vote' + last),
        ('forstate', 'for ' + state),
        ('reelectlast', 'reelect ' + name),
        ('reelectlast', 'reelect ' + last),
        ('reelectlast', 're elect ' + name),
        ('reelectlast', 're elect ' + last),
        ('reelectlast', 're-elect ' + name),
        ('reelectlast', 're-elect ' + last),
        ('electlast', 'elect ' + name),
        ('electlast', 'elect ' + last),
        ('votelast', 'vote ' + last),
        ('voteforlast', 'vote for ' + last),
        ('voteforlast', 'votefor' + last),
        ('voteforlast', 'vote4' + last),
    ]
    for feature, phrase in phrase_features:
        special_keys += [key(feature)] * text.count(phrase)

    # The pattern uses the lower-cased last name: ``text`` is lower-cased
    # and the search is case-sensitive, so the raw-cased name used before
    # could not match any upper-case letter it contained.
    try:
        pattern = r'{last}.{{1,50}}(?:public figure|politician)'.format(
            last=re.escape(last.encode('utf-8')))
        public_figure_hits = len(re.findall(pattern, text))
    except (UnicodeError, re.error):
        # This used to be a bare ``except`` that dropped into pdb, which
        # stalls any non-interactive run. Skip the feature and leave a
        # trace in the log instead.
        import logging
        logging.getLogger(__name__).warning(
            'politicianpublicfigure pattern failed for last name %r',
            last, exc_info=True)
        public_figure_hits = 0
    special_keys += [key('politicianpublicfigure')] * public_figure_hits

    # NOTE: earlier revisions called text.replace(<term>, '') here and after
    # each vocabulary term but discarded the result. str.replace returns a
    # new string, so ``text`` was never modified. Those no-op calls are
    # dropped; ``text`` reaches basic_analyze() unchanged, exactly as before.

    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [key('electoral_district_type')] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [key('officename')] * office_hits

    special_keys += ([key('electoral_district_name')]
                     * text.count(electoral_district_name))
    special_keys += [key('state')] * text.count(state)

    # ``in`` replaces the Python-2-only dict.has_key(); the normalized link
    # is computed once instead of twice.
    page_id = conv.strip_and_std(link)
    if page_id in fb_page_data:
        fb_page_dict = fb_page_data[page_id]
        special_keys.append(key('fbdata'))
        fan_count = int(fb_page_dict['fans'])
        # math.log() raises ValueError for values <= 0, so a page with no
        # fans contributes no extra weight instead of aborting the record.
        fan_weight = int(math.log(fan_count)) if fan_count > 0 else 0
        special_keys += [key('fbdata')] * fan_weight
        if fb_page_dict['authentic'] == 'Authentic':
            special_keys.append(key('fbauthentic'))

    return basic_analyze(text) + special_keys
def analyze(s):
    """Build the feature-key list for one serialized page record.

    ``s`` is evaluated as a Python expression and must produce a mapping
    with the keys ``name``, ``electoral_district_type``,
    ``electoral_district_name``, ``state``, ``link`` and ``sitetext``.  The
    site text is lower-cased and decoded from UTF-8, the ``name`` field is
    split by ``conv.clean_name`` into (full name, last name, first name),
    and one feature key (produced by ``conv.search_to_feature_key``) is
    emitted for every occurrence in the text of:

    * each entry of the global ``vocabulary``;
    * the name parts and the phrases built from them and from the state
      (see ``phrase_features`` below);
    * the last name followed within 1-50 characters by "public figure" or
      "politician" (feature ``politicianpublicfigure``);
    * the aliases in ``conv.district_type_dict[electoral_district_type]``,
      the entries of ``conv.office_names``, the district name and the state.

    When ``conv.strip_and_std(link)`` is a key of the global
    ``fb_page_data``, ``1 + int(log(fans))`` ``fbdata`` keys are added, plus
    one ``fbauthentic`` key if the entry's ``authentic`` field equals
    ``'Authentic'``.

    Occurrences are counted with ``str.count`` and every phrase is counted
    independently, so a hit for ``'reelect ' + last`` is also a hit for
    ``'elect ' + last``.

    Returns the value of ``basic_analyze(text)`` with the special keys
    appended via ``+``.

    Raises ``KeyError`` when a required field (or the district type in
    ``conv.district_type_dict``) is missing.
    """
    # SECURITY: eval() executes whatever expression ``s`` contains. This is
    # only acceptable if every record comes from a trusted source. If the
    # records are plain literals, ast.literal_eval would be a safer parser
    # -- TODO confirm with the producer of ``s`` before switching.
    d = eval(s)

    electoral_district_type = d['electoral_district_type']
    electoral_district_name = d['electoral_district_name'].lower()
    state = d['state'].lower()
    link = d['link']
    text = d['sitetext'].lower().decode('utf-8')
    # The text is lower-cased, so lower-case the name parts once up front.
    name, last, first = [part.lower() for part in conv.clean_name(d['name'])]

    key = conv.search_to_feature_key
    special_keys = []

    for v in vocabulary:
        special_keys += [key(v)] * text.count(v.lower())

    # (feature name, phrase searched for in the text), in emission order.
    phrase_features = [
        ('name', name),
        ('last', last),
        ('first', first),
        ('lastfor', last + ' for'),
        ('lastfor', last + 'for'),
        ('lastfor', last + '4'),
        ('votelast', 'vote' + last),
        ('forstate', 'for ' + state),
        ('reelectlast', 'reelect ' + name),
        ('reelectlast', 'reelect ' + last),
        ('reelectlast', 're elect ' + name),
        ('reelectlast', 're elect ' + last),
        ('reelectlast', 're-elect ' + name),
        ('reelectlast', 're-elect ' + last),
        ('electlast', 'elect ' + name),
        ('electlast', 'elect ' + last),
        ('votelast', 'vote ' + last),
        ('voteforlast', 'vote for ' + last),
        ('voteforlast', 'votefor' + last),
        ('voteforlast', 'vote4' + last),
    ]
    for feature, phrase in phrase_features:
        special_keys += [key(feature)] * text.count(phrase)

    # The pattern uses the lower-cased last name: ``text`` is lower-cased
    # and the search is case-sensitive, so the raw-cased name used before
    # could not match any upper-case letter it contained.
    try:
        pattern = r'{last}.{{1,50}}(?:public figure|politician)'.format(
            last=re.escape(last.encode('utf-8')))
        public_figure_hits = len(re.findall(pattern, text))
    except (UnicodeError, re.error):
        # This used to be a bare ``except`` that dropped into pdb, which
        # stalls any non-interactive run. Skip the feature and leave a
        # trace in the log instead.
        import logging
        logging.getLogger(__name__).warning(
            'politicianpublicfigure pattern failed for last name %r',
            last, exc_info=True)
        public_figure_hits = 0
    special_keys += [key('politicianpublicfigure')] * public_figure_hits

    # NOTE: earlier revisions called text.replace(<term>, '') here and after
    # each vocabulary term but discarded the result. str.replace returns a
    # new string, so ``text`` was never modified. Those no-op calls are
    # dropped; ``text`` reaches basic_analyze() unchanged, exactly as before.

    district_type_hits = sum(
        text.count(edt.lower())
        for edt in conv.district_type_dict[electoral_district_type])
    special_keys += [key('electoral_district_type')] * district_type_hits

    office_hits = sum(text.count(on.lower()) for on in conv.office_names)
    special_keys += [key('officename')] * office_hits

    special_keys += ([key('electoral_district_name')]
                     * text.count(electoral_district_name))
    special_keys += [key('state')] * text.count(state)

    # ``in`` replaces the Python-2-only dict.has_key(); the normalized link
    # is computed once instead of twice.
    page_id = conv.strip_and_std(link)
    if page_id in fb_page_data:
        fb_page_dict = fb_page_data[page_id]
        special_keys.append(key('fbdata'))
        fan_count = int(fb_page_dict['fans'])
        # math.log() raises ValueError for values <= 0, so a page with no
        # fans contributes no extra weight instead of aborting the record.
        fan_weight = int(math.log(fan_count)) if fan_count > 0 else 0
        special_keys += [key('fbdata')] * fan_weight
        if fb_page_dict['authentic'] == 'Authentic':
            special_keys.append(key('fbauthentic'))

    return basic_analyze(text) + special_keys