def IterVote(text, sitting_dict):
    """Store every vote found in ``text`` together with each voter's decision.

    Each match of ``Namelist_Token`` in ``text`` is treated as one vote. Votes
    are numbered in order of appearance (zero-padded to three digits), saved
    through ``UpsertVote``, and every voter resolved from the ``agree``,
    ``disagree`` and ``abstain`` groups is linked to the vote through
    ``VoteVoterRelation``.

    Args:
        text: Text to scan for votes.
        sitting_dict: Mapping providing ``uid`` and ``date``; it is also
            forwarded unchanged to ``common.getIdList``.
    """
    # Named group of the match -> decision value stored for the voter.
    decision_values = {'agree': 1, 'disagree': -1, 'abstain': 0}

    # The parenthesised form prints the same single value under the Python 2
    # print statement and the Python 3 print function.
    print(sitting_dict["uid"])

    previous_end = 0
    for number, match in enumerate(Namelist_Token.finditer(text), 1):
        seq = '%03d' % number
        sitting_uid = sitting_dict["uid"]
        vote_dict = {
            'uid': '%s-%s' % (sitting_uid, seq),
            'sitting_id': sitting_uid,
            'vote_seq': seq,
            'date': sitting_dict["date"],
            # The content spans from the end of the previous match up to the
            # end of the current one.
            'content': GetVoteContent(text[previous_end:match.end()]),
        }
        UpsertVote(vote_dict)

        for group_name, decision in decision_values.items():
            captured = match.group(group_name)
            if not captured:
                continue
            # Turn the list separators into spaces before splitting names.
            spaced = re.sub(u'[、:,:,]', ' ', captured)
            voters = common.getIdList(
                c, common.getNameList(spaced), sitting_dict)
            for voter_id, _councilor_id in voters:
                VoteVoterRelation(voter_id, vote_dict['uid'], decision)

        previous_end = match.end()
def IterVote(text, sitting_dict):
    """Walk through the votes in ``text`` and persist them with their voters.

    One vote is created per ``Namelist_Token`` match. Its sequence number is
    the 1-based position of the match, zero-padded to three digits, and its
    uid is ``<sitting uid>-<sequence>``. After the vote is written with
    ``UpsertVote``, the names captured by the ``agree``, ``disagree`` and
    ``abstain`` groups are resolved by ``common.getIdList`` and recorded with
    ``VoteVoterRelation`` using the values 1, -1 and 0 respectively.

    Args:
        text: Text to scan for votes.
        sitting_dict: Mapping providing ``uid`` and ``date``; also passed
            through to ``common.getIdList``.
    """
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(sitting_dict["uid"])

    ref = {'agree': 1, 'disagree': -1, 'abstain': 0}
    content_start = 0
    for position, found in enumerate(Namelist_Token.finditer(text), 1):
        vote_seq = '%03d' % position
        content_end = found.end()

        vote_dict = {}
        vote_dict['uid'] = '%s-%s' % (sitting_dict["uid"], vote_seq)
        vote_dict['sitting_id'] = sitting_dict["uid"]
        vote_dict['vote_seq'] = vote_seq
        vote_dict['date'] = sitting_dict["date"]
        # Everything since the previous match belongs to this vote.
        vote_dict['content'] = GetVoteContent(text[content_start:content_end])
        UpsertVote(vote_dict)

        for decision_name, decision_value in ref.items():
            raw_names = found.group(decision_name)
            if raw_names:
                # Separators become spaces so the names can be split apart.
                name_list = common.getNameList(
                    re.sub(u'[、:,:,]', ' ', raw_names))
                id_pairs = common.getIdList(c, name_list, sitting_dict)
                for voter_id, _unused_councilor_id in id_pairs:
                    VoteVoterRelation(
                        voter_id, vote_dict['uid'], decision_value)

        content_start = content_end
def IterVote(text, sitting_dict):
    """Store every vote found in ``text`` together with each voter's decision.

    Each match of ``Namelist_Token`` in ``text`` is one vote whose content is
    the matched text itself. Votes are numbered in order of appearance
    (zero-padded to three digits) and saved through ``UpsertVote``.

    The match's groups are read as consecutive (label, names) pairs. When a
    label equals one of the decision labels below, titles are stripped from
    the names, the names are resolved by ``common.getIdList`` and each
    resulting voter is linked to the vote through ``VoteVoterRelation``.

    Args:
        text: Text to scan for votes.
        sitting_dict: Mapping providing ``uid`` and ``date``; it is also
            forwarded unchanged to ``common.getIdList``.
    """
    # Decision label as it appears in the text -> stored decision value:
    # agree (贊成) = 1, disagree (反對) = -1, abstain (棄權) = 0.
    # Built once; it is constant for every match.
    ref = {u'贊成': 1, u'反對': -1, u'棄權': 0}

    # The parenthesised form prints the same single value under the Python 2
    # print statement and the Python 3 print function.
    print(sitting_dict["uid"])

    for vote_count, match in enumerate(Namelist_Token.finditer(text), 1):
        vote_seq = str(vote_count).zfill(3)
        vote_dict = {
            'uid': '%s-%s' % (sitting_dict["uid"], vote_seq),
            'sitting_id': sitting_dict["uid"],
            'vote_seq': vote_seq,
            'date': sitting_dict["date"],
            'content': match.group(),
        }
        UpsertVote(vote_dict)

        # match.groups() builds a new tuple on every call, so fetch it once.
        groups = match.groups()
        for key, value in ref.items():
            # Even indexes hold the label, the following index its names.
            for i in range(0, len(groups), 2):
                if groups[i] != key:
                    continue
                # NOTE(review): assumes the names group after a matched label
                # always took part in the match; re.sub raises TypeError on
                # None -- confirm against the Namelist_Token pattern.
                # Drop the (vice-)speaker / councilor titles from the names.
                names = re.sub(u'(副?議長|議員)', '', groups[i + 1])
                # Turn the list separators into spaces before splitting names.
                name_list = common.getNameList(
                    re.sub(u'[、:,:,]', ' ', names))
                for voter_id, _councilor_id in common.getIdList(
                        c, name_list, sitting_dict):
                    VoteVoterRelation(voter_id, vote_dict['uid'], value)
df['councilor'] = map(lambda x: re.sub(u'[\s ]$', '', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'(副?議長|議員)', '', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'[..]', u'‧', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'、', ' ', x) if x else nan, df['councilor']) for wrong, right in [(u'羅文幟', u'羅文熾'), (u'郭昭嚴', u'郭昭巖'), (u'闕梅莎', u'闕枚莎'), (u'林亦華', u'林奕華'), (u'周鍾$', u'周鍾㴴'), (u'汪志銘', u'汪志冰'), (u'簡余宴', u'簡余晏'), (u'周佑威', u'周威佑'), (u'黃洋', u'黃平洋'), (u'周玲玟', u'周玲妏')]: df['councilor'] = map(lambda x: re.sub(wrong, right, x) if x else nan, df['councilor']) df['councilor_ids'] = map( lambda x: getIdList(common.getNameList(x), election_year, county) if x else nan, df['councilor']) df['suggest_expense'] = map(lambda x: x * 1000 if is_number(x) else nan, df['suggest_expense']) df['approved_expense'] = map(lambda x: x * 1000 if is_number(x) else nan, df['approved_expense']) df_concat = concat([df_concat, df]) def Suggestions(suggestion): for column in [ 'position', 'expend_on', 'brought_by', 'bid_type', 'bid_by' ]: suggestion[column] = suggestion[column].strip( ) if suggestion[column] else '' suggestion['bid_by'] = re.sub(u'(股份)?有限公司', '', suggestion['bid_by'])
df_concat = DataFrame() for f in files: print open(f, 'r').name df = pd.read_excel(f, sheetname=0, header=None, encoding='utf-8') county = re.search(u'\S*?[縣市]', df.icol(0)[0]).group() year, month = re.sub('\D', ' ', df.icol(0)[1]).split() if year != '103' and month != '12': continue df = pd.read_excel(f, sheetname=0, header=None, usecols=range(0, 9), skiprows=5, names=['councilor', 'suggestion', 'position', 'suggest_expense', 'approved_expense', 'expend_on', 'brought_by', 'bid_type', 'bid_by'], encoding='utf-8') df.dropna(inplace=True, how='any', subset=['suggestion']) df['councilor'] = map(lambda x: re.sub(u'[\s ]$', '', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'[..]', u'‧', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'、', ' ', x) if x else nan, df['councilor']) for wrong, right in [(u'郭昭嚴', u'郭昭巖'), (u'闕梅莎', u'闕枚莎'), (u'林亦華', u'林奕華'), (u'周鍾$', u'周鍾㴴'), (u'汪志銘', u'汪志冰'), (u'簡余宴', u'簡余晏'), (u'周佑威', u'周威佑'), (u'黃洋', u'黃平洋'), (u'周玲玟', u'周玲妏')]: df['councilor'] = map(lambda x: re.sub(wrong, right, x) if x else nan, df['councilor']) df['councilor_ids'] = map(lambda x: getIdList(common.getNameList(x), '2010', county) if x else nan, df['councilor']) df['suggest_expense'] = map(lambda x: x*1000 if is_number(x) else nan, df['suggest_expense']) df['approved_expense'] = map(lambda x: x*1000 if is_number(x) else nan, df['approved_expense']) df['county'] = county df['suggest_year'] = str(int(year) + 1911) df['suggest_month'] = month df['uid'] = map(lambda x: '%s-%d-%d' % (county, int(year)+1911, x+6), df.index) df_concat = concat([df_concat, df]) def Suggestions(suggestion): for column in ['position', 'expend_on', 'brought_by', 'bid_type', 'bid_by']: suggestion[column] = suggestion[column].strip() if suggestion[column] else '' suggestion['bid_by'] = re.sub(u'(股份)?有限公司', '', suggestion['bid_by']) c.execute(''' UPDATE suggestions_suggestions SET county = %(county)s, election_year = %(election_year)s, suggest_year 
= %(suggest_year)s, suggest_month = %(suggest_month)s, suggestion = %(suggestion)s, position = %(position)s, suggest_expense = %(suggest_expense)s, approved_expense = %(approved_expense)s, expend_on = %(expend_on)s, brought_by = %(brought_by)s, bid_type = %(bid_type)s, bid_by = %(bid_by)s, district = %(district)s, constituency = %(constituency)s
continue df = pd.read_excel(f, sheetname=0, header=None, usecols=range(0, 9), skiprows=5, names=['councilor', 'suggestion', 'position', 'suggest_expense', 'approved_expense', 'expend_on', 'brought_by', 'bid_type', 'bid_by'], encoding='utf-8') election_year = get_election_year(county, int(year) + 1911) df['election_year'] = election_year df['county'] = county df['suggest_year'] = str(int(year) + 1911) df['suggest_month'] = month df['uid'] = map(lambda x: '%s-%d-%d' % (county, int(year)+1911, x+6), df.index) df.dropna(inplace=True, how='any', subset=['suggestion']) df['councilor'] = map(lambda x: re.sub(u'[\s ]$', '', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'(副?議長|議員)', '', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'[..]', u'‧', x) if x else nan, df['councilor']) df['councilor'] = map(lambda x: re.sub(u'、', ' ', x) if x else nan, df['councilor']) for wrong, right in [(u'羅文幟', u'羅文熾'), (u'郭昭嚴', u'郭昭巖'), (u'闕梅莎', u'闕枚莎'), (u'林亦華', u'林奕華'), (u'周鍾$', u'周鍾㴴'), (u'汪志銘', u'汪志冰'), (u'簡余宴', u'簡余晏'), (u'周佑威', u'周威佑'), (u'黃洋', u'黃平洋'), (u'周玲玟', u'周玲妏')]: df['councilor'] = map(lambda x: re.sub(wrong, right, x) if x else nan, df['councilor']) df['councilor_ids'] = map(lambda x: getIdList(common.getNameList(x), election_year, county) if x else nan, df['councilor']) df['suggest_expense'] = map(lambda x: x*1000 if is_number(x) else nan, df['suggest_expense']) df['approved_expense'] = map(lambda x: x*1000 if is_number(x) else nan, df['approved_expense']) df_concat = concat([df_concat, df]) def Suggestions(suggestion): for column in ['position', 'expend_on', 'brought_by', 'bid_type', 'bid_by']: suggestion[column] = suggestion[column].strip() if suggestion[column] else '' suggestion['bid_by'] = re.sub(u'(股份)?有限公司', '', suggestion['bid_by']) c.execute(''' UPDATE suggestions_suggestions SET county = %(county)s, election_year = %(election_year)s, suggest_year = %(suggest_year)s, suggest_month = %(suggest_month)s, 
suggestion = %(suggestion)s, position = %(position)s, suggest_expense = %(suggest_expense)s, approved_expense = %(approved_expense)s, expend_on = %(expend_on)s, brought_by = %(brought_by)s, bid_type = %(bid_type)s, bid_by = %(bid_by)s, district = %(district)s, constituency = %(constituency)s WHERE uid = %(uid)s ''', suggestion) c.execute(''' INSERT into suggestions_suggestions(uid, county, election_year, suggest_year, suggest_month, suggestion, position, suggest_expense, approved_expense, expend_on, brought_by, bid_type, bid_by, district, constituency)