def parse_mandates(self, table, ended=False):
    """Yield a ``Mandate`` for every data row of ``table``.

    The first two child rows of ``table`` are skipped. For each remaining
    row the link in cell 1 supplies the person name and, through
    ``parse_cdep_id``, the mandate year and cdep number.

    When cell 2 reads "ales la nivel naţional" the mandate is flagged as a
    minority mandate and no constituency, college, party or county is set.
    Otherwise those fields are read from cells 2, 4, 5 and 3 respectively.

    If ``ended`` is true, the end date is parsed from cell 6.
    """
    data_rows = list(table.children().items())[2:]
    for data_row in data_rows:
        cells = data_row.children()
        profile_link = cells.eq(1).find('a')
        mandate_year, cdep_number = parse_cdep_id(profile_link.attr('href'))

        result = Mandate(
            year=mandate_year,
            cdep_number=cdep_number,
            person_name=profile_link.text(),
            minority=False,
            end_date=None,
        )

        constituency_text = cells.eq(2).text()
        if constituency_text != "ales la nivel naţional":
            result.constituency = int(constituency_text)
            result.college = int(cells.eq(4).text())
            result.party_name = cells.eq(5).text()

            county = fix_local_chars(cells.eq(3).text().title())
            # This one county is stored without the hyphen.
            if county == "Bistrița-Năsăud":
                county = "Bistrița Năsăud"
            result.county_name = county
        else:
            result.minority = True

        if ended:
            result.end_date = parse_date(cells.eq(6).text())

        yield result
def parse_mandates(self, table, ended=False):
    """Generate ``Mandate`` objects from the rows of ``table``.

    Rows at index 0 and 1 are ignored; every later row yields one mandate.
    The anchor in the second cell provides the person name, and its
    ``href`` is decoded by ``parse_cdep_id`` into (year, cdep number).

    A row whose third cell is exactly "ales la nivel naţional" is marked
    ``minority=True``; all other rows get constituency, college, party and
    county filled in from the row's cells.

    With ``ended=True`` the seventh cell is parsed as the end date.
    """
    all_rows = list(table.children().items())
    for entry in all_rows[2:]:
        tds = entry.children()
        anchor = tds.eq(1).find('a')
        id_pair = parse_cdep_id(anchor.attr('href'))
        (mandate_year, cdep_number) = id_pair

        mandate = Mandate(year=mandate_year,
                          cdep_number=cdep_number,
                          person_name=anchor.text(),
                          minority=False,
                          end_date=None)

        elected_nationally = tds.eq(2).text() == "ales la nivel naţional"
        if elected_nationally:
            mandate.minority = True
        else:
            mandate.constituency = int(tds.eq(2).text())
            mandate.college = int(tds.eq(4).text())
            mandate.party_name = tds.eq(5).text()

            titled_county = tds.eq(3).text().title()
            county_name = fix_local_chars(titled_county)
            # Normalise the hyphenated spelling to the space-separated one.
            mandate.county_name = (
                "Bistrița Năsăud"
                if county_name == "Bistrița-Năsăud"
                else county_name
            )

        if ended:
            end_text = tds.eq(6).text()
            mandate.end_date = parse_date(end_text)

        yield mandate
def get_people():
    """Yield scraped person rows with their county resolved to a model.

    Rows come from ``PersonScraper.fetch_people(year)``; ``year`` is not a
    parameter and is read from the enclosing scope (NOTE(review): this
    looks like a nested helper -- confirm where ``year`` is bound).

    For each row the ``'county_name'`` key is removed. When it is
    non-empty, the name is title-cased, passed through
    ``fix_local_chars`` and looked up in ``models.County``; on a match the
    row gets a ``'county'`` entry, otherwise a warning is logged and the
    row is yielded without one.
    """
    person_scraper = PersonScraper(get_cached_session())

    for row in person_scraper.fetch_people(year):
        county_name = row.pop('county_name')

        if county_name:
            ok_name = fix_local_chars(county_name.title())
            # This one county is stored without the hyphen.
            if ok_name == "Bistrița-Năsăud":
                ok_name = "Bistrița Năsăud"

            county = models.County.query.filter_by(name=ok_name).first()
            if county is None:
                # ``warn`` is a deprecated alias; ``warning`` is the
                # supported spelling.
                logger.warning("Can't match county name %r", ok_name)
            else:
                row['county'] = county

        yield row
def calculate_question(question_id):
    """Set a question's policy domain from the first ministry it addresses.

    Loads the name fix-up map from ``ministry_name_fixup.json`` next to
    this module, then splits the question's addressee on ``';'``. Each
    part is whitespace-collapsed, lower-cased and optionally replaced via
    the fix-up map before a case-insensitive lookup in ``models.Ministry``.

    The first matching ministry's ``policy_domain_id`` is copied onto the
    question and the session is committed. If no part matches, nothing is
    changed and nothing is committed.
    """
    fixup_file = path(__file__).abspath().parent / 'ministry_name_fixup.json'
    with fixup_file.open('rb') as handle:
        raw_json = handle.read().decode('utf-8')
    fixup_map = flask.json.loads(raw_json)

    question = models.Question.query.get(question_id)
    addressee = fix_local_chars(question.addressee)

    ministry = None
    for candidate in addressee.split(';'):
        candidate = re.sub(r'\s+', ' ', candidate.strip()).lower()
        # Substitute known aliases; unknown names pass through unchanged.
        candidate = fixup_map.get(candidate, candidate)

        lowered_column = func.lower(models.Ministry.name)
        # Lower-case again: a fix-up value may contain capitals.
        ministry = (
            models.Ministry.query
            .filter(lowered_column == candidate.lower())
            .first()
        )
        if ministry is not None:
            break

    if ministry is None:
        return

    question.policy_domain_id = ministry.policy_domain_id
    models.db.session.commit()
def calculate_question(question_id):
    """Derive and store the policy domain of the question ``question_id``.

    The addressee string of the question is cleaned with
    ``fix_local_chars`` and split on semicolons. Every piece is stripped,
    has runs of whitespace collapsed to one space, is lower-cased, and is
    translated through the JSON fix-up map stored beside this module when
    it appears there. The pieces are tried in order against
    ``models.Ministry`` (comparison on lower-cased names).

    On the first hit the ministry's ``policy_domain_id`` is assigned to
    the question and the database session is committed; with no hit the
    function returns without writing anything.
    """
    module_dir = path(__file__).abspath().parent
    fixup_path = module_dir / 'ministry_name_fixup.json'
    with fixup_path.open('rb') as fixup_stream:
        fixup_map = flask.json.loads(fixup_stream.read().decode('utf-8'))

    question = models.Question.query.get(question_id)
    addressee_text = fix_local_chars(question.addressee)

    for piece in addressee_text.split(';'):
        stripped = piece.strip()
        ministry_name = re.sub(r'\s+', ' ', stripped).lower()
        if ministry_name in fixup_map:
            ministry_name = fixup_map[ministry_name]

        # The fix-up value may contain capitals, so lower-case once more.
        name_matches = (
            func.lower(models.Ministry.name) == ministry_name.lower()
        )
        ministry = models.Ministry.query.filter(name_matches).first()
        if ministry is not None:
            break
    else:
        # No addressee matched a known ministry: leave the question as is.
        return

    question.policy_domain_id = ministry.policy_domain_id
    models.db.session.commit()
def name_bits(self, name):
    """Return the set of words in ``name``.

    ``name`` is first passed through ``fix_local_chars``; hyphens are then
    treated as word separators, alongside whitespace.
    """
    cleaned = fix_local_chars(name)
    without_hyphens = cleaned.replace('-', ' ')
    words = without_hyphens.split()
    return set(words)
def fix_name(self, name):
    """Return ``name`` with separator runs collapsed, via ``fix_local_chars``.

    Every run of whitespace and/or hyphens becomes a single space before
    the result is handed to ``fix_local_chars``.
    """
    single_spaced = re.sub(r'[\s\-]+', ' ', name)
    return fix_local_chars(single_spaced)
def fix_name(self, name):
    """Normalise separators in ``name`` and apply ``fix_local_chars``.

    Any sequence of whitespace characters and hyphens is replaced by one
    space; the outcome is then passed to ``fix_local_chars`` and returned.
    """
    separator_run = r"[\s\-]+"
    normalised = re.sub(separator_run, " ", name)
    result = fix_local_chars(normalised)
    return result
def parse_mandates(self, table, ended=False):
    """Yield a ``Mandate`` for each data row of ``table``.

    The two leading rows are treated as headers and only inspected for
    layout hints:

    * 'Colegiul uninominal' in row 1 means there is a college column
      (index 4) and the party sits in column 5; otherwise there is no
      college and the party is in column 4.
    * 'Membru din' in row 0 means the table has a start-date column.

    For every remaining row the profile link in cell 1 is decoded with
    ``parse_profile_url`` and the linked page is fetched through
    ``self.fetch_url`` to pick up the picture URL (``a.highslide`` href).

    With ``ended=True`` the end date is parsed from a column whose index
    depends on the layout flags and on whether the row is a minority
    mandate.
    """
    rows = list(table.children().items())

    uninominal = 'Colegiul uninominal' in rows[1].text()
    college_col, party_col = (4, 5) if uninominal else (None, 4)
    has_start_date = 'Membru din' in rows[0].text()

    for data_row in rows[2:]:
        cells = data_row.children()
        anchor = cells.eq(1).find('a')
        profile_url = anchor.attr('href')
        year, chamber, number = parse_profile_url(profile_url)

        profile_page = self.fetch_url(profile_url)
        picture_link = profile_page.find('a.highslide')

        mandate = Mandate(
            year=year,
            chamber_number=chamber,
            cdep_number=number,
            person_name=anchor.text(),
            minority=False,
            end_date=None,
            picture_url=picture_link.attr('href'),
        )

        constituency_text = cells.eq(2).text()
        county_text = cells.eq(3).text()
        is_minority_row = (
            constituency_text in ["ales la nivel naţional", ""]
            and county_text in ["Mino.", "Minoritati", u"Minorităţi"]
        )

        if is_minority_row:
            mandate.minority = True
        else:
            mandate.constituency = int(constituency_text)
            mandate.college = (
                int(cells.eq(college_col).text()) if college_col else None
            )
            mandate.party_name = cells.eq(party_col).text()

            county_name = fix_local_chars(county_text.title())
            # This one county is stored without the hyphen.
            if county_name == "Bistrița-Năsăud":
                county_name = "Bistrița Năsăud"
            mandate.county_name = county_name

        if ended:
            # Start from column 6 and shift according to the row layout.
            end_date_col = 6
            if mandate.minority:
                end_date_col -= 1
            elif uninominal:
                end_date_col += 1
            if not has_start_date:
                end_date_col -= 1
            end_text = cells.eq(end_date_col).text()
            mandate.end_date = parse_date(end_text)

        # Hard-coded name override for this single mandate.
        if (mandate.year, mandate.cdep_number) == (2004, 88):
            mandate.person_name = u"Mălaimare Mihai Adrian"

        yield mandate
def parse_mandates(self, table, ended=False):
    """Generate ``Mandate`` objects, including split first/last names.

    Rows 0 and 1 of ``table`` are headers. Their text selects the column
    layout: 'Colegiul uninominal' in row 1 puts the college in column 4
    and the party in column 5 (otherwise no college, party in column 4);
    'Membru din' in row 0 signals a start-date column.

    For each later row:

    * the link in cell 1 gives the listed name and, via
      ``parse_profile_url``, the (year, chamber, number) triple;
    * the profile page is fetched with ``self.fetch_url``; its
      ``a.highslide`` href is the picture URL and the leading part of its
      ``.headline`` HTML is the name as shown on the page;
    * ``match_split_name`` receives both name spellings and returns the
      (first name, last name) pair stored on the mandate.

    With ``ended=True`` the end date is parsed with
    ``fmt='ro_short_month'`` from a layout-dependent column.
    """
    header_and_rows = list(table.children().items())

    uninominal = 'Colegiul uninominal' in header_and_rows[1].text()
    if uninominal:
        college_col, party_col = 4, 5
    else:
        college_col, party_col = None, 4
    has_start_date = 'Membru din' in header_and_rows[0].text()

    for entry in header_and_rows[2:]:
        tds = entry.children()
        name_link = tds.eq(1).find('a')
        (year, chamber, number) = parse_profile_url(name_link.attr('href'))
        last_first = name_link.text()

        person_page = self.fetch_url(name_link.attr('href'))
        picture = person_page.find('a.highslide')

        # Keep only the text before the first line break, comma and
        # four-NBSP run of the headline.
        headline_html = person_page.find('.headline').html()
        before_break = headline_html.split('<br/>')[0]
        before_comma = before_break.split(',')[0]
        first_last = before_comma.split('\xa0\xa0\xa0\xa0')[0]

        name_parts = match_split_name(last_first, first_last)
        (first_name, last_name) = name_parts

        mandate = Mandate(
            year=year,
            chamber_number=chamber,
            cdep_number=number,
            person_name=last_first,
            person_first_name=first_name,
            person_last_name=last_name,
            minority=False,
            end_date=None,
            picture_url=picture.attr('href'),
        )

        national_markers = ["ales la nivel naţional", ""]
        minority_markers = ["Mino.", "Minoritati", u"Minorităţi"]
        if (tds.eq(2).text() in national_markers
                and tds.eq(3).text() in minority_markers):
            mandate.minority = True
        else:
            mandate.constituency = int(tds.eq(2).text())
            if college_col:
                mandate.college = int(tds.eq(college_col).text())
            else:
                mandate.college = None
            mandate.party_name = tds.eq(party_col).text()

            county_name = fix_local_chars(tds.eq(3).text().title())
            # Normalise the hyphenated spelling to the space-separated one.
            mandate.county_name = (
                "Bistrița Năsăud"
                if county_name == "Bistrița-Năsăud"
                else county_name
            )

        if ended:
            # Column 6 is the baseline; shift it according to the layout.
            end_date_col = 6
            if mandate.minority:
                end_date_col -= 1
            elif uninominal:
                end_date_col += 1
            if not has_start_date:
                end_date_col -= 1

            mandate.end_date = parse_date(
                tds.eq(end_date_col).text(),
                fmt='ro_short_month',
            )

        yield mandate