no = int(sheet.cell_value(rowx=row, colx=2))
                abstained = int(sheet.cell_value(rowx=row, colx=3))
                total = int(sheet.cell_value(rowx=row, colx=4))
                votes_by_party_dict[party] = vote_stats_per_party_tuple(yes, no, abstained, total)
            sessions.append(session_tuple(description, time, None, votes_by_party_dict))
        row += 1
    return reg_by_party_dict, sessions


##############################################################################
# Parse and save to disc.
##############################################################################
logger_to_db = logging.getLogger('to_db')


# Original URLs of the stenograms that already have a row in the database
# (presumably consulted further down to avoid re-importing them).
cur.execute("""SELECT original_url FROM stenograms""")
urls_already_in_db = set(row[0] for row in cur.fetchall())

# Build the list of (ID, original URL) pairs, one per line of each ID file.
# NOTE(review): the 41/42 suffixes look like assembly numbers that map to the
# site's `ns/7` and `ns/50` URL segments -- confirm against the website.
stenogram_IDs = []
for ids_path, url_prefix in (
        ('data/IDs_plenary_stenograms_41',
         u'http://www.parliament.bg/bg/plenaryst/ns/7/ID/'),
        ('data/IDs_plenary_stenograms_42',
         u'http://www.parliament.bg/bg/plenaryst/ns/50/ID/')):
    # `with` closes the ID file as soon as it has been read instead of
    # leaving the handle open until garbage collection.
    with open(ids_path) as ids_file:
        stenogram_IDs += [(stenogram_id, url_prefix + stenogram_id)
                          for stenogram_id in map(str.strip, ids_file)]
for i, (ID, original_url) in enumerate(stenogram_IDs[-5:]):
    problem_by_name = False
    problem_by_party = False
    logger_to_db.info("Parsing stenogram %s - %d of %d." % (ID, i+1, len(stenogram_IDs)))

    try:
        f = urlopen(original_url)
        complete_stenogram_page = f.read().decode('utf-8')
        parser = StenogramsHTMLParser(complete_stenogram_page)
        date_string = parser.date.strftime('%d%m%y')
from pk_logging import logging
from pk_tools import urlopen, canonical_party_name


logger_mps = logging.getLogger("mps_data")


# Module-level accumulators; nothing in this section fills them yet.
names_list = []
forces_list = []
mails_list = []
url_list = []


# TODO hardcoded value: id of the first mp from the current assembly
# (the literal 835 that starts the id range in the loop below).
# Read one integer MP id per line; `with` closes the file once it is consumed
# and the list form keeps `indices` reusable (e.g. for `max(indices)`).
with open("data/IDs_MPs") as ids_file:
    indices = [int(line) for line in ids_file]
cur.execute("""SELECT original_url FROM mps""")
# Take the first column row by row. The former `zip(*rows)[0]` raised
# IndexError when the table was empty and is not valid on Python 3, where
# `zip` returns a non-subscriptable iterator.
urls_already_in_db = set(row[0] for row in cur.fetchall())
for i in range(835, max(indices) + 1):
    original_url = unicode("http://www.parliament.bg/bg/MP/%d" % i)
    if original_url in urls_already_in_db:
        continue
    logger_mps.info("Parsing data for MP id %s" % i)
    xml_file = unicode("http://www.parliament.bg/export.php/bg/xml/MP/%d" % i)
    xml_str = urlopen(xml_file).read()
    try:
        r = xmltodict.parse(xml_str)
        name = (
            " ".join(
                [
                    r["schema"]["Profile"]["Names"]["FirstName"]["@value"],
                    r["schema"]["Profile"]["Names"]["SirName"]["@value"],