import datetime
import re

import bs4
import xlrd

from pk_db import db, cur
from pk_logging import logging, logger_workaround
from pk_namedtuples import *
from pk_tools import urlopen, canonical_party_name


##############################################################################
# HTML Parsing
##############################################################################
# Module-level logger for the stenogram HTML-parsing section below.
logger_html = logging.getLogger('html_parser')


class StenogramsHTMLParser(bs4.BeautifulSoup):
    """Parse a parliamentary stenogram HTML page.

    On construction this extracts:
      - ``date``          -- session date from the ``dateclass`` div
      - ``data_list``     -- stenogram body as a list of stripped strings
      - ``votes_indices`` -- indices into ``data_list`` where a
                             "N MPs have voted" announcement appears
    """

    def __init__(self, text):
        super(StenogramsHTMLParser, self).__init__(text)

        # Session date, formatted like "21/07/2013" on the page.
        self.date = datetime.datetime.strptime(self.find('div', class_='dateclass').string.strip(), '%d/%m/%Y')

        self.data_list = list(self.find('div', class_='markcontent').stripped_strings)

        self.votes_indices = []
        how_many_have_voted_marker = u'Гласувал[и]?[ ]*\d*[ ]*народни[ ]*представители:'
        # The above marker regex must permit a number of spelling errors that can be present in the stenograms.
        for i, l in enumerate(self.data_list):
            if re.search(how_many_have_voted_marker, l):
                # NOTE(review): the pasted original had a stray dedented
                # `return i` / `raise ValueError` here, which is a syntax
                # error inside __init__.  Collecting every matching index
                # is the only behavior consistent with votes_indices being
                # initialized above — confirm against project history.
                self.votes_indices.append(i)


##############################################################################
# Load templates.
##############################################################################
# Mako template lookup: templates are read from ./mako_templates and
# rendered with strict checking of undefined template variables.
templates = TemplateLookup(
    directories=["mako_templates"],
    input_encoding="utf-8",
    output_encoding="utf-8",
    strict_undefined=True,
)


##############################################################################
# Prepare loggers.
##############################################################################
# NOTE(review): this rebinds `logger_html`, which is also assigned earlier
# in this file for 'html_parser' — presumably a concatenated-file boundary;
# confirm the two sections were originally separate modules.
logger_html = logging.getLogger("static_html_gen")


##############################################################################
# Set up sitemap.
##############################################################################
class Sitemap(object):
    """Accumulate (loc, priority, images) entries for rendering sitemap.xml."""

    def __init__(self):
        # %s receives the concatenated <url> entries.
        self.base_string = '<?xml version="1.0" encoding="UTF-8"?>\n<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1">\n%s\n</urlset>'
        # Placeholders: site-relative URL, priority (one decimal), image entries.
        self.url_string = "<url><loc>http://www.parlamentaren-kontrol.com/%s</loc><priority>%0.1f</priority>%s</url>"
        # Placeholders: site-relative image URL, caption.
        self.image_string = "<image:image><image:loc>http://www.parlamentaren-kontrol.com/%s</image:loc><image:caption>%s</image:caption></image:image>"
        # Registered (loc, priority, images) tuples, in insertion order.
        self.content_tuples = []

    def add(self, loc, priority, images=None):
        """Register one page for the sitemap.

        loc      -- site-relative URL of the page
        priority -- sitemap priority in [0.0, 1.0]
        images   -- optional list of image entries for the page

        Fix: the previous signature used a mutable default (`images=[]`),
        so every no-image call stored the SAME shared list object; a later
        mutation of one entry's image list would leak into all of them.
        """
        self.content_tuples.append((loc, priority, [] if images is None else images))
# -*- coding: utf-8 -*-  # NOTE(review): stray coding cookie — only effective on a file's first two lines; likely marks a concatenated-file boundary
import xml

import xmltodict

from pk_db import db, cur
from pk_logging import logging
from pk_tools import urlopen, canonical_party_name


logger_mps = logging.getLogger("mps_data")


# Accumulators filled while scraping each MP's page.
names_list, forces_list, mails_list, url_list = [], [], [], []


# TODO hardcoded value: id of the first mp from the current assembly
# MP ids to scrape, one integer per line of the data file.
# Fix: use `with` so the file handle is closed deterministically
# (the old bare `open(...)` leaked it).
with open("data/IDs_MPs") as ids_file:
    indices = [int(line) for line in ids_file]
cur.execute("""SELECT original_url FROM mps""")
# URLs of MPs already stored, so re-runs skip them.
# Fix: the old `set(zip(*cur.fetchall())[0])` raised IndexError when the
# table was empty; iterating rows handles that case (and works on both
# Python 2 and 3, where zip() is lazy).
urls_already_in_db = set(row[0] for row in cur.fetchall())
for i in range(835, max(indices) + 1):
    original_url = unicode("http://www.parliament.bg/bg/MP/%d" % i)
    if original_url in urls_already_in_db:
        continue
    logger_mps.info("Parsing data for MP id %s" % i)
    xml_file = unicode("http://www.parliament.bg/export.php/bg/xml/MP/%d" % i)
    xml_str = urlopen(xml_file).read()
    try:
u'отхвърлен(зала второ четене)':                       'rejected_2nd',
u'оттеглен от вносителя(оттеглен)':                    'retracted',
u'наложено вето(вето президент)':                      'vetoed',
u'внесен(преразглеждане зала (след вето))':            'proposed_after_veto',
u'повторно приемане(преразглеждане зала (след вето))': 'accepted_after_veto',
# TODO the next few are unclear in their definition (raise a warning)
u'оспорени текстове(преразглеждане зала (след вето))': 'challenged_after_veto',
u'оспорен по принцип(преразглеждане зала (след вето))':'challenged_after_veto',
#u'обсъждане(зала първо четене)':                       'proposed_1st', see signature 002-02-50
}


##############################################################################
# Gather bills.
##############################################################################
logger_html_bills = logging.getLogger('html_parser_bills')

# NOTE(review): a fresh cursor is used here rather than the shared `cur`
# imported above — presumably so `cur` stays free for writes; confirm.
origurlcur = db.cursor()
origurlcur.execute("""SELECT original_url FROM bills""")
# Bills already stored in the DB, so re-scraping can skip them.
urls_already_in_db = set(u[0] for u in origurlcur)

logger_html_bills.info('Opening calendar.')
base_url = 'http://www.parliament.bg'
# The bills index page carries a per-month calendar of bill submissions.
parser_calendar = bs4.BeautifulSoup(urlopen(base_url + '/bg/bills/').read())
for month in parser_calendar.find('div', id='calendar').find_all('a'):
    href = month.get('href')
    y,m = map(int, href.split('/')[-1].split('-'))
    if y<2009 or (y==2009 and m<7): continue # XXX hardcoded check (only last parliament)
    logger_html_bills.info('Opening calendar %d %d.'%(y, m))
    month_page = bs4.BeautifulSoup(urlopen(base_url + href).read())
    for a in month_page.find('div', id='monthview').find_all('a'):