class ElectionsTableBuilder(object):
  """Create the ``elections`` table and fill it from the results files.

  One row is inserted per chamber ('house' / 'senate') for every election
  year whose results file exists under ``BASE_DIR``.
  """

  # Suffix of the results file that carries each chamber's headline.
  _RESULT_FILES = {'house': 'reps1.txt', 'senate': 'senate1.txt'}

  def __init__(self, host, user, database):
    """Open the database connection used by every other method."""
    self.db = SQLConnection(host=host, user=user, db=database)

  def build(self):
    """Recreate the table, then insert a row for every election year."""
    self.create_elections_table()
    years = utils.get_election_years(base_dir=BASE_DIR)
    self.populate_elections_table(years)

  def __del__(self):
    # ``db`` does not exist if the connection attempt in __init__ raised.
    db = getattr(self, 'db', None)
    if db is not None:
      db.close()

  def create_elections_table(self):
    """Drop any existing ``elections`` table and create an empty one."""
    self.db.execute('DROP TABLE IF EXISTS elections;')
    self.db.execute("""
     CREATE TABLE elections (
       id INT NOT NULL AUTO_INCREMENT,
       level ENUM('federal', 'state'),
       chamber ENUM('house', 'senate', 'legislative_assembly', 'legislative_council'),
       election_date DATE,
       is_byelection BOOLEAN,
       PRIMARY KEY(id)
       );
     """ )

  def _populate_election_table_element(self, year, chamber):
    """Insert the federal election row for one chamber in one year.

    The election date is read from the last three space-separated tokens
    (day, month name, year) of the first line of the results file. When the
    file does not exist an info message is logged and nothing is inserted.

    Args:
      year: election year as a string; used as directory and file prefix.
      chamber: 'house' or 'senate'.

    Raises:
      ValueError: if ``chamber`` is not a supported chamber.
    """
    try:
      txtfile = self._RESULT_FILES[chamber]
    except KeyError:
      raise ValueError('Unknown chamber: %r' % (chamber,))
    fname = BASE_DIR + year + '/' + year + txtfile
    if not os.path.isfile(fname):
      logging.info(' '.join(['No', chamber, 'results file in', year, ". Continuing.\n"]))
      return
    with open(fname, 'r') as f:
      headline = f.readline()
    election_day, election_month, election_year = headline.strip().split(' ')[-3:]
    if election_year == '1901':
      # The day token of the 1901 headline is overridden with the 29th
      # (presumably the headline gives a date range -- confirm against data).
      election_day = '29'
    election_date_str = ' '.join([election_day, election_month, election_year])
    sql = """
        INSERT INTO elections (level, chamber, election_date, is_byelection)
        VALUES ('%s', '%s', STR_TO_DATE('%s', '%%d %%M %%Y'), %d)
      """ % ('federal', chamber, election_date_str, 0)
    self.db.execute(sql)

  def populate_elections_table(self, election_years):
    """Insert the house and senate rows for each year in ``election_years``."""
    for year in election_years:
      self._populate_election_table_element(year, 'house')
      self._populate_election_table_element(year, 'senate')
class CandidatesTableBuilder(object):
  """Build ``candidate_names`` and ``candidacies`` from the candidate index.

  The index files under ``base_dir`` hold one entry per line: lines starting
  with '>' are candidate entries, lines starting with '<' are either aliases
  ("see ...") or entries carrying years, and lines starting with three
  spaces continue the previous entry.
  """

  # '>' entries: the alternatives are tried in order; an entry is usable
  # when the alternative that matched captured exactly two groups.
  _INDEX_ENTRY_RE = re.compile('|'.join([
      r'>\s(.+, .+ \(.+, .+\)):\s(.+)',
      r'>\s(.+, .+):\s(.+)',
      r'>\s(.+, .+):',
      r'>\s(.+, .+),\s(.+)',
      r'>\s(.+):\s(.+)',
      r'>\s(.+, .+)\s(.+)',
  ]))
  # '<' entries without digits: "name: see other" or "name (see other)".
  _INDEX_ALIAS_RE = re.compile(r'<\s(.+): see (.+)|<\s(.+) \(see (.+)\)')
  # '<' entries with digits are treated as ordinary "name: elections" entries.
  _MISTAKEN_ENTRY_RE = re.compile(r'<\s(.+): (.+)')

  def __init__(self, host, user, database):
    """Open the database connection and locate the candidates directory."""
    self.db = SQLConnection(host=host, user=user, db=database)
    self.base_dir = BASE_DIR + 'candidates/'

  def __del__(self):
    # ``db`` does not exist if the connection attempt in __init__ raised.
    db = getattr(self, 'db', None)
    if db is not None:
      db.close()

  def build(self):
    """Recreate both tables and load every candidate index file into them."""
    self.create_candidates_tables()
    candidate_files = self.get_candidate_files()
    candidate_index_raw = self.get_candidate_index(candidate_files)
    # The alias cross-reference is parsed but not written to the database.
    candidate_index, _cross_ref = self.parse_index_and_cross_reference(candidate_index_raw)
    self.write_candidates_table(candidate_index)

  def create_candidates_tables(self):
    """Drop and recreate the ``candidacies`` and ``candidate_names`` tables."""
    self.db.execute('DROP TABLE IF EXISTS candidacies;')
    self.db.execute("""
     CREATE TABLE candidacies (
       id INT NOT NULL AUTO_INCREMENT,
       election_id INT,
       electorate_id INT,
       state_code VARCHAR(3),
       candidate_name_id INT,
       was_elected BOOLEAN,
       PRIMARY KEY(id)
       );
     """ )
    self.db.execute('DROP TABLE IF EXISTS candidate_names;')
    self.db.execute("""
     CREATE TABLE candidate_names (
       id INT NOT NULL AUTO_INCREMENT,
       candidate_name VARCHAR(50),
       PRIMARY KEY(id)
       );
     """ )

  def get_candidate_files(self):
    """Return the paths of all index files found below ``base_dir``.

    Files whose name contains 'intro.txt' or '.shtml' are left out.
    """
    fnames = []
    for root, _, files in os.walk(self.base_dir):
      for end in files:
        if 'intro.txt' not in end and '.shtml' not in end:
          # os.path.join supplies the separator that os.walk omits from
          # sub-directory roots.
          fnames.append(os.path.join(root, end))
    return fnames

  def get_candidate_index(self, fnames):
    """Read the raw index entries, stripped, from the given files.

    A line starting with three spaces is appended (space-separated) to the
    most recent entry; such a line is ignored when no entry precedes it.
    """
    candidate_index_raw = []
    for fname in fnames:
      with open(fname, 'r') as candidate_file:
        for line in candidate_file:
          if line.startswith('>') or line.startswith('<'):
            candidate_index_raw.append(line.strip())
          elif line.startswith('   ') and candidate_index_raw:
            candidate_index_raw[-1] += ' ' + line.strip()
    return candidate_index_raw

  def _pair_from_match(self, pattern, entry):
    """Return ``{first: second}`` for the two groups ``pattern`` captures.

    Returns None when the pattern does not match or does not capture
    exactly two groups.
    """
    match = pattern.search(entry)
    if match is None:
      return None
    captured = [group for group in match.groups() if group is not None]
    return {captured[0]: captured[1]} if len(captured) == 2 else None

  def _re_parse_index_entry(self, entry):
    """Parse a '>' entry into ``{candidate: elections}``, or None."""
    return self._pair_from_match(self._INDEX_ENTRY_RE, entry)

  def _re_parse_index_alias(self, entry):
    """Parse a '<' "see" entry into ``{alias: target}``, or None."""
    return self._pair_from_match(self._INDEX_ALIAS_RE, entry)

  def _re_parse_mistaken_entry(self, entry):
    """Parse a '<' entry that carries years into ``{candidate: elections}``, or None."""
    return self._pair_from_match(self._MISTAKEN_ENTRY_RE, entry)

  def parse_index_and_cross_reference(self, candidate_index_raw):
    """Split raw entries into the candidate index and the alias table.

    Entries that cannot be parsed are logged with a warning and skipped.

    Returns:
      ``(index, cross_ref)``: ``index`` maps candidate name to its elections
      text, ``cross_ref`` maps an alias to the name it refers to.
    """
    index = dict()
    cross_ref = dict()
    for entry in candidate_index_raw:
      if entry.startswith('>'):
        parsed, target = self._re_parse_index_entry(entry), index
      elif entry.startswith('<'):
        if re.search(r'\d', entry) is None:
          parsed, target = self._re_parse_index_alias(entry), cross_ref
        else:
          parsed, target = self._re_parse_mistaken_entry(entry), index
      else:
        continue
      if parsed is None:
        logging.warning('Cannot parse: ' + entry)
      else:
        target.update(parsed)
    return index, cross_ref

  def _split_candidacy(self, candidacy, states_short, states_long):
    """Split one comma-separated piece of an elections text.

    Tries, in order: a single token that is a state code; a single state
    name contained in the text; otherwise the first token is the electorate
    and the state is 'NULL'.

    Returns:
      ``(electorate, state, state_code, years)``, or None when the piece
      holds no space to split on.
    """
    # Pieces after the first comma start with a space; strip it so all
    # pieces are parsed the same way.
    candidacy = candidacy.strip()
    tokens = candidacy.split()
    codes = [tok for tok in tokens if tok.upper() in states_short]
    if len(codes) == 1:
      state = codes[0]
      state_ix = tokens.index(state)
      return ' '.join(tokens[:state_ix]), state, state.upper(), tokens[state_ix + 1:]
    names = [name for name in states_long if name in candidacy]
    if len(names) == 1:
      state = names[0]
      electorate, allyears = candidacy.split(state, 1)
      # With nothing before the state name, the state itself is the electorate.
      electorate = electorate.strip() or state
      return electorate, state, states_short[states_long.index(state)], allyears.split()
    try:
      electorate, allyears = candidacy.split(' ', 1)
    except ValueError:
      return None
    return electorate, 'NULL', 'NULL', allyears.split()

  def write_candidates_table(self, candidate_index):
    """Insert every candidate name and one ``candidacies`` row per year.

    Year tokens not starting with '1' or '2' are skipped as parse noise,
    tokens ending in 'b' or 'b*' are skipped as by-elections, and a
    trailing '*' marks the candidate as elected.

    Args:
      candidate_index: mapping of candidate name to its elections text.
    """
    states_short, states_long = zip(*self.db.fetch('SELECT code, state_name FROM states'))
    for candidate, elections in candidate_index.items():
      self.db.execute("""INSERT INTO candidate_names (candidate_name) VALUES ("%s")""" % candidate)
      # The id is the same for every candidacy of this candidate.
      candidate_sql = """SELECT id FROM candidate_names WHERE candidate_name = "%s" """ % candidate
      candidate_name_id = utils.safe_id(self.db.fetch(candidate_sql))
      for candidacy in elections.split(','):
        parsed = self._split_candidacy(candidacy, states_short, states_long)
        if parsed is None:
          continue
        electorate, state, state_code, years = parsed
        # An unknown state is written as SQL NULL, not the string 'NULL'.
        state_sql = 'NULL' if state_code == 'NULL' else "'%s'" % state_code
        for year in years:
          if not year.startswith(('1', '2')):
            continue # Catch bad parsing
          if year.endswith(('b', 'b*')):
            continue # Exclude by-elections
          safe_year = year[:4]
          was_elected = 1 if year.endswith('*') else 0
          if electorate == 'Senate':
            election_id = utils.get_election_id(self.db, safe_year, 'senate')
            electorate_id = "NULL"
          else:
            election_id = utils.get_election_id(self.db, safe_year, 'house')
            if election_id != "NULL":
              electorate_id = utils.get_electorate_id(self.db, electorate, state_code, election_id)
            else:
              electorate_id = "NULL"
          if election_id == "NULL":
            logging.warning(candidate+' '+electorate+' '+state+' '+year)
          insert_str = """INSERT INTO candidacies (election_id, electorate_id, state_code, candidate_name_id, was_elected)
                          VALUES ({election_id}, {electorate_id}, {state_code}, "{candidate_name_id}", {was_elected})"""
          insert_sql = insert_str.format(election_id=election_id, electorate_id=electorate_id, state_code=state_sql, candidate_name_id=candidate_name_id, was_elected=was_elected)
          self.db.execute(insert_sql)
class ElectoratesTableBuilder(object):
  """Build the ``electorates`` table from the house results files."""

  def __init__(self, host, user, database):
    """Open the database connection used by every other method."""
    self.db = SQLConnection(host=host, user=user, db=database)

  def build(self):
    """Recreate the table and load every house results file into it."""
    self.base_dir = BASE_DIR + 'divisions/'
    self.create_electorates_table()
    states = utils.get_states()
    election_years = utils.get_election_years(base_dir=BASE_DIR)
    election_files = utils.get_election_files(BASE_DIR, election_years, 'house', states)
    for efile in election_files:
      self.parse_electorate_file(efile)

  def __del__(self):
    # ``db`` does not exist if the connection attempt in __init__ raised.
    db = getattr(self, 'db', None)
    if db is not None:
      db.close()

  def _parse_counts_line(self, line):
    """Join runs of consecutive all-digit tokens into integers.

    Args:
      line: list of string tokens; it is not modified.

    Returns:
      One int per run of digit tokens, in order. For example
      ``['88', '742', 'enrolled', '', '83', '418']`` gives ``[88742, 83418]``.
    """
    output = []
    buff = ''
    for item in line:
      if item.isdigit():
        buff += item
        continue
      if buff:
        output.append(int(buff))
      buff = ''
    # A run that reaches the end of the line still counts.
    if buff:
      output.append(int(buff))
    return output

  def _to_int(self, string):
    """Convert a number written with ',' or '.' separators to an int."""
    return int(string.replace(',','').replace('.',''))

  def create_electorates_table(self):
    """Drop any existing ``electorates`` table and create an empty one."""
    self.db.execute('DROP TABLE IF EXISTS electorates;')
    self.db.execute("""
     CREATE TABLE electorates (
       id INT NOT NULL AUTO_INCREMENT,
       election_id INT,
       state_code VARCHAR(3),
       electorate_name VARCHAR(30),
       enrollments INT,
       ballots INT,
       PRIMARY KEY(id)
       );
     """ )

  def parse_electorate_file(self, fileinfo):
    """Insert one ``electorates`` row per electorate found in a results file.

    The file is cut into chunks, each starting on the line above a line
    containing '==='. The first line of a chunk is the headline: electorate
    name, a gap of three or more whitespace characters, then the counts.
    Chunks whose headline has no such gap are skipped. A file without any
    '===' line contributes nothing.

    Args:
      fileinfo: dict with the keys 'fname', 'state', 'year' and 'chamber'.
    """
    fname = fileinfo['fname']
    state_code = fileinfo['state'].upper()
    election_id = utils.get_election_id(self.db, fileinfo['year'], fileinfo['chamber'])
    with open(fname, 'r') as f:
      lines = [line.strip() for line in f]
    breaks = [i for i, x in enumerate(lines) if '===' in x]
    if not breaks:
      return
    electorate_chunks = [lines[start - 1:end - 1]
                         for start, end in zip(breaks[:-1], breaks[1:])]
    electorate_chunks.append(lines[breaks[-1] - 1:])
    # The first two chunks are not electorates (presumably file preamble --
    # confirm against the data files).
    for chunk in electorate_chunks[2:]:
      headline = re.split(r'\s\s\s+', chunk[0])
      if len(headline) == 1:
        continue
      electorate_name = headline[0].split(',')[0].title()
      electorate_counts = self._parse_counts_line(re.split(r'[\s,]', headline[1]))
      electorate_enrolled = electorate_counts[0]
      if len(electorate_counts) > 1:
        electorate_ballots = electorate_counts[1]
      else:
        electorate_ballots = "NULL"
      sql = """
          INSERT INTO electorates (election_id, state_code, electorate_name, enrollments, ballots)
          VALUES (%d, '%s', "%s", %d, %s)
        """ % (election_id, state_code, electorate_name, electorate_enrolled, electorate_ballots)
      self.db.execute(sql)
class ResultsTableBuilder(object):
  """Build the ``results`` table (per-candidate votes) from house results files."""

  def __init__(self, host, user, database):
    """Open the database connection used by every other method."""
    self.db = SQLConnection(host=host, user=user, db=database)

  def build(self):
    """Recreate the table and load every house results file into it."""
    self.create_results_table()
    states = utils.get_states()
    election_years = utils.get_election_years(base_dir=BASE_DIR)
    house_files = utils.get_election_files(BASE_DIR, election_years, 'house', states)
    for hfile in house_files:
      self.parse_results_data(hfile)

  def __del__(self):
    # ``db`` does not exist if the connection attempt in __init__ raised.
    db = getattr(self, 'db', None)
    if db is not None:
      db.close()

  def _parse_counts_line(self, line):
    """Join runs of consecutive all-digit tokens into integers.

    Args:
      line: list of string tokens; it is not modified.

    Returns:
      One int per run of digit tokens, in order. For example
      ``['88', '742', 'enrolled', '', '83', '418']`` gives ``[88742, 83418]``.
    """
    output = []
    buff = ''
    for item in line:
      if item.isdigit():
        buff += item
        continue
      if buff:
        output.append(int(buff))
      buff = ''
    # A run that reaches the end of the line still counts.
    if buff:
      output.append(int(buff))
    return output

  def _to_int(self, string):
    """Convert a number written with ',' or '.' separators to an int."""
    return int(string.replace(',','').replace('.',''))

  def _strip_marks(self, name):
    """Remove the '*' and '+' markers attached to a ballot name."""
    return name.replace('*', '').replace('+', '')

  def _get_party_dict(self, party_chunk):
    """Map upper-cased party codes to party names.

    Only lines starting with '*' are read: the second token is the code,
    the third token is skipped and the remaining tokens form the name.
    Lines with no token after the '*' are ignored.
    """
    party_dictionary = {}
    for entry in party_chunk:
      if len(entry) == 0 or entry[0] != '*':
        continue
      split_entry = re.split(r'\s+', entry)
      if len(split_entry) < 2:
        continue
      party_code = split_entry[1].upper()
      party_name = ' '.join(split_entry[3:])
      party_dictionary[party_code] = party_name
    return party_dictionary

  def _get_electorate_info(self, chunk):
    """Read electorate name, enrolment and ballot counts from a chunk headline.

    Returns:
      ``(name, enrolled, ballots)``; ``ballots`` is "NULL" when the headline
      holds a single number. ``(None, None, None)`` when the headline has no
      gap of three or more whitespace characters.
    """
    headline = re.split(r'\s\s\s+', chunk[0])
    if len(headline) == 1:
      return None, None, None
    electorate_name = headline[0].split(',')[0].title()
    electorate_counts = self._parse_counts_line(re.split(r'[\s,]', headline[1]))
    electorate_enrolled = electorate_counts[0]
    if len(electorate_counts) > 1:
      electorate_ballots = electorate_counts[1]
    else:
      electorate_ballots = "NULL"
    return electorate_name, electorate_enrolled, electorate_ballots

  def create_results_table(self):
    """Drop any existing ``results`` table and create an empty one."""
    self.db.execute('DROP TABLE IF EXISTS results;')
    # The percentage columns are DECIMAL(4,1): unopposed candidates are
    # recorded with 100, which DECIMAL(3,1) (at most 99.9) cannot hold.
    self.db.execute("""
     CREATE TABLE results (
       id INT NOT NULL AUTO_INCREMENT,
       election_id INT,
       electorate_id INT,
       candidate_name_id INT,
       candidacy_id INT,
       ballot_name VARCHAR(50),
       party_code VARCHAR(5),
       votes INT,
       pct DECIMAL(4,1),
       tpp_votes INT,
       tpp_pct DECIMAL(4,1),
       PRIMARY KEY(id)
       );
     """ )

  def _parse_ballot_counts(self, ballot_data):
    """Parse the result lines of one electorate into per-candidate records.

    Lines shorter than 20 characters, containing ':' or starting with '>'
    are ignored. The remaining lines fall into three kinds:

    * last column 'Unopposed': the candidate is recorded as elected with
      100 percent and no vote count;
    * a line containing 'informal': recorded under the key 'Informal';
    * last column a percentage or swing: a candidate line when the name has
      several parts, otherwise a later count whose votes are added to the
      'tpp_votes' of the candidate with that last name.

    After all lines are read, the two candidates with the most 'tpp_votes'
    get 'tpp_pct' (their share of the two totals combined); every other
    record gets 'tpp_votes' and 'tpp_pct' of "NULL".

    Args:
      ballot_data: list of stripped text lines for one electorate.

    Returns:
      Dict keyed by title-cased last name (or 'Informal'); each value is a
      dict with keys such as 'full_name', 'party', 'votes', 'pct',
      'tpp_votes', 'tpp_pct', 'is_elected' and 'is_incumbent'.
    """
    ballot_dict = dict()
    pct_exact_pattern = re.compile(r'^\d{2}\.\d$')
    delta_pct_exact_pattern = re.compile(r'^\({0,1}[\+\-\s]\d{2}\.\d\){0,1}$')
    pct_pattern = re.compile(r'\d{2}\.\d')
    delta_pct_pattern = re.compile(r'\({0,1}[\+\-\s]\d{2}\.\d\){0,1}')
    votes_pattern = re.compile(r'\d{0,3}\,{0,1}\d{1,3}')
    for entry in ballot_data:
      entry = entry.replace('\x92', "'").replace('\xb9',"'")
      entry_data = re.split(r'\s{2,}', entry)
      if len(entry) < 20 or len(entry_data) < 1 or ':' in entry or entry.startswith('>'):
        continue
      if entry_data[-1] == 'Unopposed':
        name = entry_data[0]
        namekey = self._strip_marks(name).strip().title()
        ballot_dict[namekey] = {
            'full_name': namekey.title(),
            'is_incumbent': 1 if name.endswith('*') or name.endswith('+') else 0,
            'is_elected': 1,
            'votes': 'NULL',
            'tpp_votes': 'NULL',
            'tpp_pct': 100,
            'pct': 100,
            'delta_pct': 'NULL',
            'party': entry_data[1] if entry_data[1].isalpha() else 'NULL',
        }
      elif 'informal' in entry and votes_pattern.search(entry) is not None:
        namekey = 'Informal'
        ballot_dict[namekey] = {'tpp_votes': 'NULL'}
        if 'unknown' in entry:
          ballot_dict[namekey].update({'votes': 'NULL'})
          ballot_dict[namekey].update({'pct': 'NULL'})
        else:
          # The informal count is expected at the very start of the line.
          votes = int(votes_pattern.match(entry).group().replace(',',''))
          pct = float(pct_pattern.search(entry).group())
          ballot_dict[namekey].update({'votes': votes})
          ballot_dict[namekey].update({'pct': pct})
      elif (pct_exact_pattern.match(entry_data[-1]) is not None or
            delta_pct_exact_pattern.match(entry_data[-1]) is not None):
        name = entry_data[0]
        if name.replace(',','').isdigit():
          continue
        name_parts = self._strip_marks(name).split()
        last_name = name_parts[-1]
        namekey = last_name.title()
        if len(name_parts[0]) == 1:
          # "J Smith" refers to an existing record only when that
          # initial-plus-surname key is already present.
          initialled_key = ' '.join(name_parts).title()
          if initialled_key in ballot_dict:
            namekey = initialled_key
            name_parts.pop(0)

        if len(name_parts) > 1 or name == 'Strider':
          if namekey in ballot_dict:
            # Second candidate with this last name: re-key the earlier
            # record as "<initial> <last name>" and key this one likewise.
            prev_name = ballot_dict[namekey]['full_name']
            if name_parts[0] != 'Hon':
              new_name = ' '.join([prev_name[0], prev_name.split()[-1]])
            else:
              new_name = ' '.join([prev_name[1], prev_name.split()[-1]])
            ballot_dict.update({new_name: copy.deepcopy(ballot_dict[namekey])})
            del ballot_dict[namekey]
            namekey = ' '.join([name[0], namekey])
          ballot_dict.update({namekey: dict()})
          ballot_dict[namekey].update({'full_name': ' '.join(name_parts).title()})
          is_incumbent = 1 if name.endswith('*') else 0
          ballot_dict[namekey].update({'is_incumbent': is_incumbent})
          # An all-capitals last name marks the elected candidate.
          is_elected = 1 if last_name == last_name.upper() else 0
          ballot_dict[namekey].update({'is_elected': is_elected})
          votes = int(votes_pattern.search(entry).group().replace(',',''))
          ballot_dict[namekey].update({'votes': votes})
          ballot_dict[namekey].update({'tpp_votes': votes})
          pct = float(pct_pattern.search(entry).group())
          ballot_dict[namekey].update({'pct': pct})
          party_code = entry_data[1] if entry_data[1].isalpha() else 'NULL'
          ballot_dict[namekey].update({'party': party_code})
          delta_match = delta_pct_pattern.search(entry)
          if delta_match is None:
            delta_pct = 'NULL'
          else:
            delta_pct = float(delta_match.group().replace('(', '').replace(')', ''))
          ballot_dict[namekey].update({'delta_pct': delta_pct})
        else:
          pref_votes = int(votes_pattern.search(entry).group().replace(',',''))
          ballot_dict[namekey]['tpp_votes'] += pref_votes
          is_elected = 1 if last_name == last_name.upper() else 0
          ballot_dict[namekey].update({'is_elected': is_elected})

    # Rank only records that carry a numeric total; 'Informal' and
    # unopposed candidates have none.
    tpp_dict = dict()
    for k, v in ballot_dict.items():
      if k != 'Informal' and v['tpp_votes'] != 'NULL':
        tpp_dict[k] = v['tpp_votes']
    if tpp_dict:
      top_tpp = sorted(tpp_dict.items(), key=lambda x: x[1], reverse=True)[:2]
      top_names = [name for name, _ in top_tpp]
      vote_total = sum(votes for _, votes in top_tpp)
      for k in ballot_dict:
        if k not in top_names:
          ballot_dict[k].update({'tpp_votes': "NULL"})
          ballot_dict[k].update({'tpp_pct': "NULL"})
        elif vote_total == 0:
          ballot_dict[k].update({'tpp_pct': "NULL"})
        else:
          tpp_pct = float(tpp_dict[k]) / vote_total * 100
          ballot_dict[k].update({'tpp_pct': tpp_pct})
    return ballot_dict

  def parse_results_data(self, fileinfo):
    """Insert one ``results`` row per candidate (and informal count) in a file.

    The file is cut into chunks, each starting on the line above a line
    containing '==='. The first chunk holds the party list; electorates are
    read from the fourth chunk onwards. A file without any '===' line
    contributes nothing.

    Args:
      fileinfo: dict with the keys 'fname', 'state', 'year' and 'chamber'.
    """
    fname = fileinfo['fname']
    election_id = utils.get_election_id(self.db, fileinfo['year'], fileinfo['chamber'])
    with open(fname, 'r') as f:
      lines = [line.strip() for line in f]
    breaks = [i for i, x in enumerate(lines) if '===' in x]
    if not breaks:
      return
    electorate_data = [lines[start - 1:end - 1]
                       for start, end in zip(breaks[:-1], breaks[1:])]
    electorate_data.append(lines[breaks[-1] - 1:])
    # NOTE(review): the party dictionary is built but not used below.
    party_dict = self._get_party_dict(electorate_data[0])
    for electorate in electorate_data[3:]:
      electorate_name, _, _ = self._get_electorate_info(electorate)
      if electorate_name is None:
        continue
      electorate_id = utils.get_electorate_id(self.db, electorate_name, fileinfo['state'], election_id)
      logging.info(electorate_name + ' ' +fileinfo['year'])
      ballot_counts = self._parse_ballot_counts(electorate)
      logging.info(ballot_counts)
      for candidate in ballot_counts:
        details = ballot_counts[candidate]
        if candidate != 'Informal':
          ballot_name = details['full_name']
          candidate_name_id = utils.get_candidate_name_id(self.db, details['full_name'])
          candidacy_id = utils.get_candidacy_id(self.db, election_id, electorate_id, candidate_name_id)
          party_code = details['party']
          votes = details['votes']
          pct = details['pct']
          tpp_votes = details['tpp_votes']
          tpp_pct = details['tpp_pct']
        else:
          ballot_name = 'NULL'
          candidate_name_id = 'NULL'
          candidacy_id = 'NULL'
          party_code = 'NULL'
          votes = details['votes']
          pct = details['pct']
          tpp_votes = 'NULL'
          tpp_pct = 'NULL'
        sql = """
            INSERT INTO results (election_id, electorate_id, candidate_name_id, candidacy_id, ballot_name, party_code, votes, pct, tpp_votes, tpp_pct)
            VALUES (%s, %s, %s, %s, """ % (election_id, electorate_id, candidate_name_id, candidacy_id)
        # Text columns are quoted unless they are the SQL NULL marker.
        sql += '"%s", ' % ballot_name if ballot_name != 'NULL' else 'NULL, '
        sql += '"%s", ' % party_code if party_code != 'NULL' else 'NULL, '
        sql += "%s, %s, %s, %s)" % (votes, pct, tpp_votes, tpp_pct)
        self.db.execute(sql)
class PartiesTableBuilder(object):
    """Build the ``parties`` table from the party lists of the results files."""

    def __init__(self, host, user, database):
        """Open the database connection used by every other method."""
        self.db = SQLConnection(host=host, user=user, db=database)

    def build(self):
        """Recreate the table and fill it from all house and senate files."""
        self.create_parties_table()
        states = utils.get_states()
        election_years = utils.get_election_years(base_dir=BASE_DIR)
        house_files = utils.get_election_files(BASE_DIR, election_years, "house", states)
        senate_files = utils.get_election_files(BASE_DIR, election_years, "senate", states)
        party_dictionary = {}
        for fdata in house_files:
            self.parse_party_data(fdata, party_dictionary)
        for sdata in senate_files:
            self.parse_party_data(sdata, party_dictionary)
        self.insert_party_data(party_dictionary)

    def __del__(self):
        # ``db`` does not exist if the connection attempt in __init__ raised.
        db = getattr(self, "db", None)
        if db is not None:
            db.close()

    @staticmethod
    def _sql_string(value):
        """Return ``value`` as a single-quoted SQL string literal.

        Backslashes and single quotes are doubled so that names containing
        an apostrophe do not end the literal early.
        """
        return "'%s'" % value.replace("\\", "\\\\").replace("'", "''")

    def _get_election_id(self, year, chamber):
        """Return the id of the election held in ``year`` for ``chamber``.

        Raises:
            IndexError: if the query returns no row.
        """
        sql = "SELECT id FROM elections WHERE YEAR(election_date) = %s AND chamber = '%s'" % (year, chamber)
        id_raw = self.db.fetch(sql)
        return int(id_raw[0][0])

    def create_parties_table(self):
        """Drop any existing ``parties`` table and create an empty one."""
        self.db.execute("DROP TABLE IF EXISTS parties;")
        self.db.execute(
            """
     CREATE TABLE parties (
       party_code VARCHAR(5),
       party_name VARCHAR(50),
       party_name_alt VARCHAR(50),
       PRIMARY KEY(party_code)
       );
     """
        )

    def parse_party_data(self, fileinfo, party_dictionary):
        """Add the parties listed in one results file to ``party_dictionary``.

        The party list is the chunk between the first two lines containing
        '===' (starting one line above the first). Only lines starting with
        '*' are read: the second token is the code (upper-cased), the third
        token is skipped and the remaining tokens form the name. Files with
        fewer than two '===' lines are ignored.

        Args:
            fileinfo: dict whose 'fname' key is the path of the file.
            party_dictionary: dict updated in place; maps a party code to
                the list of distinct names seen for it, in order of
                appearance.
        """
        fname = fileinfo["fname"]
        with open(fname, "r") as f:
            lines = [line.strip() for line in f]
        breaks = [i for i, x in enumerate(lines) if "===" in x]
        if len(breaks) < 2:
            return
        party_chunk = lines[breaks[0] - 1 : breaks[1] - 1]
        for entry in party_chunk:
            if len(entry) == 0 or entry[0] != "*":
                continue
            split_entry = re.split(r"\s+", entry)
            if len(split_entry) < 2:
                # A lone '*' carries no party code.
                continue
            party_code = split_entry[1].upper()
            party_name = " ".join(split_entry[3:])
            names = party_dictionary.setdefault(party_code, [])
            if party_name not in names:
                names.append(party_name)

    def insert_party_data(self, party_dictionary):
        """Insert one ``parties`` row per party code.

        The first name seen becomes ``party_name`` and the second, if any,
        ``party_name_alt``; further names are not stored. A party with a
        single name gets SQL NULL as its alternative name.

        Args:
            party_dictionary: mapping of party code to a non-empty list of
                names, as built by ``parse_party_data``.
        """
        for party in party_dictionary:
            names = party_dictionary[party]
            party_name = names[0]
            if len(names) == 1:
                party_name_alt_sql = "NULL"
            else:
                party_name_alt_sql = self._sql_string(names[1])
            # NOTE(review): codes built by parse_party_data are single
            # upper-cased tokens, so this never matches them -- possibly
            # meant to test the party name; confirm intent.
            if "Emergency Committee" in party:
                party = "Emergency Committee"
            sql = """
        INSERT INTO parties (party_name, party_code, party_name_alt)
        VALUES (%s, %s, %s)
      """ % (
                self._sql_string(party_name),
                self._sql_string(party),
                party_name_alt_sql,
            )
            self.db.execute(sql)