def sync_data(conn, filename, batch_time):
    """Compare the groups of a JSON export with the database and print the delta.

    Parses the JSON file at ``filename``, prints a header of ``--``-prefixed
    lines, and for every entry of each group's ``praesidium`` list that has no
    Interessenbindung with that group yet prints whatever
    ``sql_statement_generator.insert_interessenbindung_parlamentarische_gruppe``
    returns. The outcome per parliamentarian is recorded in a ``Summary``.

    Args:
        conn: Handle passed through unchanged to every ``db`` helper
            (presumably an open database connection).
        filename: Path of the JSON file. It must contain
            ``metadata.pdf_creation_date`` (format ``%Y-%m-%d %H:%M:%S``),
            ``metadata.archive_pdf_name`` and a ``data`` list of groups with
            the keys ``name_de``, ``name_fr``, ``name_it`` and ``praesidium``.
        batch_time: Object exposing ``year``, ``month`` and ``day``; used for
            the printed file name and handed on to the helpers.

    Returns:
        The populated ``Summary`` instance.

    Raises:
        SystemExit: With status 1 when ``db.get_parlamentarier_id_by_name``
            yields no id for a presidium member.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()
    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(conn):
        summary_row = summary.get_row(parlamentarier_id)
        summary_row.parlamentarier_name = nachname + ", " + vorname

    # Only parsing needs the file handle, so it is released before the
    # database work starts. The encoding is pinned because the locale default
    # is platform dependent and the data contains non-ASCII text.
    with open(filename, encoding="utf-8") as data_file:
        content = json.load(data_file)

    pdf_creation_date = content["metadata"]["pdf_creation_date"]
    archive_pdf_name = content["metadata"]["archive_pdf_name"]
    stichdatum = datetime.strptime(pdf_creation_date, "%Y-%m-%d %H:%M:%S")
    print("-- PDF creation date: {}".format(stichdatum))
    print("-- PDF archive file: {}".format(archive_pdf_name))
    print("-- ----------------------------- ")

    handle_removed_groups(content, conn, summary, stichdatum, batch_time)

    # Reference handed to the statement generator for groups without a
    # database id. Presumably a SQL session variable set by the statement that
    # creates the group -- confirm against handle_homepage_and_sekretariat.
    new_group_reference = '@last_parlamentarische_gruppe'
    co_president_titles = {"M": "Co-Präsident", "F": "Co-Präsidentin"}

    for group in content["data"]:
        name_de = normalize_organisation(group["name_de"])
        name_fr = normalize_organisation(group["name_fr"])
        name_it = normalize_organisation(group["name_it"])
        members = group["praesidium"]

        organisation_id = db.get_organisation_id(conn, name_de, name_fr, name_it)
        if organisation_id:
            handle_names(group, name_de, name_fr, name_it, organisation_id,
                         summary, conn, batch_time)
        handle_homepage_and_sekretariat(group, name_de, name_fr, name_it,
                                        organisation_id, summary, conn,
                                        batch_time)

        for member in members:
            names = get_names(member)
            parlamentarier_id = db.get_parlamentarier_id_by_name(conn, names)
            if not parlamentarier_id:
                print(
                    "DATA INTEGRITY FAILURE: Parlamentarier {} not found in database."
                    .format(member))
                sys.exit(1)

            parlamentarier_dict = db.get_parlamentarier_dict(conn, parlamentarier_id)
            geschlecht = parlamentarier_dict["geschlecht"]

            # A description is only set when the presidium is shared.
            beschreibung = ''
            if len(members) > 1:
                beschreibung = co_president_titles.get(geschlecht, '')

            # organisation_id is deliberately never overwritten with the
            # placeholder: doing so made every further member of a new group
            # hit the database with the placeholder and be recorded under it
            # instead of "neu".
            interessenbindung_id = None
            if organisation_id:
                interessenbindung_id = db.get_interessenbindung_id(
                    conn, parlamentarier_id, organisation_id, stichdatum)

            summary_row = summary.get_row(parlamentarier_id)
            if interessenbindung_id:
                summary_row.gruppe_unveraendert(organisation_id, name_de)
                continue

            print("\n-- Neue Interessenbindung zwischen '{}' und '{}'".format(
                name_de, member))
            summary_row.neue_gruppe(organisation_id or "neu", name_de)
            print(
                sql_statement_generator.
                insert_interessenbindung_parlamentarische_gruppe(
                    parlamentarier_id, organisation_id or new_group_reference,
                    stichdatum, beschreibung, batch_time))

    return summary
def sync_data(conn, filename, council, batch_time):
    """Compare the guests listed in a JSON export with those in the database.

    For every entry of the file's ``data`` list the two guests returned by
    ``db.get_guests`` are matched against the (up to two) guests in the file.
    ``guest_remained``, ``guest_removed`` and ``guest_added`` are called
    accordingly and the result is recorded in one ``summary.SummaryRow`` per
    parliamentarian.

    Args:
        conn: Handle passed through unchanged to the ``db`` helpers and to
            ``guest_added`` (presumably an open database connection).
        filename: Path of the JSON file. It must contain
            ``metadata.pdf_creation_date`` (format ``%Y-%m-%d %H:%M:%S``),
            ``metadata.archive_pdf_name`` and a ``data`` list whose entries
            have the keys ``canton``, ``party``, ``names`` and ``guests``.
        council: Value printed in the header.
        batch_time: Object exposing ``year``, ``month`` and ``day``; used for
            the printed file name and handed on to the guest helpers.

    Returns:
        A list with one ``summary.SummaryRow`` per entry of ``data``, in file
        order.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- {} ".format(council))
    print("-- File: {}".format(backup_filename))

    # Only parsing needs the file handle, so it is released before the
    # database work starts. The encoding is pinned because the locale default
    # is platform dependent.
    with open(filename, encoding="utf-8") as data_file:
        content = json.load(data_file)

    pdf_date_str = content["metadata"]["pdf_creation_date"]
    pdf_date = datetime.strptime(
        pdf_date_str, "%Y-%m-%d %H:%M:%S")  # e.g. 2019-07-12 14:55:08
    archive_pdf_name = content["metadata"]["archive_pdf_name"]
    print("-- PDF creation date: {}".format(pdf_date))
    print("-- PDF archive file: {}".format(archive_pdf_name))
    print("-- ----------------------------- ")

    summary_rows = []
    for count, parlamentarier in enumerate(content["data"], start=1):
        # Load info about the parliamentarian; the resolved id is written
        # back into the parsed entry.
        kanton_id = db.get_kanton_id(conn, parlamentarier["canton"])
        partei_id = db.get_partei_id(conn, parlamentarier["party"])
        parlamentarier_id = db.get_parlamentarier_id(
            conn, parlamentarier["names"], kanton_id, partei_id)
        parlamentarier["id"] = parlamentarier_id
        parlamentarier_db_dict = db.get_parlamentarier_dict(
            conn, parlamentarier_id)
        im_rat_bis = parlamentarier_db_dict['im_rat_bis']
        # Active means: no end date, or an end date still in the future.
        parlamentarier_active = im_rat_bis is None or im_rat_bis > date.today()

        # Existing guests (from database).
        existing_guest_1, existing_guest_2 = db.get_guests(
            conn, parlamentarier_id)

        # New guests (from JSON file); missing slots become None.
        new_guests = parlamentarier["guests"]
        new_guest_1 = new_guests[0] if len(new_guests) > 0 else None
        new_guest_2 = new_guests[1] if len(new_guests) > 1 else None

        summary_row = summary.SummaryRow(parlamentarier, count,
                                         parlamentarier_db_dict)

        # Check for each existing guest whether it left or stayed. Both slots
        # follow the same rule, so they share one loop: the first new guest
        # that compares equal wins, otherwise the guest counts as removed.
        existing_slots = (
            (existing_guest_1, summary_row.set_guest_1,
             summary_row.set_guest_1_changes,
             summary_row.set_removed_guest_1),
            (existing_guest_2, summary_row.set_guest_2,
             summary_row.set_guest_2_changes,
             summary_row.set_removed_guest_2),
        )
        for existing_guest, set_kept, set_changes, set_removed in existing_slots:
            for new_guest in (new_guest_1, new_guest_2):
                if name_logic.are_guests_equal(existing_guest, new_guest):
                    set_kept(existing_guest)
                    funktion_equal = guest_remained(
                        parlamentarier, existing_guest, new_guest, batch_time,
                        pdf_date)
                    if not funktion_equal:
                        set_changes("funktion")
                    break
            else:
                guest_removed(parlamentarier, existing_guest, batch_time,
                              pdf_date)
                set_removed(existing_guest)

        # Check if new guest 1 was already here.
        if (not name_logic.are_guests_equal(new_guest_1, existing_guest_1)
                and not name_logic.are_guests_equal(new_guest_1,
                                                    existing_guest_2)
                and parlamentarier_active):
            guest_added(conn, parlamentarier, new_guest_1, batch_time,
                        pdf_date)
            summary_row.set_new_guest_1(new_guest_1)

        # Check if new guest 2 was already here.
        # NOTE: a disabled "quick and dirty" special case used to exclude
        # parlamentarier_id 223 with a guest named "Egger" here
        # (SR Engler + ZB Egger, new NR).
        if (not name_logic.are_guests_equal(new_guest_2, existing_guest_1)
                and not name_logic.are_guests_equal(new_guest_2,
                                                    existing_guest_2)
                and parlamentarier_active):
            guest_added(conn, parlamentarier, new_guest_2, batch_time,
                        pdf_date)
            # When new guest 1 matches existing guest 2, the new guest is
            # reported in slot 1 instead of slot 2.
            if name_logic.are_guests_equal(new_guest_1, existing_guest_2):
                summary_row.set_new_guest_1(new_guest_2)
            else:
                summary_row.set_new_guest_2(new_guest_2)

        summary_rows.append(summary_row)

    return summary_rows
def sync_data(group_type, conn, filename, batch_time):
    """Compare the groups and members of a JSON export with the database.

    Parses the JSON file at ``filename``, prints a header of ``--``-prefixed
    lines, and walks every group's ``praesidium`` and ``mitglieder`` entries.
    Depending on what ``db.get_interessenbindung_id`` reports, it prints the
    result of the matching ``sql_statement_generator`` call (insert,
    end + insert, or description update) and records the outcome in a
    ``Summary``.

    Args:
        group_type: Passed through unchanged to ``handle_removed_groups`` and
            ``handle_organisation``.
        conn: Handle passed through unchanged to every ``db`` helper
            (presumably an open database connection).
        filename: Path of the JSON file. It must contain
            ``metadata.pdf_creation_date`` (format ``%Y-%m-%d %H:%M:%S``),
            ``metadata.archive_pdf_name``, ``metadata.url`` and a ``data``
            list of groups with the keys ``praesidium`` and ``mitglieder``,
            each a list of ``(member, title)`` pairs.
        batch_time: Object exposing ``year``, ``month`` and ``day``; used for
            the printed file name and handed on to the helpers.

    Returns:
        The populated ``Summary`` instance.

    Raises:
        SystemExit: With status 1 when ``db.get_parlamentarier_id_by_name``
            yields no id for a member.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()
    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(conn):
        summary.set_parlamentarier_name(parlamentarier_id,
                                        nachname + ", " + vorname)

    # Only parsing needs the file handle, so it is released before the
    # database work starts. The encoding is pinned because the locale default
    # is platform dependent and the data contains non-ASCII text.
    with open(filename, encoding="utf-8") as data_file:
        content = json.load(data_file)

    pdf_date_str = content["metadata"]["pdf_creation_date"]
    archive_pdf_name = content["metadata"]["archive_pdf_name"]
    url = content["metadata"]["url"]
    pdf_date = datetime.strptime(
        pdf_date_str, "%Y-%m-%d %H:%M:%S")  # e.g. 2019-07-12 14:55:08
    stichdatum = pdf_date
    print("-- PDF creation date: {}".format(pdf_date))
    print("-- PDF archive file: {}".format(archive_pdf_name))
    print("-- URL: {}".format(url))
    print("-- ----------------------------- ")

    handle_removed_groups(group_type, content, conn, summary, stichdatum,
                          batch_time, pdf_date)

    print('\n-- Sync pgs...')

    # Reference handed to the statement generator for groups without a
    # database id. Presumably a SQL session variable set by the statement that
    # creates the group -- confirm against handle_organisation.
    new_group_reference = '@last_parlamentarische_gruppe'

    handled_organisation_ids = set()
    for group in content["data"]:
        members = group["praesidium"] + group["mitglieder"]

        (organisation_id, inaktiv, name_de, name_fr, name_it,
         rechtsform) = get_organisation(group, conn)

        if organisation_id:
            # Skip duplicate groups: Aktive Mobilität and Langsamverkehr are
            # twice in the 23.11.2020 PDF. Unknown groups (no id) are never
            # tracked here.
            if organisation_id in handled_organisation_ids:
                print('-- WARN: Organisation "{}" ID={} twice in PDF. Skipped'.
                      format(name_de, organisation_id))
                continue
            handled_organisation_ids.add(organisation_id)
            handle_names(group, name_de, name_fr, name_it, organisation_id,
                         summary, conn, batch_time, pdf_date)
        else:
            print('-- INFO: Organisation "{}" not found in DB'.format(
                name_de))

        handle_organisation(group_type, rechtsform, group, inaktiv, name_de,
                            name_fr, name_it, organisation_id, summary, conn,
                            batch_time, pdf_date)

        processed_parlamentarier_ids = set()
        for member, title in members:
            names = get_names(member)
            parlamentarier_id, parlamentarier_bis = db.get_parlamentarier_id_by_name(
                conn, names, False)

            if not parlamentarier_id:
                print(
                    "DATA INTEGRITY FAILURE: Parlamentarier '{}' of group '{}' not found in database."
                    .format(member, name_de))
                sys.exit(1)
            if parlamentarier_bis and parlamentarier_bis < date.today():
                print(
                    "-- INFO: Parlamentarier '{}' ({}) ist nicht mehr aktiv ('{}')"
                    .format(member, parlamentarier_id, parlamentarier_bis))
                continue
            if parlamentarier_id in processed_parlamentarier_ids:
                print(
                    '-- WARN: Ignore duplicate member "{}" ({}) in PG "{}"'
                    .format(member, parlamentarier_id, name_de))
                continue
            processed_parlamentarier_ids.add(parlamentarier_id)

            art = "vorstand" if title else "mitglied"

            db_parlamentarier = db.get_parlamentarier_dict(
                conn, parlamentarier_id)
            # Index into the per-gender variants of the mapping: 0 for 'M',
            # 1 for everything else.
            geschlecht = 0 if db_parlamentarier["geschlecht"] == 'M' else 1
            if title:
                title_mapping = literals.president_mapping[title]
                funktion_im_gremium = title_mapping[0]
                beschreibung = title_mapping[1][geschlecht]
                beschreibung_fr = title_mapping[2][geschlecht]
            else:
                funktion_im_gremium = None
                beschreibung = "Mitglied"
                beschreibung_fr = "Membre"

            # organisation_id is deliberately never overwritten with the
            # placeholder: doing so made every further member of a new group
            # hit the database with the placeholder and be recorded under it
            # instead of "neu".
            interessenbindung_id = None
            if organisation_id:
                (interessenbindung_id, db_art, db_funktion_im_gremium,
                 db_beschreibung,
                 db_beschreibung_fr) = db.get_interessenbindung_id(
                     conn, parlamentarier_id, organisation_id, stichdatum)

            if not interessenbindung_id:
                print(
                    "\n-- Neue Interessenbindung zwischen '{}' und '{}' als {}{}"
                    .format(
                        name_de, member, art, '/' + funktion_im_gremium
                        if funktion_im_gremium else ''))
                summary.neue_gruppe(parlamentarier_id,
                                    organisation_id or "neu", name_de, art)
                print(
                    sql_statement_generator.
                    insert_interessenbindung_parlamentarische_gruppe(
                        parlamentarier_id,
                        organisation_id or new_group_reference, stichdatum,
                        title is not None, beschreibung, beschreibung_fr,
                        funktion_im_gremium, url, batch_time, pdf_date))
            elif art != db_art:
                # Do not check funktion_im_gremium for change here; the
                # existing record is ended and a new one inserted, which
                # carries the current funktion_im_gremium anyway.
                print(
                    "\n-- Interessenbindungsart oder Funktion geändert zwischen '{}' und '{}': '{}', '{}'"
                    .format(name_de, member, art, funktion_im_gremium))
                print(
                    sql_statement_generator.end_interessenbindung(
                        interessenbindung_id, stichdatum, batch_time,
                        pdf_date))
                print(
                    sql_statement_generator.
                    insert_interessenbindung_parlamentarische_gruppe(
                        parlamentarier_id, organisation_id, stichdatum,
                        title is not None, beschreibung, beschreibung_fr,
                        funktion_im_gremium, url, batch_time, pdf_date))
                summary.gruppe_veraendert(parlamentarier_id, organisation_id,
                                          name_de, art)
            elif (funktion_im_gremium != db_funktion_im_gremium
                  or beschreibung != db_beschreibung
                  or beschreibung_fr != db_beschreibung_fr):
                print(
                    "\n-- Interessenbindungsbeschreibung geändert '{}': '{}' → '{}' / '{}' → '{}' / '{}' → '{}'"
                    .format(name_de, db_funktion_im_gremium,
                            funktion_im_gremium, db_beschreibung,
                            beschreibung, db_beschreibung_fr,
                            beschreibung_fr))
                print(
                    sql_statement_generator.
                    update_beschreibung_interessenbindung(
                        interessenbindung_id, funktion_im_gremium,
                        beschreibung, beschreibung_fr, url, batch_time,
                        pdf_date))
                summary.gruppe_veraendert(parlamentarier_id, organisation_id,
                                          name_de, art)
            else:
                summary.gruppe_unveraendert(parlamentarier_id,
                                            organisation_id, name_de, art)

    return summary
def sync_data(conn, filename, batch_time):
    """Print the delta between a JSON export of groups and the database.

    Parses the JSON file at ``filename`` and prints a header of
    ``--``-prefixed lines. For each entry of a group's ``praesidium`` list
    without an existing Interessenbindung it prints the value returned by
    ``sql_statement_generator.insert_interessenbindung_parlamentarische_gruppe``.
    Every outcome is recorded on the parliamentarian's row of a ``Summary``.

    Args:
        conn: Handle forwarded unchanged to all ``db`` helpers (presumably an
            open database connection).
        filename: Path of the JSON file. Required content:
            ``metadata.pdf_creation_date`` (format ``%Y-%m-%d %H:%M:%S``),
            ``metadata.archive_pdf_name`` and a ``data`` list of groups with
            the keys ``name_de``, ``name_fr``, ``name_it`` and ``praesidium``.
        batch_time: Object with ``year``, ``month`` and ``day`` attributes;
            used for the printed file name and forwarded to the helpers.

    Returns:
        The populated ``Summary`` instance.

    Raises:
        SystemExit: With status 1 if a presidium member cannot be resolved by
            ``db.get_parlamentarier_id_by_name``.
    """
    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- File: {}".format(backup_filename))

    summary = Summary()
    for parlamentarier_id, nachname, vorname in db.get_active_parlamentarier(conn):
        summary_row = summary.get_row(parlamentarier_id)
        summary_row.parlamentarier_name = nachname + ", " + vorname

    # The handle is only needed for parsing; close it before talking to the
    # database. The explicit encoding avoids depending on the platform's
    # locale default for the non-ASCII names in the data.
    with open(filename, encoding="utf-8") as data_file:
        content = json.load(data_file)

    metadata = content["metadata"]
    stichdatum = datetime.strptime(metadata["pdf_creation_date"],
                                   "%Y-%m-%d %H:%M:%S")
    print("-- PDF creation date: {}".format(stichdatum))
    print("-- PDF archive file: {}".format(metadata["archive_pdf_name"]))
    print("-- ----------------------------- ")

    handle_removed_groups(content, conn, summary, stichdatum, batch_time)

    # Stand-in id given to the statement generator when the group is not in
    # the database yet. Presumably a SQL session variable set by the
    # group-creating statement -- confirm against
    # handle_homepage_and_sekretariat.
    placeholder_id = '@last_parlamentarische_gruppe'

    for group in content["data"]:
        name_de = normalize_organisation(group["name_de"])
        name_fr = normalize_organisation(group["name_fr"])
        name_it = normalize_organisation(group["name_it"])
        members = group["praesidium"]
        shared_presidium = len(members) > 1

        organisation_id = db.get_organisation_id(conn, name_de, name_fr, name_it)
        if organisation_id:
            handle_names(group, name_de, name_fr, name_it, organisation_id,
                         summary, conn, batch_time)
        handle_homepage_and_sekretariat(group, name_de, name_fr, name_it,
                                        organisation_id, summary, conn,
                                        batch_time)

        for member in members:
            names = get_names(member)
            parlamentarier_id = db.get_parlamentarier_id_by_name(conn, names)
            if not parlamentarier_id:
                print("DATA INTEGRITY FAILURE: Parlamentarier {} not found in database.".format(member))
                sys.exit(1)

            parlamentarier_dict = db.get_parlamentarier_dict(conn, parlamentarier_id)
            geschlecht = parlamentarier_dict["geschlecht"]

            # The description stays empty for a sole president and for any
            # gender value other than "M"/"F".
            beschreibung = ''
            if shared_presidium and geschlecht == "M":
                beschreibung = "Co-Präsident"
            elif shared_presidium and geschlecht == "F":
                beschreibung = "Co-Präsidentin"

            # Keep organisation_id untouched for unknown groups. Replacing it
            # with the placeholder caused all but the first member of a new
            # group to be looked up and summarised under the placeholder
            # rather than "neu".
            interessenbindung_id = None
            if organisation_id:
                interessenbindung_id = db.get_interessenbindung_id(
                    conn, parlamentarier_id, organisation_id, stichdatum)

            summary_row = summary.get_row(parlamentarier_id)
            if not interessenbindung_id:
                print(
                    "\n-- Neue Interessenbindung zwischen '{}' und '{}'".format(name_de, member))
                if organisation_id:
                    summary_row.neue_gruppe(organisation_id, name_de)
                    sql_organisation_id = organisation_id
                else:
                    summary_row.neue_gruppe("neu", name_de)
                    sql_organisation_id = placeholder_id
                print(sql_statement_generator.insert_interessenbindung_parlamentarische_gruppe(
                    parlamentarier_id, sql_organisation_id, stichdatum, beschreibung, batch_time))
            else:
                summary_row.gruppe_unveraendert(organisation_id, name_de)

    return summary
def sync_data(conn, filename, council, batch_time):
    """Compare the guests listed in a JSON export with those in the database.

    For every entry of the file's ``data`` list the two guests returned by
    ``db.get_guests`` are matched against the (up to two) guests in the file.
    ``guest_remained``, ``guest_removed`` and ``guest_added`` are called
    accordingly and the result is recorded in one ``summary.SummaryRow`` per
    parliamentarian.

    Args:
        conn: Handle forwarded unchanged to the ``db`` helpers and to
            ``guest_added`` (presumably an open database connection).
        filename: Path of the JSON file. Required content:
            ``metadata.pdf_creation_date``, ``metadata.archive_pdf_name`` and
            a ``data`` list whose entries have the keys ``canton``, ``party``,
            ``names`` and ``guests``.
        council: Value printed in the header.
        batch_time: Object with ``year``, ``month`` and ``day`` attributes;
            used for the printed file name and forwarded to the guest helpers.

    Returns:
        A list with one ``summary.SummaryRow`` per entry of ``data``, in file
        order.
    """
    # Imported locally so the block does not depend on the file's import list.
    from datetime import date

    backup_filename = "{}-{:02d}-{:02d}-{}".format(
        batch_time.year, batch_time.month, batch_time.day, filename)
    print("\n\n-- ----------------------------- ")
    print("-- {} ".format(council))
    print("-- File: {}".format(backup_filename))

    # The handle is only needed for parsing; close it before talking to the
    # database. The explicit encoding avoids depending on the platform's
    # locale default.
    with open(filename, encoding="utf-8") as data_file:
        content = json.load(data_file)

    print("-- PDF creation date: {}".format(content["metadata"]["pdf_creation_date"]))
    print("-- PDF archive file: {}".format(content["metadata"]["archive_pdf_name"]))
    print("-- ----------------------------- ")

    summary_rows = []
    for count, parlamentarier in enumerate(content["data"], start=1):
        # Load info about the parliamentarian; the resolved id is written
        # back into the parsed entry.
        kanton_id = db.get_kanton_id(conn, parlamentarier["canton"])
        partei_id = db.get_partei_id(conn, parlamentarier["party"])
        parlamentarier_id = db.get_parlamentarier_id(
            conn, parlamentarier["names"], kanton_id, partei_id)
        parlamentarier["id"] = parlamentarier_id
        parlamentarier_db_dict = db.get_parlamentarier_dict(conn, parlamentarier_id)

        # Active means: no end date, or an end date that is still in the
        # future. Previously any end date -- even an upcoming one -- marked
        # the member inactive, so their new guests were silently not added.
        # NOTE(review): assumes im_rat_bis is a datetime.date (or None) --
        # confirm against db.get_parlamentarier_dict.
        im_rat_bis = parlamentarier_db_dict['im_rat_bis']
        parlamentarier_active = im_rat_bis is None or im_rat_bis > date.today()

        # Existing guests (from database).
        existing_guest_1, existing_guest_2 = db.get_guests(conn, parlamentarier_id)

        # New guests (from JSON file); missing slots become None.
        new_guests = parlamentarier["guests"]
        new_guest_1 = new_guests[0] if len(new_guests) > 0 else None
        new_guest_2 = new_guests[1] if len(new_guests) > 1 else None

        summary_row = summary.SummaryRow(parlamentarier, count, parlamentarier_db_dict)

        # Check for each existing guest whether it left or stayed. Both slots
        # follow the same rule, so they share one loop: the first new guest
        # that compares equal wins, otherwise the guest counts as removed.
        existing_slots = (
            (existing_guest_1, summary_row.set_guest_1,
             summary_row.set_guest_1_changes,
             summary_row.set_removed_guest_1),
            (existing_guest_2, summary_row.set_guest_2,
             summary_row.set_guest_2_changes,
             summary_row.set_removed_guest_2),
        )
        for existing_guest, set_kept, set_changes, set_removed in existing_slots:
            for new_guest in (new_guest_1, new_guest_2):
                if name_logic.are_guests_equal(existing_guest, new_guest):
                    set_kept(existing_guest)
                    funktion_equal = guest_remained(
                        parlamentarier, existing_guest, new_guest, batch_time)
                    if not funktion_equal:
                        set_changes("funktion")
                    break
            else:
                guest_removed(parlamentarier, existing_guest, batch_time)
                set_removed(existing_guest)

        # Check if new guest 1 was already here.
        if (not name_logic.are_guests_equal(new_guest_1, existing_guest_1)
                and not name_logic.are_guests_equal(new_guest_1, existing_guest_2)
                and parlamentarier_active):
            guest_added(conn, parlamentarier, new_guest_1, batch_time)
            summary_row.set_new_guest_1(new_guest_1)

        # Check if new guest 2 was already here.
        # NOTE: a disabled "quick and dirty" special case used to exclude
        # parlamentarier_id 223 with a guest named "Egger" here
        # (SR Engler + ZB Egger, new NR).
        if (not name_logic.are_guests_equal(new_guest_2, existing_guest_1)
                and not name_logic.are_guests_equal(new_guest_2, existing_guest_2)
                and parlamentarier_active):
            guest_added(conn, parlamentarier, new_guest_2, batch_time)
            # When new guest 1 matches existing guest 2, the new guest is
            # reported in slot 1 instead of slot 2.
            if name_logic.are_guests_equal(new_guest_1, existing_guest_2):
                summary_row.set_new_guest_1(new_guest_2)
            else:
                summary_row.set_new_guest_2(new_guest_2)

        summary_rows.append(summary_row)

    return summary_rows