def setUp(self):
    """Rebuild a clean database (schema + fixture rows) and create the client under test."""
    self.connect_to_database()
    self.clean_database()
    self.create_database_schema()
    self.insert_test_data()
    # Client under test, pointed at the freshly populated local database.
    self._pgsql = PostgreSQL(
        "localhost",
        get_database_name(),
        get_database_user(),
        get_database_password(),
        get_database_port(),
    )
def main():
    """Audit every project_capture config, write the results to an Excel workbook, then open it."""
    config_list = []  # one ConfigSectionKey per project_capture_* file
    db_list = []      # matching DatabaseAudit connections (plus the target db)

    # Detect and load audit config files into a list of config objects.
    # ToDo: Additional logic needed to include project files in local dir
    for project_file in sorted(pathlib.Path('../conf/').glob('project_capture*')):
        config = ConfigSectionKey('../conf', '../local')
        config.load('connect.ini')
        config.load(project_file)

        # conn_config drills through the project file to the connect.ini and
        # returns a DatabaseSection object.
        conn_config = config(config('project').database_source)
        if conn_config.platform == 'mssql':
            db_conn = MSSQL(conn_config)
        elif conn_config.platform == 'postgresql':
            db_conn = PostgreSQL(conn_config)
        else:
            # BUG FIX: the original fell through here and appended a stale or
            # undefined `db` to db_list; skip misconfigured project files.
            print("platform not found. Config file's incorrectly set up")
            continue
        db = DatabaseAudit(f'{conn_config.platform}_audit', db_conn.conn)
        db.schema = conn_config.schema

        # add db and config objects to respective lists based on project_capture_* files
        config_list.append(config)
        db_list.append(db)

    # add target_db to the db_list[]
    # NOTE(review): this reuses `config` from the LAST loop iteration and
    # raises NameError when no project_capture_* files exist — confirm intent.
    target_conn_config = config('database:amc_dsg_udp_01_stage_dev')
    target_db_conn = MSSQL(target_conn_config)
    target_db = DatabaseAudit('mssql_audit', target_db_conn.conn)
    target_db.use_database('udp_stage_dev')
    db_list.append(target_db)

    # Excel Logic: one workbook per run, stamped with environment and date.
    environment = 'dev'
    time = datetime.datetime.now()
    # BUG FIX: raw string so the Windows backslashes are not treated as
    # (invalid) escape sequences.
    file_name = rf'..\output\Audit_Results_{environment}_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook1 = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook1.add_worksheet('Table Overview')
    worksheet2 = workbook1.add_worksheet('Column Overview')
    worksheet3 = workbook1.add_worksheet('Column Detail')

    # Start the magic
    iterate_row_count(config_list, db_list, workbook1, worksheet1)
    # iterate_null_columns(config_list, db_list, workbook1, worksheet2)
    # iterate_column_min_max(config_list, db_list, workbook1, worksheet3)

    # start it up
    workbook1.close()
    os.startfile(file_name)
def get_repos():
    """Return all non-test repositories as JSON.

    GET only; any other HTTP method yields a 405 response.
    """
    if request.method != "GET":
        return "invalid method " + request.method, 405
    base = PostgreSQL.connect()
    try:
        # RealDictCursor makes fetchall() return dict rows, which jsonify
        # serializes with column names as keys.
        cursor = base.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        try:
            cursor.execute(
                "SELECT id, author, name, avatar, url, description, language, stars, forks "
                " FROM public.repository WHERE test = false"
            )
            data = jsonify(cursor.fetchall())
        finally:
            # BUG FIX: the original leaked the cursor and connection when
            # execute/fetch raised; always release both.
            cursor.close()
    finally:
        base.close()
    return data, 200
def test_postgresql_connection(self):
    """PostgreSQL should implement DatabaseInterface and connect via psycopg2."""
    HOST = "localhost"
    DATABASE = get_database_name()
    USER = get_database_user()
    PASSWORD = get_database_password()
    PORT = get_database_port()
    with patch("psycopg2.connect") as mock:
        pgsql = PostgreSQL(HOST, DATABASE, USER, PASSWORD, PORT)
        self.assertIsInstance(
            pgsql,
            DatabaseInterface,
            msg="PostgreSQL class should be a instance of DatabaseInterface in order to be usefull by the tasks",
        )
        # BUG FIX: `mock.called_once()` is a no-op auto-created Mock attribute
        # and never asserts anything; assert_called_once() actually verifies
        # that psycopg2.connect was invoked exactly once.
        mock.assert_called_once()
def set_repo():
    """Insert one repository record from the POSTed JSON body.

    POST only; any other HTTP method yields a 405 response.
    """
    if request.method != "POST":
        return "invalid method " + request.method, 405
    dt = request.get_json()
    base = PostgreSQL.connect()
    try:
        cursor = base.cursor()
        try:
            # SECURITY FIX: the original str.format()-ed untrusted request
            # values straight into the SQL string (SQL injection, and broken
            # for values containing quotes). psycopg2 %s placeholders let the
            # driver quote every value safely.
            cursor.execute(
                'INSERT INTO public.repository'
                '(author, name, avatar, url, description, language, stars, forks) '
                'VALUES (%s, %s, %s, %s, %s, %s, %s, %s);',
                (dt["author"], dt["name"], dt["avatar"], dt["url"],
                 dt["description"], dt["language"], dt["stars"], dt["forks"]),
            )
            base.commit()
        finally:
            # Always release the cursor/connection, even when execute raises.
            cursor.close()
    finally:
        base.close()
    return "{ok: true}", 200
# Program will scan for connections in 5 minute intervals # and terminate disruptive connections. Deliberate connections # should remain uninterrupted. # Program will run in perpetuity until KeyBoard Interrupted. while True: query = """ SELECT * FROM pg_stat_activity WHERE datname='postgres' """ db = PostgreSQL() conn = db.connection cur = conn.cursor() cur.execute(query) result = pd.DataFrame.from_records( cur.fetchall(), columns=[ "datid", "datname", "pid", "usesysid", "username", "application_name", "client_addr", "client_hostname", "client_port", "backend_start", "xact_start", "query_start", "state_change", "wait_event_type", "wait_event", "state", "backend_xid", "backend_xmin", "query", "backend_type" ]) num_connections = len(result) - 1
def main():
    """Test every `database:` connection in connect.ini and log results to an Excel workbook."""
    # Excel Logic: one dated workbook per run.
    time = datetime.datetime.now()
    # BUG FIX: raw string so the Windows backslashes are not treated as
    # (invalid) escape sequences.
    file_name = rf'..\output\Connection_Results_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook.add_worksheet('Table Overview')

    # Workbook Formats
    header_format = workbook.add_format({'bold': True, 'underline': True})
    failure_format = workbook.add_format({'font_color': 'red', 'bold': True})
    success_format = workbook.add_format({'font_color': 'green', 'bold': True})
    undefined_format = workbook.add_format({
        'font_color': 'orange',
        'bold': True
    })

    # Write Headers
    worksheet1.write(0, 0, 'Database', header_format)
    worksheet1.write(0, 1, 'Connection Result', header_format)

    # Config logic
    config = ConfigSectionKey('../conf', '../local')
    config.load('connect.ini')

    # set y index
    row_index = 1
    for database in sorted(db for db in config.sections if 'database:' in db):
        db_name = database.partition(':')[2]
        db_config = config(database)
        worksheet1.write(row_index, 0, db_name)
        if not db_config.database:
            # BUG FIX: the original wrote 'Connection Undefined' and then fell
            # through to attempt a connection anyway, potentially overwriting
            # this cell; undefined entries are now skipped.
            worksheet1.write(row_index, 1, 'Connection Undefined',
                             undefined_format)
        elif db_config.platform == 'mssql':
            try:
                MSSQL(db_config)  # connection attempt is the whole test
                print(f'{db_name}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{db_name}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))
        elif db_config.platform == 'postgresql':
            try:
                PostgreSQL(db_config)  # connection attempt is the whole test
                print(f'{database}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{database}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))
        row_index += 1

    # start it up
    workbook.close()
    file_full_path = full_path(file_name)
    os.startfile(file_full_path)
class PostgreSQLTests(TestCase):
    """Integration tests for the PostgreSQL gazette storage backend.

    Requires a live local PostgreSQL instance; each test runs against a
    freshly rebuilt schema populated with deterministic fake gazettes.
    """

    def setUp(self):
        """Rebuild the schema, insert fixture data, and build the client under test."""
        # BUG FIX: `_data` was a mutable CLASS attribute shared by every
        # instance of this TestCase; make it per-instance so no state can
        # leak between tests.
        self._data = []
        self.connect_to_database()
        self.clean_database()
        self.create_database_schema()
        self.insert_test_data()
        self._pgsql = PostgreSQL(
            "localhost",
            get_database_name(),
            get_database_user(),
            get_database_password(),
            get_database_port(),
        )

    def tearDown(self):
        self.disconnect_database()

    def connect_to_database(self):
        """Open the raw psycopg2 connection used for fixture management."""
        self._dbconnection = psycopg2.connect(
            dbname=get_database_name(),
            user=get_database_user(),
            password=get_database_password(),
            host="localhost",
        )

    def disconnect_database(self):
        self._dbconnection.close()

    def create_database_schema(self):
        """Create the territories and gazettes tables in one transaction."""
        with self._dbconnection.cursor() as cursor:
            self.create_territories_table(cursor)
            self.create_gazettes_table(cursor)
        self._dbconnection.commit()

    def create_territories_table(self, cursor):
        cursor.execute(
            """
            CREATE TABLE territories (
                id character varying NOT NULL,
                name character varying,
                state_code character varying,
                state character varying
            );
            """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY territories
                ADD CONSTRAINT territories_pkey PRIMARY KEY (id);
            """
        )

    def create_gazettes_table(self, cursor):
        """Create the gazettes table, its id sequence, and its constraints."""
        cursor.execute(
            """
            CREATE TABLE gazettes (
                id integer NOT NULL,
                source_text text,
                date date,
                edition_number character varying,
                is_extra_edition boolean,
                power character varying,
                file_checksum character varying,
                file_path character varying,
                file_url character varying,
                scraped_at timestamp without time zone,
                created_at timestamp without time zone,
                territory_id character varying,
                processed boolean
            );"""
        )
        cursor.execute(
            """
            CREATE SEQUENCE gazettes_id_seq
                AS integer
                START WITH 1
                INCREMENT BY 1
                NO MINVALUE
                NO MAXVALUE
                CACHE 1;
            """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ALTER COLUMN id SET DEFAULT nextval('gazettes_id_seq'::regclass);
            """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ADD CONSTRAINT gazettes_pkey PRIMARY KEY (id);
            """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ADD CONSTRAINT gazettes_territory_id_date_file_checksum_key
                UNIQUE (territory_id, date, file_checksum);
            """
        )

    def insert_territories_data(self, cursor):
        """Insert the single fixture territory every fake gazette references."""
        territory_data = {
            "id": "3550308",
            "name": "Gaspar",
            "state_code": "SC",
            "state": "Santa Catarina",
        }
        cursor.execute(
            """
            INSERT INTO territories (
                id, name , state_code, state)
            VALUES ( %(id)s, %(name)s, %(state_code)s, %(state)s );""",
            territory_data,
        )

    def insert_gazettes_data(self, cursor):
        for data in self._data:
            cursor.execute(
                """
                INSERT INTO gazettes (
                    id, source_text, date , edition_number, is_extra_edition,
                    power, file_checksum , file_path , file_url , scraped_at,
                    created_at, territory_id, processed)
                VALUES ( %(id)s, %(source_text)s, %(date)s , %(edition_number)s,
                    %(is_extra_edition)s, %(power)s, %(file_checksum)s ,
                    %(file_path)s , %(file_url)s , %(scraped_at)s,
                    %(created_at)s, %(territory_id)s, %(processed)s);""",
                data,
            )

    def insert_test_data(self):
        """Generate fake rows and commit them to the database."""
        self.generate_fake_data()
        with self._dbconnection.cursor() as cursor:
            self.insert_territories_data(cursor)
            self.insert_gazettes_data(cursor)
        self._dbconnection.commit()

    def generate_fake_data(self):
        """Populate self._data with ten deterministic fake gazette dicts."""
        self._data.clear()
        for id in range(10):
            self._data.append(
                {
                    "id": id,
                    "source_text": "",
                    "date": date.today(),
                    "edition_number": str(id),
                    "is_extra_edition": False,
                    "power": "executive",
                    "file_checksum": str(uuid.uuid1()),
                    "file_path": f"my/fake/path/gazette/{id}.pdf",
                    "file_url": "www.querido-diario.org",
                    "scraped_at": datetime.now(),
                    "created_at": datetime.now(),
                    "territory_id": "3550308",
                    "processed": False,
                    "state_code": "SC",
                    "territory_name": "Gaspar",
                }
            )
        self.set_some_fake_data_as_ingested_by_the_system_and_no_need_to_be_processed()

    def set_some_fake_data_as_ingested_by_the_system_and_no_need_to_be_processed(self):
        # Mark the last two rows processed so the "pending" queries have a
        # non-trivial expected subset.
        self._data[-1]["processed"] = True
        self._data[-2]["processed"] = True

    def get_gazettes_pending_to_be_processed(self):
        """Yield the fixture gazettes that should still be pending."""
        for gazette in self._data:
            # BUG FIX (idiom): `== False` replaced with `not`; behavior is
            # identical for these boolean fixture values.
            if not gazette["processed"]:
                yield gazette

    def clean_database(self):
        """Drop all fixture tables and sequences from previous runs."""
        with self._dbconnection.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS gazettes;")
            cursor.execute("DROP TABLE IF EXISTS territories;")
            cursor.execute("DROP SEQUENCE IF EXISTS gazettes_id_seq;")
        self._dbconnection.commit()

    def test_get_gazettes(self):
        gazettes_in_the_database = self._pgsql.get_pending_gazettes()
        self.assertCountEqual(
            gazettes_in_the_database, self.get_gazettes_pending_to_be_processed()
        )

    def test_get_pending_gazettes(self):
        gazettes_in_the_database = self._pgsql.get_pending_gazettes()
        expected_gazettes = self.get_gazettes_pending_to_be_processed()
        self.assertCountEqual(gazettes_in_the_database, expected_gazettes)

    def test_set_gazette_as_processed(self):
        pending_gazettes = self.get_gazettes_pending_to_be_processed()
        for gazette in pending_gazettes:
            self._pgsql.set_gazette_as_processed(
                gazette["id"], gazette["file_checksum"]
            )
        pending_gazettes_in_database = self._pgsql.get_pending_gazettes()
        self.assertEqual(0, len(list(pending_gazettes_in_database)))