Code Example #1
def setUp(self):
    self.connect_to_database()
    self.clean_database()
    self.create_database_schema()
    self.insert_test_data()
    self._pgsql = PostgreSQL(
        "localhost",
        get_database_name(),
        get_database_user(),
        get_database_password(),
        get_database_port(),
    )
Code Example #2
def main():
    # initialize the lists that will hold all config and database objects
    config_list = []
    db_list = []

    # Detect and load audit config files into a dict of config objects
    # ToDo: Additional logic needed to include project files in local dir
    for project_file in sorted(pathlib.Path('../conf/').glob('project_capture*')):
        config = ConfigSectionKey('../conf', '../local')
        config.load('connect.ini')
        config.load(project_file)
        # conn_config drills through the project file to the connect.ini and returns a DatabaseSection object
        conn_config = config(config('project').database_source)

        if conn_config.platform == 'mssql':
            db_conn = MSSQL(conn_config)
            db = DatabaseAudit(f'{conn_config.platform}_audit', db_conn.conn)
            db.schema = conn_config.schema

        elif conn_config.platform == 'postgresql':
            db_conn = PostgreSQL(conn_config)
            db = DatabaseAudit(f'{conn_config.platform}_audit', db_conn.conn)
            db.schema = conn_config.schema
        else:
            # skip this project file; appending would reuse a stale or undefined db object
            print("Platform not found. Config file is incorrectly set up.")
            continue
        # add db and config objects to their respective lists based on project_capture_* files
        config_list.append(config)
        db_list.append(db)

    # add target_db to the db_list[]
    target_conn_config = config('database:amc_dsg_udp_01_stage_dev')
    target_db_conn = MSSQL(target_conn_config)
    target_db = DatabaseAudit('mssql_audit', target_db_conn.conn)
    target_db.use_database('udp_stage_dev')
    db_list.append(target_db)

    # Excel Logic
    environment = 'dev'
    time = datetime.datetime.now()
    file_name = fr'..\output\Audit_Results_{environment}_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook1 = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook1.add_worksheet('Table Overview')
    worksheet2 = workbook1.add_worksheet('Column Overview')
    worksheet3 = workbook1.add_worksheet('Column Detail')

    # Start the magic
    iterate_row_count(config_list, db_list, workbook1, worksheet1)
    # iterate_null_columns(config_list, db_list, workbook1, worksheet2)
    # iterate_column_min_max(config_list, db_list, workbook1, worksheet3)

    # close the workbook and open the results file
    workbook1.close()
    os.startfile(file_name)
Code Example #3
def get_repos():
    if request.method == "GET":
        base = PostgreSQL.connect()
        cursor = base.cursor(cursor_factory=psycopg2.extras.RealDictCursor)
        cursor.execute("SELECT id, author, name, avatar, url, description, language, stars, forks " +
                       " FROM public.repository WHERE test = false")
        data = jsonify(cursor.fetchall())
        cursor.close()
        base.close()
        return data, 200
    else:
        return "invalid method " + request.method, 405
Code Example #4
def test_postgresql_connection(self):
    HOST = "localhost"
    DATABASE = get_database_name()
    USER = get_database_user()
    PASSWORD = get_database_password()
    PORT = get_database_port()
    with patch("psycopg2.connect") as mock:
        pgsql = PostgreSQL(HOST, DATABASE, USER, PASSWORD, PORT)
        self.assertIsInstance(
            pgsql,
            DatabaseInterface,
            msg="PostgreSQL class should be an instance of DatabaseInterface in order to be usable by the tasks",
        )
        mock.assert_called_once()
Code Example #5
def set_repo():
    if request.method == "POST":
        dt = request.get_json()
        base = PostgreSQL.connect()
        cursor = base.cursor()
        # parameterized query: let psycopg2 escape the values instead of string formatting (avoids SQL injection)
        query = ('INSERT INTO public.repository'
                 '(author, name, avatar, url, description, language, stars, forks) '
                 'VALUES (%s, %s, %s, %s, %s, %s, %s, %s);')
        cursor.execute(query, (dt["author"], dt["name"], dt["avatar"], dt["url"],
                               dt["description"], dt["language"], dt["stars"], dt["forks"]))
        base.commit()
        cursor.close()
        base.close()
        return '{"ok": true}', 200
    else:
        return "invalid method " + request.method, 405
Code Example #6
# Program will scan for connections in 5-minute intervals
# and terminate disruptive connections. Deliberate connections
# should remain uninterrupted.

# Program will run in perpetuity until interrupted with a KeyboardInterrupt.

while True:

    query = """
	SELECT *
	FROM pg_stat_activity
	WHERE datname='postgres'
	"""

    db = PostgreSQL()
    conn = db.connection
    cur = conn.cursor()
    cur.execute(query)

    result = pd.DataFrame.from_records(
        cur.fetchall(),
        columns=[
            "datid", "datname", "pid", "usesysid", "username",
            "application_name", "client_addr", "client_hostname",
            "client_port", "backend_start", "xact_start", "query_start",
            "state_change", "wait_event_type", "wait_event", "state",
            "backend_xid", "backend_xmin", "query", "backend_type"
        ])

    # subtract one, presumably to exclude this script's own connection to the database
    num_connections = len(result) - 1
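
The header comments promise two steps this excerpt stops short of: terminating the disruptive connections and waiting five minutes before the next scan. Below is a minimal sketch of how the loop body might continue, assuming psycopg2's get_backend_pid() is used to spare the monitor's own session, `import time` sits at the top of the module, and an idle-in-transaction check stands in for whatever "disruptive" actually means here:

    # Hypothetical continuation of the loop body (not part of the original excerpt).
    # Placeholder policy: treat idle-in-transaction sessions as disruptive.
    for _, row in result.iterrows():
        # never terminate the monitor's own backend
        if row["pid"] != conn.get_backend_pid() and row["state"] == "idle in transaction":
            cur.execute("SELECT pg_terminate_backend(%s);", (int(row["pid"]),))

    cur.close()
    conn.close()

    # wait five minutes before the next scan (assumes `import time` at module top)
    time.sleep(300)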
Code Example #7
def main():
    # Excel Logic
    time = datetime.datetime.now()
    file_name = fr'..\output\Connection_Results_{time:%Y-%m-%d}.xlsx'

    # create workbook and worksheets
    workbook = xlsxwriter.Workbook(file_name)
    worksheet1 = workbook.add_worksheet('Table Overview')

    # Workbook Formats
    header_format = workbook.add_format({'bold': True, 'underline': True})
    failure_format = workbook.add_format({'font_color': 'red', 'bold': True})
    success_format = workbook.add_format({'font_color': 'green', 'bold': True})
    undefined_format = workbook.add_format({
        'font_color': 'orange',
        'bold': True
    })

    # Write Headers
    worksheet1.write(0, 0, 'Database', header_format)
    worksheet1.write(0, 1, 'Connection Result', header_format)

    # Config logic
    config = ConfigSectionKey('../conf', '../local')
    config.load('connect.ini')

    # set y index
    row_index = 1

    for database in sorted(db for db in config.sections if 'database:' in db):
        db_name = database.partition(':')[2]
        db_config = config(database)

        worksheet1.write(row_index, 0, db_name)
        if not db_config.database:
            worksheet1.write(row_index, 1, 'Connection Undefined',
                             undefined_format)
            row_index += 1
            continue

        if db_config.platform == 'mssql':
            try:
                db_conn = MSSQL(db_config)
                print(f'{db_name}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{db_name}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))

        elif db_config.platform == 'postgresql':
            try:
                db_conn = PostgreSQL(db_config)
                print(f'{db_name}: Success')
                worksheet1.write(row_index, 1, 'Success', success_format)
            except Exception as e:
                print(f'{db_name}: Failed')
                worksheet1.write(row_index, 1, 'Failed', failure_format)
                worksheet1.write(row_index, 2, str(e))

        row_index += 1
    # close the workbook and open the results file
    workbook.close()
    file_full_path = full_path(file_name)
    os.startfile(file_full_path)
Code Example #8
class PostgreSQLTests(TestCase):

    _data = []

    def setUp(self):
        self.connect_to_database()
        self.clean_database()
        self.create_database_schema()
        self.insert_test_data()
        self._pgsql = PostgreSQL(
            "localhost",
            get_database_name(),
            get_database_user(),
            get_database_password(),
            get_database_port(),
        )

    def tearDown(self):
        self.disconnect_database()

    def connect_to_database(self):
        self._dbconnection = psycopg2.connect(
            dbname=get_database_name(),
            user=get_database_user(),
            password=get_database_password(),
            host="localhost",
        )

    def disconnect_database(self):
        self._dbconnection.close()

    def create_database_schema(self):
        with self._dbconnection.cursor() as cursor:
            self.create_territories_table(cursor)
            self.create_gazettes_table(cursor)
            self._dbconnection.commit()

    def create_territories_table(self, cursor):
        cursor.execute(
            """
            CREATE TABLE territories (
                id character varying NOT NULL,
                name character varying,
                state_code character varying,
                state character varying
            ); """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY territories
                ADD CONSTRAINT territories_pkey PRIMARY KEY (id);
            """
        )

    def create_gazettes_table(self, cursor):
        cursor.execute(
            """
            CREATE TABLE gazettes (
                id integer NOT NULL,
                source_text text,
                date date,
                edition_number character varying,
                is_extra_edition boolean,
                power character varying,
                file_checksum character varying,
                file_path character varying,
                file_url character varying,
                scraped_at timestamp without time zone,
                created_at timestamp without time zone,
                territory_id character varying,
                processed boolean
        );"""
        )
        cursor.execute(
            """
            CREATE SEQUENCE gazettes_id_seq
                AS integer
                START WITH 1
                INCREMENT BY 1
                NO MINVALUE
                NO MAXVALUE
                CACHE 1;
        """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ALTER COLUMN id SET DEFAULT nextval('gazettes_id_seq'::regclass);
        """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ADD CONSTRAINT gazettes_pkey PRIMARY KEY (id);
        """
        )
        cursor.execute(
            """
            ALTER TABLE ONLY gazettes
                ADD CONSTRAINT gazettes_territory_id_date_file_checksum_key UNIQUE (territory_id, date, file_checksum);
        """
        )

    def insert_territories_data(self, cursor):
        territory_data = {
            "id": "3550308",
            "name": "Gaspar",
            "state_code": "SC",
            "state": "Santa Catarina",
        }
        cursor.execute(
            """
            INSERT INTO territories (
                id,
                name,
                state_code,
                state)
            VALUES (
                %(id)s,
                %(name)s,
                %(state_code)s,
                %(state)s
                );""",
            territory_data,
        )

    def insert_gazettes_data(self, cursor):
        for data in self._data:
            cursor.execute(
                """
                INSERT INTO gazettes (
                    id,
                    source_text,
                    date,
                    edition_number,
                    is_extra_edition,
                    power,
                    file_checksum,
                    file_path,
                    file_url,
                    scraped_at,
                    created_at,
                    territory_id,
                    processed)
                VALUES (
                    %(id)s,
                    %(source_text)s,
                    %(date)s,
                    %(edition_number)s,
                    %(is_extra_edition)s,
                    %(power)s,
                    %(file_checksum)s,
                    %(file_path)s,
                    %(file_url)s,
                    %(scraped_at)s,
                    %(created_at)s,
                    %(territory_id)s,
                    %(processed)s);""",
                data,
            )

    def insert_test_data(self):
        self.generate_fake_data()
        with self._dbconnection.cursor() as cursor:
            self.insert_territories_data(cursor)
            self.insert_gazettes_data(cursor)
            self._dbconnection.commit()

    def generate_fake_data(self):
        self._data.clear()
        for id in range(10):
            self._data.append(
                {
                    "id": id,
                    "source_text": "",
                    "date": date.today(),
                    "edition_number": str(id),
                    "is_extra_edition": False,
                    "power": "executive",
                    "file_checksum": str(uuid.uuid1()),
                    "file_path": f"my/fake/path/gazette/{id}.pdf",
                    "file_url": "www.querido-diario.org",
                    "scraped_at": datetime.now(),
                    "created_at": datetime.now(),
                    "territory_id": "3550308",
                    "processed": False,
                    "state_code": "SC",
                    "territory_name": "Gaspar",
                }
            )
        self.set_some_fake_data_as_ingested_by_the_system_and_no_need_to_be_processed()

    def set_some_fake_data_as_ingested_by_the_system_and_no_need_to_be_processed(self):
        self._data[-1]["processed"] = True
        self._data[-2]["processed"] = True

    def get_gazettes_pending_to_be_processed(self):
        for gazette in self._data:
            if gazette["processed"] == False:
                yield gazette

    def clean_database(self):
        with self._dbconnection.cursor() as cursor:
            cursor.execute("DROP TABLE IF EXISTS gazettes;")
            cursor.execute("DROP TABLE IF EXISTS territories;")
            cursor.execute("DROP SEQUENCE IF EXISTS gazettes_id_seq;")
            self._dbconnection.commit()

    def test_get_gazettes(self):
        gazettes_in_the_database = self._pgsql.get_pending_gazettes()
        self.assertCountEqual(
            gazettes_in_the_database, self.get_gazettes_pending_to_be_processed()
        )

    def test_get_pending_gazettes(self):
        gazettes_in_the_database = self._pgsql.get_pending_gazettes()
        expected_gazettes = self.get_gazettes_pending_to_be_processed()
        self.assertCountEqual(gazettes_in_the_database, expected_gazettes)

    def test_set_gazette_as_processed(self):
        pending_gazettes = self.get_gazettes_pending_to_be_processed()
        for gazette in pending_gazettes:
            self._pgsql.set_gazette_as_processed(
                gazette["id"], gazette["file_checksum"]
            )
        pending_gazettes_in_database = self._pgsql.get_pending_gazettes()
        self.assertEqual(0, len(list(pending_gazettes_in_database)))