Example #1
File: reload.py  Project: zyclove/gpdb
    def validate_mid_level_partitions(self, schema_name, table_name):
        conn = dbconn.connect(
            dbconn.DbURL(dbname=self.database, port=self.port))
        try:
            parent_schema, parent_table = self.parent_partition_map[(
                schema_name, table_name)]
            if (parent_schema, parent_table) == (schema_name, table_name):
                return
            try:
                res = dbconn.query(
                    conn,
                    """ SELECT isleaf from pg_partition_tree('%s.%s') """ %
                    (schema_name, table_name))
            except Exception as e:
                logger.debug(
                    'Unable to get the partition information for table %s: (%s)'
                    % (table_name, str(e)))
                # re-raise: `res` would be unbound below if the query failed
                raise

            for isleaf in res.fetchall():
                if not isleaf[0]:
                    raise Exception(
                        'Mid Level partition %s.%s is not supported by gpreload. Please specify only leaf partitions or parent table name'
                        % (schema_name, table_name))
        finally:
            conn.close()
Example #2
    def run(self):
        logger.debug('Executing query (%s:%s) for segment (%s:%s) on database (%s)' % (
            self.qname, self.query, self.hostname, self.port, self.dbname))
        with dbconn.connect(dbconn.DbURL(hostname=self.hostname, port=self.port, dbname=self.dbname),
                            utility=True) as conn:
            self.res = dbconn.query(conn, self.query).fetchall()
        conn.close()
Example #3
 def _get_dblist(self):
     # template0 does not accept any connections so we exclude it
     with closing(dbconn.connect(dbconn.DbURL())) as conn:
         res = dbconn.query(
             conn,
             "SELECT datname FROM PG_DATABASE WHERE datname != 'template0'")
         return res.fetchall()
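The closing(...) wrapper above is the pattern most of these examples rely on. A self-contained sketch of the same idea, assuming gppylib is importable and the PG* environment variables point at a running coordinator:

from contextlib import closing

from gppylib.db import dbconn


def list_databases():
    # template0 does not accept connections, so it is excluded here too
    with closing(dbconn.connect(dbconn.DbURL())) as conn:
        res = dbconn.query(
            conn,
            "SELECT datname FROM pg_database WHERE datname != 'template0'")
        return [row[0] for row in res.fetchall()]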
Example #4
def explain_index_scan(conn, sqlStr):
    cost = -1.0
    scan_type = ""
    try:
        log_output("")
        log_output("Executing query: %s" % ("explain " + sqlStr))
        exp_curs = dbconn.query(conn, "explain " + sqlStr)
        rows = exp_curs.fetchall()
        table_scan_pattern = TABLE_SCAN_PATTERN
        index_scan_pattern = INDEX_SCAN_PATTERN
        bitmap_scan_pattern = BITMAP_SCAN_PATTERN
        if glob_gpdb_major_version <= 5:
            table_scan_pattern = TABLE_SCAN_PATTERN_V5
            index_scan_pattern = INDEX_SCAN_PATTERN_V5
            bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5

        for row in rows:
            log_output(row[0])
            if re.search(TABLE_NAME_PATTERN, row[0]) or re.search(
                    NDV_TABLE_NAME_PATTERN, row[0]):
                if re.search(bitmap_scan_pattern, row[0]):
                    scan_type = BITMAP_SCAN
                    cost = cost_from_explain_line(row[0])
                elif re.search(index_scan_pattern, row[0]):
                    scan_type = INDEX_SCAN
                    cost = cost_from_explain_line(row[0])
                elif re.search(table_scan_pattern, row[0]):
                    scan_type = TABLE_SCAN
                    cost = cost_from_explain_line(row[0])

    except Exception as e:
        log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" %
                   ("explain " + sqlStr, e))

    return (scan_type, cost)
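cost_from_explain_line() is called here but never shown in these examples. A plausible sketch, assuming EXPLAIN lines of the usual form "Seq Scan on foo  (cost=0.00..431.00 rows=100 width=4)":

import re


def cost_from_explain_line(line):
    # extract the total (second) figure from "(cost=startup..total rows=... width=...)"
    m = re.search(r"\(cost=\d+\.\d+\.\.(\d+\.\d+) ", line)
    return float(m.group(1)) if m else -1.0  # -1.0 mirrors the sentinel above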
Example #5
File: utils.py  Project: zh874794037/gpdb
def query_sql(dbname, sql):
    result = None

    with dbconn.connect(dbconn.DbURL(dbname=dbname),
                        unsetSearchPath=False) as conn:
        result = dbconn.query(conn, sql)
    return result
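A hypothetical call site (the database name is assumed):

rows = query_sql('postgres', 'SELECT version()').fetchall()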
Example #6
    def __init__(self, name):
        self.name = name
        self.path = tempfile.mkdtemp()
        self.dbname = 'tablespace_db_%s' % name
        self.table_counter = 0
        self.initial_data = None

        gparray = GpArray.initFromCatalog(dbconn.DbURL())
        for host in gparray.getHostList():
            run_cmd('ssh %s mkdir -p %s' %
                    (pipes.quote(host), pipes.quote(self.path)))

        conn = dbconn.connect(dbconn.DbURL(), unsetSearchPath=False)
        dbconn.execSQL(
            conn,
            "CREATE TABLESPACE %s LOCATION '%s'" % (self.name, self.path))
        dbconn.execSQL(
            conn,
            "CREATE DATABASE %s TABLESPACE %s" % (self.dbname, self.name))
        conn.close()

        conn = dbconn.connect(dbconn.DbURL(dbname=self.dbname),
                              unsetSearchPath=False)
        dbconn.execSQL(conn, "CREATE TABLE tbl (i int) DISTRIBUTED RANDOMLY")
        dbconn.execSQL(conn, "INSERT INTO tbl VALUES (GENERATE_SERIES(0, 25))")

        # save the distributed data for later verification
        self.initial_data = dbconn.query(
            conn, "SELECT gp_segment_id, i FROM tbl").fetchall()
        conn.close()
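Worth noting in the constructor above: dbconn.execSQL is used for statements run purely for effect (DDL, INSERT), while dbconn.query is used when a result set must be fetched. A minimal sketch of the pairing, assuming an open gppylib connection conn:

dbconn.execSQL(conn, "CREATE TABLE t (i int)")            # run for effect, no cursor kept
rows = dbconn.query(conn, "SELECT i FROM t").fetchall()   # returns a cursor to fetch from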
Example #7
    def verify_for_gpexpand(self, hostname=None, port=0):
        """
        For gpexpand, we need make sure:
          1. data is the same after redistribution finished
          2. the table's numsegments is enlarged to the new cluster size
        """
        url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname)
        with closing(dbconn.connect(url, unsetSearchPath=False)) as conn:
            data = dbconn.query(conn,
                                "SELECT gp_segment_id, i FROM tbl").fetchall()
            tbl_numsegments = dbconn.querySingleton(
                conn, "SELECT numsegments FROM gp_distribution_policy "
                "WHERE localoid = 'tbl'::regclass::oid")
            num_segments = dbconn.querySingleton(
                conn,
                "SELECT COUNT(DISTINCT(content)) - 1 FROM gp_segment_configuration"
            )
        if tbl_numsegments != num_segments:
            raise Exception(
                "After gpexpand the numsegments for tablespace table 'tbl' %d does not match "
                "the number of segments in the cluster %d." %
                (tbl_numsegments, num_segments))

        initial_data = [i for _, i in self.initial_data]
        data_without_segid = [i for _, i in data]
        if sorted(data_without_segid) != sorted(initial_data):
            raise Exception(
                "Tablespace data is not identically distributed after running gpexpand. "
                "Expected pre-gpexpand data:\n%r\nbut found post-gpexpand data:\n%r"
                % (sorted(self.initial_data), sorted(data)))
Example #8
    def __fetchSingleOutputRow(self, conn, sql, retry=False):
        """
        Execute the given SQL and return what we expect to be a single row.
        Raise an exception when the query returns anything other than exactly
        one row; when more than one row comes back, log up to 10 of them as
        warnings before raising.
        """
        cursor   = dbconn.query(conn, sql)
        numrows  = cursor.rowcount
        numshown = 0
        res      = None
        for row in cursor:
            if numrows != 1:
                #
                # we got back more than one row, so print a few of
                # them before raising an exception
                #
                numshown += 1
                if numshown > 10:
                    break
                logger.warning('>>> %s' % row)
            else:
                assert res is None
                res = row
                assert res is not None
        cursor.close()
        if numrows != 1:
            raise Exception("SQL returned %d rows, not 1 as expected:\n%s" % (numrows, sql))
        return res
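For the common single-value case, gppylib also provides dbconn.querySingleton (used in Example #7), which performs a similar exactly-one-row check. A hedged one-line equivalent, assuming an open connection conn:

num_segments = dbconn.querySingleton(conn, "SELECT COUNT(*) FROM gp_segment_configuration")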
Example #9
    def verify(self, hostname=None, port=0):
        """
        Verify tablespace functionality by ensuring the tablespace can be
        written to, read from, and the initial data is still correctly
        distributed.
        """
        url = dbconn.DbURL(hostname=hostname, port=port, dbname=self.dbname)
        with dbconn.connect(url, unsetSearchPath=False) as conn:
            data = dbconn.query(conn,
                                "SELECT gp_segment_id, i FROM tbl").fetchall()

            # verify that we can still write to the tablespace
            self.table_counter += 1
            dbconn.execSQL(
                conn, "CREATE TABLE tbl_%s (i int) DISTRIBUTED RANDOMLY" %
                self.table_counter)
            dbconn.execSQL(
                conn, "INSERT INTO tbl_%s VALUES (GENERATE_SERIES(0, 25))" %
                self.table_counter)
        conn.close()

        if sorted(data) != sorted(self.initial_data):
            raise Exception(
                "Tablespace data is not identically distributed. Expected:\n%r\n but found:\n%r"
                % (sorted(self.initial_data), sorted(data)))
Example #10
File: utils.py  Project: zh874794037/gpdb
def getRows(dbname, exec_sql):
    with closing(
            dbconn.connect(dbconn.DbURL(dbname=dbname),
                           unsetSearchPath=False)) as conn:
        curs = dbconn.query(conn, exec_sql)
        results = curs.fetchall()
    return results
Example #11
    def _get_dist_policies(self):
        policies = []
        with dbconn.connect(dbconn.DbURL(), unsetSearchPath=False) as conn:
            # fetchall() returns a list of rows, not a cursor
            rows = dbconn.query(conn, 'select * from gp_distribution_policy;').fetchall()
            for row in rows:
                policies.append(row)
        conn.close()
        return policies
Example #12
File: catalog.py  Project: zyclove/gpdb
def basicSQLExec(conn, sql):
    cursor = None
    try:
        cursor = dbconn.query(conn, sql)
        rows = cursor.fetchall()
        return rows
    finally:
        if cursor:
            cursor.close()
Example #13
def select_version(conn):
    global glob_gpdb_major_version
    sqlStr = "SELECT version()"
    curs = dbconn.query(conn, sqlStr)

    rows = curs.fetchall()
    for row in rows:
        log_output(row[0])
        glob_gpdb_major_version = int(
            re.sub(r".*Greenplum Database ([0-9]*)\..*", r"\1", row[0]))
        log_output("GPDB major version is %d" % glob_gpdb_major_version)

    log_output("Backend pid:")
    sqlStr = "SELECT pg_backend_pid()"
    curs = dbconn.query(conn, sqlStr)

    rows = curs.fetchall()
    for row in rows:
        log_output(str(row[0]))
Example #14
    def verify(self, conn):
        sql = '''
            select c1 from {tablename} order by c1;
        '''.format(tablename=self.tablename)
        results = dbconn.query(conn, sql).fetchall()

        for i in range(self.counter, self.datasize):
            if i != int(results[i - self.counter][0]):
                self.report_incorrect_result()
                return
Example #15
File: catalog.py  Project: zyclove/gpdb
def dropSchemaIfExist(conn, schemaname):
    sql = "SELECT nspname FROM pg_catalog.pg_namespace WHERE nspname = '%s'" % schemaname
    dropsql = "DROP SCHEMA %s" % schemaname
    cursor = None
    try:
        cursor = dbconn.query(conn, sql)
        numrows = cursor.rowcount
        if numrows == 0:
            return  # schema does not exist; nothing to drop
        elif numrows == 1:
            cursor = dbconn.query(conn, dropsql)
        else:
            raise CatalogError("more than one entry in pg_namespace for '%s'" %
                               schemaname)
    except Exception as e:
        raise CatalogError("error dropping schema %s: %s" %
                           (schemaname, str(e)))
    finally:
        if cursor:
            cursor.close()
        conn.commit()
Example #16
File: utils.py  Project: robertmu/gpdb
def get_primary_segment_host_port_for_content(content='0'):
    """
    return host, port of primary segment for the content id
    """
    get_psegment_sql = "SELECT hostname, port FROM gp_segment_configuration WHERE content=%s AND role='p';" % content
    with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn:
        cur = dbconn.query(conn, get_psegment_sql)
        rows = cur.fetchall()
        primary_seg_host = rows[0][0]
        primary_seg_port = rows[0][1]
    return primary_seg_host, primary_seg_port
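A hypothetical call site, assuming a running cluster with a content-0 primary:

host, port = get_primary_segment_host_port_for_content('0')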
Example #17
File: utils.py  Project: robertmu/gpdb
def check_db_exists(dbname, host=None, port=0, user=None):
    LIST_DATABASE_SQL = 'SELECT datname FROM pg_database'

    results = []
    with closing(dbconn.connect(dbconn.DbURL(hostname=host, username=user, port=port, dbname='template1'), unsetSearchPath=False)) as conn:
        curs = dbconn.query(conn, LIST_DATABASE_SQL)
        results = curs.fetchall()
    for result in results:
        if result[0] == dbname:
            return True

    return False
Example #18
File: utils.py  Project: robertmu/gpdb
def get_primary_segment_host_port():
    """
    return host, port of primary segment (dbid 2)
    """
    FIRST_PRIMARY_DBID = 2
    get_psegment_sql = 'select hostname, port from gp_segment_configuration where dbid=%i;' % FIRST_PRIMARY_DBID
    with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn:
        cur = dbconn.query(conn, get_psegment_sql)
        rows = cur.fetchall()
        primary_seg_host = rows[0][0]
        primary_seg_port = rows[0][1]
    return primary_seg_host, primary_seg_port
Example #19
File: utils.py  Project: robertmu/gpdb
def stop_primary(context, content_id):
    get_psegment_sql = 'select datadir, hostname from gp_segment_configuration where content=%i and role=\'p\';' % content_id
    with closing(dbconn.connect(dbconn.DbURL(dbname='template1'), unsetSearchPath=False)) as conn:
        cur = dbconn.query(conn, get_psegment_sql)
        rows = cur.fetchall()
        seg_data_dir = rows[0][0]
        seg_host = rows[0][1]

    # Demo-cluster tests on CI fail with 'bash: pg_ctl: command not found',
    # so add pg_ctl to the PATH when ssh'ing to a demo cluster.
    subprocess.check_call(['ssh', seg_host,
                           'source %s/greenplum_path.sh && pg_ctl stop -m fast -D %s' % (
                               pipes.quote(os.environ.get("GPHOME")), pipes.quote(seg_data_dir))
                           ])
Example #20
def impl(context, filename, content):
    line = ""
    with closing(
            dbconn.connect(dbconn.DbURL(dbname='template1'),
                           unsetSearchPath=False)) as conn:
        result = dbconn.query(
            conn,
            "SELECT port, hostname, datadir FROM gp_segment_configuration WHERE preferred_role='p' AND content = %s;"
            % content).fetchall()
        port, hostname, datadir = result[0][0], result[0][1], result[0][2]
        line = "%s|%s|%s %s|%s|/tmp/newdir" % (hostname, port, datadir,
                                               hostname, port)

    with open("/tmp/%s" % filename, "w") as fd:
        fd.write("%s\n" % line)
Example #21
File: catalog.py  Project: zyclove/gpdb
def doesSchemaExist(conn, schemaname):
    sql = "SELECT nspname FROM pg_catalog.pg_namespace WHERE nspname = '%s'" % schemaname
    cursor = None
    try:
        cursor = dbconn.query(conn, sql)
        numrows = cursor.rowcount
        if numrows == 0:
            return False
        elif numrows == 1:
            return True
        else:
            raise CatalogError("more than one entry in pg_namespace for '%s'" %
                               schemaname)
    finally:
        if cursor:
            cursor.close()
Example #22
    def refresh(self):
        query = (
            "SELECT dbid, content, port, datadir, role, preferred_role, status, mode "
            "FROM gp_segment_configuration s WHERE 1 = 1")

        print('%s: fetching cluster configuration' % datetime.datetime.now())
        dburl = dbconn.DbURL(self.hostname, self.port, self.dbname)

        try:
            with dbconn.connect(dburl, utility=True,
                                unsetSearchPath=False) as conn:
                resultsets = dbconn.query(conn, query).fetchall()
        except Exception as e:
            print(e)
            sys.exit(1)
        print('%s: fetched cluster configuration' % datetime.datetime.now())
Example #23
    def test_TableMainColumn_tablenames_exist(self):
        gpcheckcat_file = os.path.abspath(
            os.path.dirname(__file__) + "/../../../gpcheckcat")
        subject = imp.load_source('gpcheckcat', gpcheckcat_file)

        dburl = dbconn.DbURL(hostname=os.getenv('HOSTNAME', 'localhost'),
                             port=os.getenv('PGPORT', 5432),
                             dbname=os.getenv('PGDATABASE', 'postgres'))
        conn = dbconn.connect(dburl)
        table_query = "select count(*) from pg_class where relname='{table_name}'"

        # 5.json has an incomplete list of catalog tables
        # src/backend/catalog has .h files for some catalog tables
        # gpdb-doc/dita/ref_guide/system_catalogs/ has .xml files for almost all catalog tables
        for key in subject.TableMainColumn.keys():
            cursor = dbconn.query(conn, table_query.format(table_name=key))
            self.assertTrue(cursor.rowcount == 1,
                            "%s not found in catalog dir" % key)
Example #24
def explain_join_scan(conn, sqlStr):
    cost = -1.0
    scan_type = ""
    try:
        log_output("")
        log_output("Executing query: %s" % ("explain " + sqlStr))
        exp_curs = dbconn.query(conn, "explain " + sqlStr)
        rows = exp_curs.fetchall()
        hash_join_pattern = HASH_JOIN_PATTERN
        nl_join_pattern = NL_JOIN_PATTERN
        table_scan_pattern = TABLE_SCAN_PATTERN
        index_scan_pattern = INDEX_SCAN_PATTERN
        bitmap_scan_pattern = BITMAP_SCAN_PATTERN
        if glob_gpdb_major_version <= 5:
            hash_join_pattern = HASH_JOIN_PATTERN_V5
            nl_join_pattern = NL_JOIN_PATTERN_V5
            table_scan_pattern = TABLE_SCAN_PATTERN_V5
            index_scan_pattern = INDEX_SCAN_PATTERN_V5
            bitmap_scan_pattern = BITMAP_SCAN_PATTERN_V5

        # save the cost of the join above the scan type
        for row in rows:
            log_output(row[0])
            if re.search(nl_join_pattern, row[0]):
                cost = cost_from_explain_line(row[0])
            elif re.search(hash_join_pattern, row[0]):
                cost = cost_from_explain_line(row[0])
            # mark the scan type used underneath the join
            if re.search(TABLE_NAME_PATTERN, row[0]) or re.search(
                    BFV_TABLE_NAME_PATTERN, row[0]):
                if re.search(bitmap_scan_pattern, row[0]):
                    scan_type = BITMAP_SCAN
                elif re.search(index_scan_pattern, row[0]):
                    scan_type = INDEX_SCAN
                elif re.search(table_scan_pattern, row[0]):
                    scan_type = TABLE_SCAN

    except Exception as e:
        log_output("\n*** ERROR explaining query:\n%s;\nReason: %s" %
                   ("explain " + sqlStr, e))

    return (scan_type, cost)
Example #25
File: reload.py  Project: vmuravyev/gpdb
    def get_parent_partitions(self):
        with closing(
                dbconn.connect(
                    dbconn.DbURL(dbname=self.database,
                                 port=self.port))) as conn:
            for schema, table, col_list in self.table_list:
                PARENT_PARTITION_TABLENAME = """SELECT schemaname, tablename
                                                FROM pg_partitions
                                                WHERE partitiontablename='%s' 
                                                AND partitionschemaname='%s'""" % (
                    table, schema)
                res = dbconn.query(conn, PARENT_PARTITION_TABLENAME)
                for r in res:
                    self.parent_partition_map[(schema, table)] = (r[0], r[1])

                if (schema, table) not in self.parent_partition_map:
                    self.parent_partition_map[(schema, table)] = (schema,
                                                                  table)

        return self.parent_partition_map
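Example #28 below is the same routine ported to newer catalogs: the pg_partitions view is replaced with a pg_catalog join keyed on pg_partition_root().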
Example #26
File: utils.py  Project: robertmu/gpdb
def wait_for_desired_query_result_on_segment(host, port, query, desired_result, num_retries=150):
    """
    Tries once a second to check for the desired query result on the segment.
    Raises an Exception after failing <num_retries> times.
    """
    attempt = 0
    actual_result = None
    url = dbconn.DbURL(hostname=host, port=port, dbname='template1')
    while (attempt < num_retries) and (actual_result != desired_result):
        attempt += 1
        try:
            with closing(dbconn.connect(url, utility=True)) as conn:
                cursor = dbconn.query(conn, query)
                rows = cursor.fetchall()
                actual_result = rows[0][0]
        except Exception as e:
            print('could not query segment (%s:%s) %s' % (host, port, e))
        time.sleep(1)

    if actual_result != desired_result:
        raise Exception('Timed out after %s retries' % num_retries)
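A hypothetical usage that blocks until the segment accepts utility-mode connections (host and port assumed to come from, e.g., Example #16):

# waits until "SELECT 1" succeeds and returns 1 on the segment
wait_for_desired_query_result_on_segment(primary_seg_host, primary_seg_port, 'SELECT 1', 1)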
Example #27
    def validate_columns(self, schema_name, table_name, sort_column_list):
        columns = []
        conn = dbconn.connect(
            dbconn.DbURL(dbname=self.database, port=self.port))
        try:
            res = dbconn.query(
                conn, """SELECT attname
                         FROM pg_attribute
                         WHERE attrelid = (SELECT pg_class.oid
                                           FROM pg_class, pg_namespace
                                           WHERE pg_class.relnamespace = pg_namespace.oid
                                             AND pg_class.relname = '{table}'
                                             AND pg_namespace.nspname = '{schema}')"""
                .format(table=table_name, schema=schema_name))
            for cols in res.fetchall():
                columns.append(cols[0].strip())
            for c in sort_column_list:
                if c[0] not in columns:
                    raise ExceptionNoStackTraceNeeded(
                        'Table {schema}.{table} does not have column {col}'.
                        format(schema=schema_name, table=table_name, col=c[0]))
        finally:
            conn.close()
Example #28
File: reload.py  Project: zyclove/gpdb
    def get_parent_partitions(self):
        with closing(
                dbconn.connect(
                    dbconn.DbURL(dbname=self.database,
                                 port=self.port))) as conn:
            for schema, table, col_list in self.table_list:
                PARENT_PARTITION_TABLENAME = """SELECT nspname, relname
                                                FROM pg_catalog.pg_class AS c
                                                JOIN pg_catalog.pg_namespace AS ns
                                                ON c.relnamespace = ns.oid
                                                WHERE c.oid = pg_partition_root('%s.%s')
                                                """ % (schema, table)
                res = dbconn.query(conn, PARENT_PARTITION_TABLENAME)
                for r in res:
                    self.parent_partition_map[(schema, table)] = (r[0], r[1])

                if (schema, table) not in self.parent_partition_map:
                    self.parent_partition_map[(schema, table)] = (schema,
                                                                  table)

        return self.parent_partition_map
Example #29
File: utils.py  Project: petersky/gpdb
def wait_for_desired_query_result(dburl, query, desired_result, utility=False):
    """
    Tries once a second to check for the desired query result on the segment.
    Raises an Exception after failing <num_retries> times.
    """
    attempt = 0
    num_retries = 150
    actual_result = None
    while (attempt < num_retries) and (actual_result != desired_result):
        attempt += 1
        try:
            with closing(dbconn.connect(dburl, utility=utility)) as conn:
                cursor = dbconn.query(conn, query)
                rows = cursor.fetchall()
                actual_result = rows[0][0]
        except Exception as e:
            print('could not query (%s:%s) %s' %
                  (dburl.pghost, dburl.pgport, e))
        time.sleep(1)

    if actual_result != desired_result:
        raise Exception('Timed out after %s retries' % num_retries)
Example #30
def get_tablespace_locations(all_hosts, mirror_data_directory):
    """
    to get user defined tablespace locations for all hosts or a specific mirror data directory.
    :param all_hosts: boolean type to indicate if tablespace locations should be fetched from all hosts.
                      Only gpdeletesystem will call it with True
    :param mirror_data_directory: string type to fetch tablespace locations for a specific mirror data directory.
                      Only gpmovemirrors will call it with specific data directory
    :return: list of tablespace locations
    """
    tablespace_locations = []
    oid_subq = """ (SELECT *
                    FROM (
                        SELECT oid FROM pg_tablespace
                        WHERE spcname NOT IN ('pg_default', 'pg_global')
                        ) AS _q1,
                        LATERAL gp_tablespace_location(_q1.oid)
                    ) AS t """

    with dbconn.connect(dbconn.DbURL()) as conn:
        if all_hosts:
            tablespace_location_sql = """
                SELECT c.hostname, t.tblspc_loc||'/'||c.dbid tblspc_loc
                FROM {oid_subq}
                    JOIN gp_segment_configuration AS c
                    ON t.gp_segment_id = c.content
                """.format(oid_subq=oid_subq)
        else:
            tablespace_location_sql = """
                SELECT c.hostname,c.content, t.tblspc_loc||'/'||c.dbid tblspc_loc
                FROM {oid_subq}
                    JOIN gp_segment_configuration AS c
                    ON t.gp_segment_id = c.content
                    AND c.role='m' AND c.datadir ='{mirror_data_directory}'
                """.format(oid_subq=oid_subq,
                           mirror_data_directory=mirror_data_directory)
        res = dbconn.query(conn, tablespace_location_sql)
        for r in res:
            tablespace_locations.append(r)
    return tablespace_locations
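Hypothetical call sites matching the docstring (the mirror path is assumed):

locations = get_tablespace_locations(True, None)                    # gpdeletesystem: all hosts
locations = get_tablespace_locations(False, '/data/mirror/gpseg0')  # gpmovemirrors: one mirror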