コード例 #1
0
 def setup(self):
     """Ensure the database named ``self.database_name`` exists.

     Tries to connect to it directly; if that fails, connects to the
     default ``postgres`` maintenance database and creates it.
     """
     try:
         cnx = pg.connect(dbname=self.database_name)
     except Exception:
         # Connection failed -- presumably the database does not exist yet,
         # so create it from the maintenance database.
         # (BUG FIX: `except Exception, e` is Python-2-only syntax and `e`
         # was never used.)
         cnx = pg.connect(dbname='postgres')
         # NOTE(review): database_name is interpolated straight into DDL;
         # CREATE DATABASE cannot be parameterized, so this is only safe
         # for a trusted identifier -- confirm the value's origin.
         cnx.query('CREATE DATABASE %s;' % (self.database_name))
コード例 #2
0
def main(argv):
    """Drive a query-replicator run end to end.

    Parses the command line and settings file, connects to the result and
    replay databases, optionally resets tables / creates the log external
    table, verifies the log file, then replays queries and extracts
    statistics.
    """
    # Parse the command line parameters
    args = Argument_Parsing(argv)

    # -a switches quiet mode on
    quiet = bool(args.a)

    # Parse the configuration file
    config_file = args.f
    Check_Settings(config_file, quiet)

    # connect to the Result and Replay database
    db_Result = pg.connect(dbname=ResultDatabaseName,
                           host=ResultDatabaseIPAddress,
                           port=ResultDatabasePort,
                           user=ResultDatabaseUsername)
    # libpq-style connection string for helpers that open their own session.
    # BUG FIX: the user value had been replaced by a masked '******' token,
    # which is not valid Python; restored from the connection parameters
    # above -- confirm against the original source.
    db_Result_Connect_String = ('host=' + ResultDatabaseIPAddress +
                                ' port=' + str(ResultDatabasePort) +
                                ' user=' + ResultDatabaseUsername +
                                ' dbname=' + ResultDatabaseName)
    db_Replay = pg.connect(dbname=ReplayDatabase,
                           host=ReplayDatabaseIPAddress,
                           port=ReplayDatabasePort,
                           user=ReplayDatabaseUsername)

    # Output: clean the output result table of the Query replicator tool
    if args.c:
        Create_Reset_Result_Table(db_Result, quiet)

    # Log: Create External table on top of log file
    if args.L:
        Create_ExtTable_On_Log(db_Result_Connect_String)

    # Check the log file read by the external table against the log file in
    # the config file.  (BUG FIX: '<>' was removed in Python 3; use '!=')
    Ext_Table_File = get_LogFileName_Ext_Table(db_Result)
    if Ext_Table_File != logFileName:
        print('ERROR: difference between file read in external table and file in ' + config_file)
        print('    - file read in external table:' + Ext_Table_File)
        print('    - file in properties.py:      ' + logFileName)
        sys.exit(1)

    # Excluded: Create or reset excluded table
    if args.e:
        Init_Excluded_Queries_Table(db_Result_Connect_String, quiet)

    # If the option -r is not used, the queries are not replayed
    if not args.r:
        sys.exit(0)

    Replay_Queries(db_Result)

    Extract_Statistics(file_name_summary, db_Result)

    Trace_End(file_name_summary)

    # Close both connections (db_Replay was leaked in the original).
    db_Replay.close()
    db_Result.close()
コード例 #3
0
def resetOutputTable(hostname, username, dbname, port):
    """Drop and recreate the replicator output table.

    Connects to *dbname* on *hostname*:*port* as *username* and rebuilds
    the table named by the module-global ``outputTableName``.
    Failures are reported to stdout, not raised.
    """
    db = pg.connect(dbname=dbname, host=hostname, port=port, user=username)
    try:
        db.query('DROP table if exists ' + outputTableName)
        db.query(
            'CREATE TABLE ' + outputTableName +
            ' (sequence serial primary key, userQuery VARCHAR(100000), database VARCHAR(100000), statement VARCHAR(100000), success VARCHAR(100000), error VARCHAR(100000), timestamp_start timestamp, timestamp_end timestamp)'
        )
    except Exception as e:
        # BUG FIX: the handler referenced the undefined name `exception`,
        # which raised a NameError and hid the real database error.
        print('impossible to create output table: System error' +
              str(e))
コード例 #4
0
def make_conn(ug, user, db, options, port, sockdir):
    """Open a pg connection, retrying while the server is at max_connections.

    Retries up to 5 times, 2 seconds apart, when the server reports
    'too many clients already'; any other InternalError (or the final
    failed attempt) is re-raised as ConnectionError.
    """
    retries = 5
    for i in range(retries):
        try:
            logger.debug("making database connection: user = %s, "
                         "dbname = %s port = %i" % (user, db, port))
            conn = pg.connect(user=user, dbname=db, opt=options, port=port)
            break
        except pg.InternalError as e:
            # BUG FIX: 'i < retries' was always true, so after the last
            # failed attempt the loop simply ended with `conn` unbound
            # instead of raising; compare against retries - 1.
            if 'too many clients already' in str(e) and i < retries - 1:
                logger.warning('Max Connection reached, attempt %d / %d' %
                               (i + 1, retries))
                sleep(2)
                continue
            raise ConnectionError(str(e))
    # NOTE(review): `conn` is assigned but never returned -- this snippet
    # looks truncated upstream; confirm what callers expect.
コード例 #5
0
def make_conn(ug, user, db, options, port, sockdir):
    """Open a pg connection, retrying while the server is at max_connections.

    Retries up to 5 times, 2 seconds apart, when the server reports
    'too many clients already'; any other InternalError (or the final
    failed attempt) is re-raised as ConnectionError.
    """
    retries = 5
    for i in range(retries):
        try:
            logger.debug("making database connection: user = %s, "
                         "dbname = %s port = %i"
                         % (user, db, port))
            conn = pg.connect(user=user,
                              dbname=db,
                              opt=options,
                              port=port)
            break
        except pg.InternalError as e:
            # BUG FIX: 'i < retries' was always true, so after the last
            # failed attempt the loop simply ended with `conn` unbound
            # instead of raising; compare against retries - 1.
            if 'too many clients already' in str(e) and i < retries - 1:
                logger.warning('Max Connection reached, attempt %d / %d' % (i + 1, retries))
                sleep(2)
                continue
            raise ConnectionError(str(e))
    # NOTE(review): `conn` is assigned but never returned -- this snippet
    # looks truncated upstream; confirm what callers expect.
コード例 #6
0
def connect(user=None,
            password=None,
            host=None,
            port=None,
            database=None,
            utilityMode=False):
    '''Connect to DB using parameters in GV.

    When utilityMode is true the session is opened with
    gp_session_role=utility.  On InternalError the error is printed and
    the process exits with status 1.
    '''
    # Conditional expression instead of the fragile `and/or` idiom
    # (equivalent here because the option string is truthy).
    options = '-c gp_session_role=utility' if utilityMode else None

    try:
        logging.debug('connecting to %s:%s %s' % (host, port, database))
        db = pg.connect(host=host,
                        port=port,
                        user=user,
                        passwd=password,
                        dbname=database,
                        opt=options)
    except pg.InternalError as ex:
        # BUG FIX: `except pg.InternalError, ex` / `print ex` are
        # Python-2-only syntax.
        print(ex)
        exit(1)
    # NOTE(review): `db` is assigned but never returned -- this snippet
    # looks truncated upstream; confirm what callers expect.
コード例 #7
0
# Command-line flags for the loader script.
parser = argparse.ArgumentParser()
parser.add_argument("-rs", help="Specifies what to remove in a string in the case that files have the same prefix")
parser.add_argument("-ft", help="Specifies the file type of the files being loaded. 'TEXT' will automatically set the column delimited to tab and 'CSV' will be comma delimited")
args = parser.parse_args()

# Fall back to defaults when the optional flags were not supplied.
replace_string = args.rs if args.rs else ''
filetype = args.ft if args.ft else 'CSV'

# Connection Configuration #
con = pg.connect('gpadmin', 'localhost', 5432, None, None, 'gpadmin', None)
def init():
    """Build and print the DDL for loading my_data.csv through PXF.

    Prints DROP statements for the external and regular tables and builds
    the CREATE EXTERNAL TABLE statement.  Uses the module-global
    ``filetype`` for the FORMAT clause.
    """
    schema = 'public'  # source2.split("/")[-2]
    filename = 'my_data'
    first_line = "ID,Name,Age"
    # first_line = 'col1, col2, col3'
    values = first_line.split(",")
    hdfs_host = ""

    # BUG FIX: the original `" text, \n".join(...)[:-1]` chopped the last
    # character off the final column name ('Age' -> 'Ag'); join the names
    # and append the final type instead.  Also renamed from `list` to avoid
    # shadowing the built-in.
    column_defs = " text, \n".join([str(i) for i in values]) + " text"
    hdfs_source = '/tmp/my_data.csv'
    drop_ext = "DROP EXTERNAL TABLE IF EXISTS " + schema + "." + filename + "_ext;"
    print(drop_ext)
    drop_reg = "DROP TABLE IF EXISTS " + schema + "." + filename + ";"
    print(drop_reg)
    ext_table = "CREATE EXTERNAL TABLE " + schema + "." + filename + "_ext (\n" + column_defs + " ) \nLOCATION ( 'pxf://10.68.47.141:8020/" + hdfs_source + "?profile=HdfsTextSimple')\nFORMAT '" + filetype + "' \n(HEADER)\nLOG ERRORS INTO errors SEGMENT REJECT LIMIT 2 ;"
コード例 #8
0
 def __init__(self, hostname, port):
     """Open a connection to the 'gpadmin' database on hostname:port."""
     self.conn = pg.connect(dbname='gpadmin', host=hostname, port=port)
コード例 #9
0
def Replay_Queue_Thread(queue, queue_num):
    """Consume log entries from *queue* and replay them against the database.

    Each entry is a sequence of query metadata; ``None`` is the sentinel
    that ends consumption.  Per-query results are buffered in a temporary
    file and bulk-loaded with COPY at the end; progress is reported to
    slot *queue_num* roughly 40 times per thread.
    """
    counter = 0
    # connect to the result (log) and replay databases
    db_Result = pg.connect(dbname=ResultDatabaseName,
                           host=ResultDatabaseIPAddress,
                           port=ResultDatabasePort,
                           user=ResultDatabaseUsername)
    db_Replay = pg.connect(dbname=ReplayDatabase,
                           host=ReplayDatabaseIPAddress,
                           port=ReplayDatabasePort,
                           user=ReplayDatabaseUsername)
    set_Replay_Database_GUC(db_Replay)

    # Temporary file collecting per-query results; loaded in one COPY below.
    file_name = current_path + '/copy/copy' + str(os.getpid()) + '.txt'
    # BUG FIX: thread_nb_queries was unbound after the loop when the queue
    # delivered only the sentinel; initialize it and guard the final
    # progress update.
    thread_nb_queries = None

    # `with` guarantees the copy file is closed even if a replay step raises.
    with open(file_name, 'w+') as copyFile:
        while True:
            entry = queue.get()
            # None is the terminate signal: nothing more to consume.
            if entry is None:
                break

            # Current query parameters (username, statement, metadata).
            user_name = entry[0]
            statement = entry[2]
            event_detail = entry[3]
            query_length = entry[7]
            thread_nb_queries = entry[8]

            statement = checkParticularConditions(statement, event_detail)
            statement = checkAndRemovingComment(statement)
            # BUG FIX: the original computed type_qry only for non-blank
            # statements but used it unconditionally, so a blank statement
            # raised NameError (or reused a stale value) on the EXPLAIN
            # check; keep the whole EXPLAIN decision inside the guard.
            if statement.strip():
                type_qry = statement.split()[0].lower()
                if ReplayRunExplain and type_qry in ('select', 'update', 'insert'):
                    statement = "EXPLAIN " + statement
            timestamp_start = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
            Success, output = Check_Query(entry, statement, db_Replay)
            timestamp_end = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
            counter += 1
            # `//` keeps the integer-division behavior the Python 2 code
            # relied on.
            step = max(1, thread_nb_queries // 40)
            if counter % step == 0:
                update_progress(counter / (thread_nb_queries * 1.0), queue_num)
            # Buffer the result for the batched COPY.
            Write_copyFile(copyFile, user_name, ReplayDatabase, statement,
                           query_length, Success, output, timestamp_start,
                           timestamp_end)

        # Final progress update, only if at least one entry was processed.
        if counter and thread_nb_queries:
            update_progress(counter / (thread_nb_queries * 1.0), queue_num)

    Load_Copy_Files(db_Result, file_name)

    try:
        os.remove(file_name)
    except OSError:
        pass

    # We are terminating: close both connections (db_Result was leaked in
    # the original).
    db_Replay.close()
    db_Result.close()
コード例 #10
0
def init():
    """Parse CLI arguments, import the config module, and open the DB connection.

    Populates the module globals ``cfg``, ``con``, ``start_time``, ``args``
    and ``log``.  Any command-line flag that was supplied overrides the
    corresponding attribute on the imported ``cfg`` module.

    Raises:
        argparse.ArgumentTypeError: if no configuration file path was given.
        Exception: wrapping any pg connection failure.
    """
    global cfg
    global con
    global start_time
    global location
    global args
    global log

    start_time = datetime.datetime.now().strftime("%y%m%d%H%M%S%f")
    log = open("./sailfish_" + str(start_time) + ".log", "w")

    #########################################################################
    #            Defines arguments to be parsed in command line             #
    #########################################################################
    parser = argparse.ArgumentParser(
        description=
        'Greenplum S3 File viewer\nAuthor: Louis Mugnano\nEmail: [email protected]\nThis script is provided as a field written utility and NOT supported by Pivotal. You are free to alter the script in any way you see fit but if you find bugs in the script or have recommendations to improve it please send an email to the author listed below. By contacting the author, your input for fixes and changes can be made to the script and shared with others.',
        formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "-cfg",
        required=True,
        help=
        "Specifies configuration file. If an error saying 'no module name .py found', be sure to remove the py extension when specifying the file."
    )
    parser.add_argument(
        "-host",
        help=
        "The host of the master node of Greenplum database. If no -host if specified the script will use the PGHOST environment variable if it is set"
    )
    parser.add_argument(
        "-p",
        type=int,
        help=
        "-port : The port for the master node of Greenplum database. If no -port is specified the script will use the PGPORT environment variable if it is set"
    )
    parser.add_argument(
        "-db",
        help=
        "The database being used. If no db is specified the script will use the PGDATABASE environment variable if it is set"
    )
    parser.add_argument("-usr", help="User to log into the database under")
    parser.add_argument(
        "-tbl",
        help=
        "Set to the name of the target table you want to load data into in the format of <table_name>"
    )
    parser.add_argument("-action", help="Sets the action to be completed")
    parser.add_argument(
        "-s3_bucket", help="Sets the source S3 bucket of what's being loaded.")
    parser.add_argument(
        "-s3_key",
        help="Sets the source S3 object key you want to view and/or load.")
    parser.add_argument(
        "-s3_config",
        help="Set to the full path of the S3 Connector config file.")
    parser.add_argument(
        "-ext_tbl_prefix",
        help=
        "Prefix to use for the generated external table (Default 'ext_' if both ext_tbl_prefix and ext_tbl_suffix are not specified"
    )
    parser.add_argument("-ext_tbl_suffix",
                        help="Suffix to use for the generated external table.")
    parser.add_argument(
        "-view_prefix",
        help=
        "Prefix to use for the generated view (Default 'vw_' if both ext_view_prefix and ext_view_suffix are not specified"
    )
    parser.add_argument("-view_suffix",
                        help="Suffix to use for the generated view.")
    parser.add_argument(
        "-admin_role",
        help="Role to grant admin rights to for generated objects.")
    parser.add_argument(
        "-viewer_role",
        help="Role to grant select rights to for generated objects.")

    args = parser.parse_args()
    path = os.path.splitext(args.cfg)[0]

    if path:
        cfg = __import__(path)
    else:
        raise argparse.ArgumentTypeError(
            "You must specify a configuration file using the -cfg command following the load script. Make sure that you don't include the .py extention when specifying it and ensure that the config file is in the same directory as the gpLoad script. For more information, enter 'python gpLoad.py -h into the command line."
        )

    # Command-line values override the config module.  The CLI flag name
    # and the cfg attribute differ only for -p/port, -usr/user and
    # -db/db; the data-driven loop replaces fifteen identical if-blocks.
    overrides = [
        ("host", "host"),
        ("p", "port"),
        ("db", "db"),
        ("usr", "user"),
        ("tbl", "tbl"),
        ("action", "action"),
        ("s3_bucket", "s3_bucket"),
        ("s3_key", "s3_key"),
        ("s3_config", "s3_config"),
        ("ext_tbl_prefix", "ext_tbl_prefix"),
        ("ext_tbl_suffix", "ext_tbl_suffix"),
        ("view_prefix", "view_prefix"),
        ("view_suffix", "view_suffix"),
        ("admin_role", "admin_role"),
        ("viewer_role", "viewer_role"),
    ]
    for arg_name, cfg_name in overrides:
        value = getattr(args, arg_name)
        if value:
            setattr(cfg, cfg_name, value)

    print("-------------  CONNECTION CONFIGURATION  -----------------",
          file=log)
    print("Host: " + (cfg.host), file=log)
    print("Port: " + str(cfg.port), file=log)
    print("Database: " + (cfg.db), file=log)
    print("----------------------------------------------------------",
          file=log)

    try:
        con = pg.connect(dbname=cfg.db,
                         host=cfg.host,
                         port=cfg.port,
                         user=cfg.user)
    except pg.Error as error_message:
        raise Exception("Connection Error: " + str(error_message))
コード例 #11
0
def connect_local_db(dsn):
    """Return a connection to the local database described by *dsn*.

    The original wrapped the call in ``try/except: raise``, which is a
    no-op (a bare re-raise propagates the exception unchanged), so the
    wrapper is removed.
    """
    return pg.connect(dsn)
コード例 #12
0
    "-ft",
    help=
    "Specifies the file type of the files being loaded. 'TEXT' will automatically set the column delimited to tab and 'CSV' will be comma delimited"
)
args = parser.parse_args()

# Fall back to defaults when the optional flags were not supplied.
replace_string = args.rs if args.rs else ''
filetype = args.ft if args.ft else 'CSV'

# Connection Configuration #
con = pg.connect('gpadmin', 'localhost', 5432, None, None, 'gpadmin', None)


def init():
    """Build and print the DROP DDL for the my_data external/regular tables.

    Also assembles the column-definition list used by the external-table
    DDL built later in the script.
    """
    schema = 'public'  # source2.split("/")[-2]
    filename = 'my_data'
    first_line = "ID,Name,Age"
    # first_line = 'col1, col2, col3'
    values = first_line.split(",")
    hdfs_host = ""

    # BUG FIX: the original `" text, \n".join(...)[:-1]` chopped the last
    # character off the final column name ('Age' -> 'Ag'); join the names
    # and append the final type instead.  Also renamed from `list` to avoid
    # shadowing the built-in.
    column_defs = " text, \n".join([str(i) for i in values]) + " text"
    hdfs_source = '/tmp/my_data.csv'
    drop_ext = "DROP EXTERNAL TABLE IF EXISTS " + schema + "." + filename + "_ext;"
    print(drop_ext)
    drop_reg = "DROP TABLE IF EXISTS " + schema + "." + filename + ";"
コード例 #13
0
def connectToDataBase(hostname, username, dbname, port):
    """Return a new pg connection to *dbname* on *hostname*:*port* as *username*."""
    return pg.connect(dbname=dbname, host=hostname, port=port, user=username)