def setup(self):
    # Check whether the database exists; if not, create it from the
    # default 'postgres' maintenance database.
    try:
        cnx = pg.connect(dbname=self.database_name)
    except Exception, e:
        cnx = pg.connect(dbname='postgres')
        cnx.query('CREATE DATABASE %s;' % (self.database_name))
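The create-if-missing pattern above can be lifted into a self-contained helper. This is a minimal sketch assuming the classic PyGreSQL pg module; ensure_database is a hypothetical name, not part of the snippet above.

import pg

def ensure_database(database_name):
    # Hypothetical helper: connect to the database, creating it on demand.
    try:
        return pg.connect(dbname=database_name)
    except pg.InternalError:
        # Fall back to the maintenance database and create the target one.
        cnx = pg.connect(dbname='postgres')
        cnx.query('CREATE DATABASE %s;' % database_name)
        cnx.close()
        return pg.connect(dbname=database_name)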
def main(argv):
    # Parse the command-line parameters
    args = Argument_Parsing(argv)
    if args.a:
        quiet = True
    else:
        quiet = False
    # Parse the configuration file
    config_file = args.f
    Check_Settings(config_file, quiet)
    # Connect to the Result and Replay databases
    db_Result = pg.connect(dbname=ResultDatabaseName, host=ResultDatabaseIPAddress,
                           port=ResultDatabasePort, user=ResultDatabaseUsername)
    db_Result_Connect_String = ('host=' + ResultDatabaseIPAddress +
                                ' port=' + str(ResultDatabasePort) +
                                ' user=' + ResultDatabaseUsername +
                                ' dbname=' + ResultDatabaseName)
    db_Replay = pg.connect(dbname=ReplayDatabase, host=ReplayDatabaseIPAddress,
                           port=ReplayDatabasePort, user=ReplayDatabaseUsername)
    # Output: clean the output result table of the Query Replicator tool
    if args.c:
        Create_Reset_Result_Table(db_Result, quiet)
    # Log: create an external table on top of the log file
    if args.L:
        Create_ExtTable_On_Log(db_Result_Connect_String)
    # Check that the log file read by the external table matches the one in the config file
    Ext_Table_File = get_LogFileName_Ext_Table(db_Result)
    if Ext_Table_File != logFileName:
        print 'ERROR: difference between file read in external table and file in ' + config_file
        print ' - file read in external table: ' + Ext_Table_File
        print ' - file in properties.py:       ' + logFileName
        sys.exit(1)
    # Excluded: create or reset the excluded-queries table
    if args.e:
        Init_Excluded_Queries_Table(db_Result_Connect_String, quiet)
    # If the option -r is not used, the queries are not replayed
    if not args.r:
        sys.exit(0)
    Replay_Queries(db_Result)
    Extract_Statistics(file_name_summary, db_Result)
    Trace_End(file_name_summary)
    db_Result.close()
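The driver above would conventionally be invoked as follows; slicing off the script name is an assumption about what Argument_Parsing expects.

if __name__ == '__main__':
    # Conventional entry point; pass everything after the script name.
    main(sys.argv[1:])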
def resetOutputTable(hostname, username, dbname, port):
    db = pg.connect(dbname=dbname, host=hostname, port=port, user=username)
    try:
        query_results = db.query('DROP TABLE IF EXISTS ' + outputTableName)
        query_results = db.query(
            'CREATE TABLE ' + outputTableName +
            ' (sequence serial primary key, userQuery VARCHAR(100000),'
            ' database VARCHAR(100000), statement VARCHAR(100000),'
            ' success VARCHAR(100000), error VARCHAR(100000),'
            ' timestamp_start timestamp, timestamp_end timestamp)')
    except Exception as e:
        print('impossible to create output table: system error ' + str(e))
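A hypothetical call for the helper above; outputTableName is the module-level global the function reads, and the connection values are placeholders.

outputTableName = 'qr_output'
resetOutputTable('localhost', 'gpadmin', 'gpadmin', 5432)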
def make_conn(ug, user, db, options, port, sockdir):
    retries = 5
    for i in range(retries):
        try:
            logger.debug("making database connection: user = %s, "
                         "dbname = %s port = %i" % (user, db, port))
            conn = pg.connect(user=user, dbname=db, opt=options, port=port)
            break
        except pg.InternalError, e:
            # Retry only while attempts remain; on the last attempt
            # (or for any other error) surface a ConnectionError.
            if 'too many clients already' in str(e) and i < retries - 1:
                logger.warning('Max connections reached, attempt %d / %d'
                               % (i + 1, retries))
                sleep(2)
                continue
            raise ConnectionError(str(e))
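A usage sketch for the retry helper above, with placeholder connection values; make_conn also relies on sleep() and a module logger being available.

import logging
from time import sleep

logger = logging.getLogger(__name__)

# Hypothetical call; the user/database names and port are placeholders.
conn = make_conn(None, 'gpadmin', 'gpadmin', None, 5432, None)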
def connect(user=None, password=None, host=None, port=None, database=None, utilityMode=False):
    '''Connect to DB using parameters in GV'''
    options = utilityMode and '-c gp_session_role=utility' or None
    try:
        logging.debug('connecting to %s:%s %s' % (host, port, database))
        db = pg.connect(host=host, port=port, user=user,
                        passwd=password, dbname=database, opt=options)
    except pg.InternalError, ex:
        print ex
        exit(1)
    return db
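The options line uses the old Python "and/or" idiom, which only works because the utility-mode string is always truthy; a conditional expression says the same thing more safely.

# Equivalent to the and/or idiom above, without the truthiness caveat.
options = '-c gp_session_role=utility' if utilityMode else None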
parser = argparse.ArgumentParser()
parser.add_argument("-rs", help="Specifies what to remove in a string in the case that files have the same prefix")
parser.add_argument("-ft", help="Specifies the file type of the files being loaded. 'TEXT' will automatically set the column delimiter to tab and 'CSV' will be comma delimited")
args = parser.parse_args()

if args.rs:
    replace_string = args.rs
else:
    replace_string = ''
if args.ft:
    filetype = args.ft
else:
    filetype = 'CSV'

# Connection Configuration
# con = pg.connect('gpadmin', 'localhost', 5432, None, None, 'gpadmin', None)

def init():
    schema = 'public'  # source2.split("/")[-2]
    filename = 'my_data'
    first_line = "ID,Name,Age"  # first_line = 'col1, col2, col3'
    values = first_line.split(",")
    hdfs_host = ""
    # Build a "col text" definition for every column in the header line
    col_defs = " text, \n".join([str(i) for i in values]) + " text"
    hdfs_source = '/tmp/my_data.csv'
    drop_ext = "DROP EXTERNAL TABLE IF EXISTS " + schema + "." + filename + "_ext;"
    print drop_ext
    drop_reg = "DROP TABLE IF EXISTS " + schema + "." + filename + ";"
    print drop_reg
    ext_table = ("CREATE EXTERNAL TABLE " + schema + "." + filename + "_ext (\n" +
                 col_defs + " ) \nLOCATION ( 'pxf://10.68.47.141:8020/" + hdfs_source +
                 "?profile=HdfsTextSimple')\nFORMAT '" + filetype +
                 "' \n(HEADER)\nLOG ERRORS INTO errors SEGMENT REJECT LIMIT 2 ;")
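For the sample header, the column-definition string built above expands as shown in this quick illustration (run separately, not output from the script):

# Illustration of the col_defs construction for "ID,Name,Age".
values = "ID,Name,Age".split(",")
col_defs = " text, \n".join(values) + " text"
print col_defs
# ID text,
# Name text,
# Age text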
def __init__(self, hostname, port):
    self.conn = pg.connect(host=hostname, port=port, dbname='gpadmin')
def Replay_Queue_Thread(queue, queue_num):
    ## Read from the queue
    # Connect to the log database
    counter = 0
    db_Result = pg.connect(dbname=ResultDatabaseName, host=ResultDatabaseIPAddress,
                           port=ResultDatabasePort, user=ResultDatabaseUsername)
    db_Replay = pg.connect(dbname=ReplayDatabase, host=ReplayDatabaseIPAddress,
                           port=ReplayDatabasePort, user=ReplayDatabaseUsername)
    set_Replay_Database_GUC(db_Replay)
    # Open a temporary file to write the results, then use the COPY command to load in batch
    file_name = current_path + '/copy/copy' + str(os.getpid()) + '.txt'
    # Open in write mode
    copyFile = open(file_name, 'w+')
    while True:
        entry = queue.get()
        # A None entry is the termination signal: nothing more to consume
        if entry is None:
            break
        # Take the current query parameters (username, statement, etc.)
        user_name = entry[0]
        statement = entry[2]
        event_detail = entry[3]
        query_length = entry[7]
        thread_nb_queries = entry[8]
        statement = checkParticularConditions(statement, event_detail)
        statement = checkAndRemovingComment(statement)
        if statement.strip():
            type_qry = statement.split()[0].lower()
            list_qry_explain = ['select', 'update', 'insert']
            if ReplayRunExplain == True and type_qry in list_qry_explain:
                statement = "EXPLAIN " + statement
            timestamp_start = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
            Success, output = Check_Query(entry, statement, db_Replay)
            timestamp_end = datetime.datetime.now().strftime("%Y%m%d %H:%M:%S")
            counter = counter + 1
            step = max(1, thread_nb_queries / 40)
            if counter % step == 0:
                update_progress(counter / (thread_nb_queries * 1.0), queue_num)
            # Copy to file
            Write_copyFile(copyFile, user_name, ReplayDatabase, statement, query_length,
                           Success, output, timestamp_start, timestamp_end)
    update_progress(counter / (thread_nb_queries * 1.0), queue_num)
    copyFile.close()
    Load_Copy_Files(db_Result, file_name)
    try:
        os.remove(file_name)
    except OSError:
        pass
    # We are terminating: close all connections
    db_Replay.close()
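A hedged sketch of how such consumer threads are typically wired up, with one None sentinel per consumer; the thread count and the producer side are assumptions, not taken from the tool itself.

import threading
import Queue  # Python 2 name of the queue module

num_threads = 4
queues = [Queue.Queue() for _ in range(num_threads)]
threads = []
for n, q in enumerate(queues):
    t = threading.Thread(target=Replay_Queue_Thread, args=(q, n))
    t.start()
    threads.append(t)

# ... the producer puts query entries on the queues here ...

for q in queues:
    q.put(None)  # the None sentinel makes each consumer leave its loop
for t in threads:
    t.join()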
def init():
    global cfg
    global con
    global start_time
    global location
    global args
    global log

    start_time = datetime.datetime.now().strftime("%y%m%d%H%M%S%f")
    log = open("./sailfish_" + str(start_time) + ".log", "w")

    #########################################################################
    # Defines arguments to be parsed in command line                        #
    #########################################################################
    parser = argparse.ArgumentParser(
        description='Greenplum S3 File viewer\nAuthor: Louis Mugnano\nEmail: [email protected]\n'
                    'This script is provided as a field-written utility and NOT supported by Pivotal. '
                    'You are free to alter the script in any way you see fit, but if you find bugs in '
                    'the script or have recommendations to improve it, please send an email to the '
                    'author listed above. By contacting the author, your input for fixes and changes '
                    'can be made to the script and shared with others.',
        formatter_class=RawTextHelpFormatter)
    parser.add_argument(
        "-cfg", required=True,
        help="Specifies the configuration file. If an error saying 'no module name .py found' "
             "appears, be sure to remove the .py extension when specifying the file.")
    parser.add_argument(
        "-host",
        help="The host of the master node of the Greenplum database. If no -host is specified, "
             "the script will use the PGHOST environment variable if it is set.")
    parser.add_argument(
        "-p", type=int,
        help="The port for the master node of the Greenplum database. If no -p is specified, "
             "the script will use the PGPORT environment variable if it is set.")
    parser.add_argument(
        "-db",
        help="The database being used. If no -db is specified, the script will use the "
             "PGDATABASE environment variable if it is set.")
    parser.add_argument("-usr", help="User to log into the database under.")
    parser.add_argument(
        "-tbl",
        help="Set to the name of the target table you want to load data into, in the format <table_name>.")
    parser.add_argument("-action", help="Sets the action to be completed.")
    parser.add_argument("-s3_bucket", help="Sets the source S3 bucket of what's being loaded.")
    parser.add_argument("-s3_key", help="Sets the source S3 object key you want to view and/or load.")
    parser.add_argument("-s3_config", help="Set to the full path of the S3 Connector config file.")
    parser.add_argument(
        "-ext_tbl_prefix",
        help="Prefix to use for the generated external table (default 'ext_' if both "
             "-ext_tbl_prefix and -ext_tbl_suffix are not specified).")
    parser.add_argument("-ext_tbl_suffix", help="Suffix to use for the generated external table.")
    parser.add_argument(
        "-view_prefix",
        help="Prefix to use for the generated view (default 'vw_' if both "
             "-view_prefix and -view_suffix are not specified).")
    parser.add_argument("-view_suffix", help="Suffix to use for the generated view.")
    parser.add_argument("-admin_role", help="Role to grant admin rights to for generated objects.")
    parser.add_argument("-viewer_role", help="Role to grant select rights to for generated objects.")
    args = parser.parse_args()

    path = os.path.splitext(args.cfg)[0]
    if path:
        cfg = __import__(path)
    else:
        raise argparse.ArgumentTypeError(
            "You must specify a configuration file using the -cfg option following the load "
            "script. Make sure that you don't include the .py extension when specifying it, and "
            "ensure that the config file is in the same directory as the gpLoad script. For more "
            "information, enter 'python gpLoad.py -h' into the command line.")

    # Command-line arguments override the corresponding config-file settings
    if args.host:
        cfg.host = args.host
    if args.p:
        cfg.port = args.p
    if args.db:
        cfg.db = args.db
    if args.usr:
        cfg.user = args.usr
    if args.tbl:
        cfg.tbl = args.tbl
    if args.action:
        cfg.action = args.action
    if args.s3_bucket:
        cfg.s3_bucket = args.s3_bucket
    if args.s3_key:
        cfg.s3_key = args.s3_key
    if args.s3_config:
        cfg.s3_config = args.s3_config
    if args.ext_tbl_prefix:
        cfg.ext_tbl_prefix = args.ext_tbl_prefix
    if args.ext_tbl_suffix:
        cfg.ext_tbl_suffix = args.ext_tbl_suffix
    if args.view_prefix:
        cfg.view_prefix = args.view_prefix
    if args.view_suffix:
        cfg.view_suffix = args.view_suffix
    if args.admin_role:
        cfg.admin_role = args.admin_role
    if args.viewer_role:
        cfg.viewer_role = args.viewer_role

    print("------------- CONNECTION CONFIGURATION -----------------", file=log)
    print("Host:     " + cfg.host, file=log)
    print("Port:     " + str(cfg.port), file=log)
    print("Database: " + cfg.db, file=log)
    print("----------------------------------------------------------", file=log)

    try:
        con = pg.connect(dbname=cfg.db, host=cfg.host, port=cfg.port, user=cfg.user)
    except pg.Error as error_message:
        raise Exception("Connection Error: " + str(error_message))
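Because the script imports the -cfg file as a module, a minimal configuration file might look like the sketch below; the attribute names follow the overrides above, and every value is a placeholder.

# my_config.py -- hypothetical configuration module; each attribute can
# still be overridden by the matching command-line option.
host = 'localhost'
port = 5432
db = 'gpadmin'
user = 'gpadmin'
tbl = 'public.my_table'
action = 'view'
s3_bucket = 'my-bucket'
s3_key = 'data/my_file.csv'
s3_config = '/home/gpadmin/s3.conf'
ext_tbl_prefix = 'ext_'
view_prefix = 'vw_'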
def connect_local_db(dsn):
    try:
        return pg.connect(dsn)
    except:
        raise
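A hypothetical call for the helper above, assuming pg.connect passes the string through to libpq, which treats a dbname containing '=' as a full connection string; every value here is a placeholder.

dsn = 'host=localhost port=5432 dbname=gpadmin user=gpadmin'
db = connect_local_db(dsn)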
"-ft", help= "Specifies the file type of the files being loaded. 'TEXT' will automatically set the column delimited to tab and 'CSV' will be comma delimited" ) args = parser.parse_args() if args.rs: replace_string = args.rs else: replace_string = '' if args.ft: filetype = args.ft else: filetype = 'CSV' # Connection Configuration # con = pg.connect('gpadmin', 'localhost', 5432, None, None, 'gpadmin', None) def init(): schema = 'public' # source2.split("/")[-2] filename = 'my_data' first_line = "ID,Name,Age" # first_line = 'col1, col2, col3' values = first_line.split(",") hdfs_host = "" list = " text, \n".join([str(i) for i in values])[:-1] + " text" hdfs_source = '/tmp/my_data.csv' drop_ext = "DROP EXTERNAL TABLE IF EXISTS " + schema + "." + filename + "_ext;" print drop_ext drop_reg = "DROP TABLE IF EXISTS " + schema + "." + filename + ";"
def connectToDataBase(hostname, username, dbname, port):
    db = pg.connect(dbname=dbname, host=hostname, port=port, user=username)
    return db
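Hypothetical usage of the helper above, with placeholder connection values; getresult() is the classic PyGreSQL way to fetch query rows.

db = connectToDataBase('localhost', 'gpadmin', 'gpadmin', 5432)
print db.query('SELECT version()').getresult()
db.close()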