from pprint import pprint as pp
# NOTE: HadoopRuntime, MySqoop, get_settings_from_file, parse_jdbc,
# psycopg2_delete_table and pymssql_delete_table used below are this project's
# own helpers; import them from the module that defines them in this repo.


def main():
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)

    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("dump_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection
    conn_name = "import_m_job%s_blk%s" % (settings.GlobalParam["jobId"],
                                          settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(conn_name=conn_name,
                                       conn_str=settings.Param.connection_string,
                                       username=settings.Param.connection_username,
                                       password=settings.Param.connection_password)

    # Then, run the Sqoop import job
    fw_ps = {
        "output.storageType": "HDFS",
        "output.outputFormat": "TEXT_FILE",
        "output.outputDirectory": output_dir
    }
    # Append the optional WHERE clause to the free-form query
    if settings.Param.where_clause and str(settings.Param.where_clause).strip():
        table_sql = "select %s from %s where ${CONDITIONS} and %s" % (
            settings.Param.input_columns, settings.Param.table_name,
            settings.Param.where_clause)
    else:
        table_sql = "select %s from %s where ${CONDITIONS}" % (
            settings.Param.input_columns, settings.Param.table_name)
    partition_column = settings.Param.partition_column
    print(settings.Param.where_clause)
    print(table_sql)
    job_ps = {
        "table.sql": table_sql,
        "table.partitionColumn": partition_column
    }
    job_name = "import job :: username(%s) job %s, block %s" % (
        settings.GlobalParam["userName"], settings.GlobalParam["jobId"],
        settings.GlobalParam["blockId"])
    r = sqoop.create_import_job(job_name=job_name,
                                connection_id=conn_ret["id"],
                                framework_params=fw_ps,
                                job_params=job_ps)
    pp(r)
    sqoop.run_job(r['id'])
    sqoop.wait_job(r['id'])
    sqoop.delete_job(r['id'])

    # Finally, delete the connection we created
    sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.output_dir.val = output_dir
    print("Done")
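# A minimal, self-contained illustration of how the free-form query above is
# assembled (the helper name and sample values are made up). Sqoop2 later
# replaces the ${CONDITIONS} token with a range predicate over
# table.partitionColumn so that each mapper reads a disjoint slice of the table.
def build_import_sql(input_columns, table_name, where_clause=None):
    if where_clause and str(where_clause).strip():
        return "select %s from %s where ${CONDITIONS} and %s" % (
            input_columns, table_name, where_clause)
    return "select %s from %s where ${CONDITIONS}" % (input_columns, table_name)


print(build_import_sql("UserId,Description", "Message", "RefreshDate > '2014-01-01'"))
# -> select UserId,Description from Message where ${CONDITIONS} and RefreshDate > '2014-01-01'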
def main():
    settings = get_settings_from_file("spec.json")
    print(settings)

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # 1. Create a connection
    conn_name = "exporter_job%s_blk%s" % (settings.GlobalParam["jobId"],
                                          settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(conn_name=conn_name,
                                       conn_str=settings.Param.connection_string,
                                       username=settings.Param.connection_username,
                                       password=settings.Param.connection_password)

    # 2. Empty the target table before exporting into it
    print("Deleting the table %s" % settings.Param.table_name)
    conn_str = settings.Param.connection_string
    cfg = parse_jdbc(conn_str)
    cfg["username"] = settings.Param.connection_username
    cfg["password"] = settings.Param.connection_password
    print(cfg)
    if "postgresql" in cfg["name"]:
        psycopg2_delete_table(cfg, settings.Param.table_name)
        print("Deleted table %s in POSTGRES" % settings.Param.table_name)
    if "sqlserver" in cfg["name"]:
        pymssql_delete_table(cfg, settings.Param.table_name)
        print("Deleted table %s in MS SQL" % settings.Param.table_name)

    # 3. Run the Sqoop export job
    print("Running Sqoop2 job to export")
    fw_ps = {
        "input.inputDirectory": settings.Input.hdfs_path.val
    }
    job_ps = {
        "table.tableName": settings.Param.table_name,
        "table.columns": settings.Param.table_columns
    }
    job_name = "export job :: username(%s) job %s, block %s" % (
        settings.GlobalParam["userName"], settings.GlobalParam["jobId"],
        settings.GlobalParam["blockId"])
    r = sqoop.create_export_job(job_name=job_name,
                                connection_id=conn_ret["id"],
                                framework_params=fw_ps,
                                job_params=job_ps)
    pp(r)
    sqoop.run_job(r['id'])
    sqoop.wait_job(r['id'])
    sqoop.delete_job(r['id'])

    # Finally, delete the connection we created
    sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.signal.val = "ready"
    print("Done")
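# Sketches of the database helpers used by the exporter above; the real
# parse_jdbc / psycopg2_delete_table live elsewhere in this repo, so treat
# these as assumptions about their behaviour, not the actual implementations.
# The regex covers the postgres-style URL (jdbc:postgresql://host:port/db);
# SQL Server's "jdbc:sqlserver://host:port;databaseName=db" form would need
# extra handling.
import re
import psycopg2


def parse_jdbc_sketch(conn_str):
    m = re.match(r"jdbc:(?P<name>\w+)://(?P<host>[^:/;]+)"
                 r"(?::(?P<port>\d+))?/(?P<database>[^;?]+)", conn_str)
    if m is None:
        raise ValueError("unrecognized JDBC connection string: %s" % conn_str)
    cfg = m.groupdict()
    if cfg["port"] is not None:
        cfg["port"] = int(cfg["port"])
    return cfg


def psycopg2_delete_table_sketch(cfg, table_name):
    # Empty the target table so the export starts from a clean slate.
    conn = psycopg2.connect(host=cfg["host"], port=cfg["port"] or 5432,
                            dbname=cfg["database"], user=cfg["username"],
                            password=cfg["password"])
    try:
        with conn.cursor() as cur:
            # table_name comes from the block's own parameters, not user input
            cur.execute("DELETE FROM %s" % table_name)
        conn.commit()
    finally:
        conn.close()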
def main():
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)

    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("message_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection
    conn_name = "import_m_job%s_blk%s" % (settings.GlobalParam["jobId"],
                                          settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(conn_name=conn_name,
                                       conn_str=settings.Param.connection_string,
                                       username=settings.Param.connection_username,
                                       password=settings.Param.connection_password)

    # Then, run the Sqoop import job
    fw_ps = {
        "output.storageType": "HDFS",
        "output.outputFormat": "TEXT_FILE",
        "output.outputDirectory": output_dir
    }
    job_ps = {
        "table.sql": "select UserId,Description,RefreshDate from Message where ${CONDITIONS}",
        "table.partitionColumn": "UserId"
    }
    job_name = "import job :: username(%s) job %s, block %s" % (
        settings.GlobalParam["userName"], settings.GlobalParam["jobId"],
        settings.GlobalParam["blockId"])
    r = sqoop.create_import_job(job_name=job_name,
                                connection_id=conn_ret["id"],
                                framework_params=fw_ps,
                                job_params=job_ps)
    pp(r)
    sqoop.run_job(r['id'])
    sqoop.wait_job(r['id'])
    sqoop.delete_job(r['id'])

    # Finally, delete the connection we created
    sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.message_dir.val = output_dir
    print("Done")
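# Hypothetical entry point: these block scripts are assumed to be launched
# directly, with spec.json (the block's parameter file) in the working
# directory, as the get_settings_from_file("spec.json") call above suggests.
if __name__ == "__main__":
    main()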