def main():
    """Run a Sqoop import of a configurable table query into HDFS.

    Reads the connection details, column list, table name, optional where
    clause and partition column from ``HadoopRuntime().settings.Param``,
    creates a temporary Sqoop connection and import job, runs the job and
    waits for it, then stores the HDFS output directory in
    ``settings.Output.output_dir.val``.

    The job and the connection created here are deleted even when a later
    step raises, so a failed run does not leave them behind on the server.
    Any exception still propagates to the caller after cleanup.
    """
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)
    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("dump_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection named after the current job and block.
    conn_name = "import_m_job%s_blk%s" % (
        settings.GlobalParam["jobId"], settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(
        conn_name=conn_name,
        conn_str=settings.Param.connection_string,
        username=settings.Param.connection_username,
        password=settings.Param.connection_password)

    try:
        # Then, run the sqoop import job.
        fw_ps = {
            "output.storageType": "HDFS",
            "output.outputFormat": "TEXT_FILE",
            "output.outputDirectory": output_dir
        }

        # NOTE(review): column list, table name and where clause are
        # interpolated into the SQL verbatim -- confirm these parameters
        # only ever come from a trusted source.
        # The literal ${CONDITIONS} token is passed through unchanged.
        where_clause = settings.Param.where_clause
        if where_clause and str(where_clause).strip(" ") != "":
            table_sql = "select %s from %s where ${CONDITIONS} and %s " % (
                settings.Param.input_columns,
                settings.Param.table_name,
                where_clause)
        else:
            table_sql = "select %s from %s where ${CONDITIONS}" % (
                settings.Param.input_columns,
                settings.Param.table_name)
        partition_column = settings.Param.partition_column

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(where_clause)
        print(table_sql)

        job_ps = {
            "table.sql": table_sql,
            "table.partitionColumn": partition_column
        }
        job_name = "import job :: username(%s) job %s, block %s" % (
            settings.GlobalParam["userName"],
            settings.GlobalParam["jobId"],
            settings.GlobalParam["blockId"])
        r = sqoop.create_import_job(job_name=job_name,
                                    connection_id=conn_ret["id"],
                                    framework_params=fw_ps,
                                    job_params=job_ps)
        try:
            pp(r)
            sqoop.run_job(r['id'])
            sqoop.wait_job(r['id'])
        finally:
            # Remove the job whether or not it ran to completion.
            sqoop.delete_job(r['id'])
    finally:
        # Finally, delete the connection we created.
        sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.output_dir.val = output_dir
    print("Done")
def main():
    """Run a Sqoop import of a configurable table query into HDFS.

    Reads the connection details, column list, table name, optional where
    clause and partition column from ``HadoopRuntime().settings.Param``,
    creates a temporary Sqoop connection and import job, runs the job and
    waits for it, then stores the HDFS output directory in
    ``settings.Output.output_dir.val``.

    The job and the connection created here are deleted even when a later
    step raises, so a failed run does not leave them behind on the server.
    Any exception still propagates to the caller after cleanup.
    """
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)
    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("dump_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection named after the current job and block.
    conn_name = "import_m_job%s_blk%s" % (
        settings.GlobalParam["jobId"], settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(
        conn_name=conn_name,
        conn_str=settings.Param.connection_string,
        username=settings.Param.connection_username,
        password=settings.Param.connection_password)

    try:
        # Then, run the sqoop import job.
        fw_ps = {
            "output.storageType": "HDFS",
            "output.outputFormat": "TEXT_FILE",
            "output.outputDirectory": output_dir
        }

        # NOTE(review): column list, table name and where clause are
        # interpolated into the SQL verbatim -- confirm these parameters
        # only ever come from a trusted source.
        # The literal ${CONDITIONS} token is passed through unchanged.
        where_clause = settings.Param.where_clause
        if where_clause and str(where_clause).strip(" ") != "":
            table_sql = "select %s from %s where ${CONDITIONS} and %s " % (
                settings.Param.input_columns,
                settings.Param.table_name,
                where_clause)
        else:
            table_sql = "select %s from %s where ${CONDITIONS}" % (
                settings.Param.input_columns,
                settings.Param.table_name)
        partition_column = settings.Param.partition_column

        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(where_clause)
        print(table_sql)

        job_ps = {
            "table.sql": table_sql,
            "table.partitionColumn": partition_column
        }
        job_name = "import job :: username(%s) job %s, block %s" % (
            settings.GlobalParam["userName"],
            settings.GlobalParam["jobId"],
            settings.GlobalParam["blockId"])
        r = sqoop.create_import_job(job_name=job_name,
                                    connection_id=conn_ret["id"],
                                    framework_params=fw_ps,
                                    job_params=job_ps)
        try:
            pp(r)
            sqoop.run_job(r['id'])
            sqoop.wait_job(r['id'])
        finally:
            # Remove the job whether or not it ran to completion.
            sqoop.delete_job(r['id'])
    finally:
        # Finally, delete the connection we created.
        sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.output_dir.val = output_dir
    print("Done")
def main():
    """Run a Sqoop import of the ``Message`` table query into HDFS.

    Creates a temporary Sqoop connection from the connection parameters in
    ``HadoopRuntime().settings.Param``, runs an import job for a fixed
    query over ``Message`` partitioned on ``UserId``, waits for it, then
    stores the HDFS output directory in ``settings.Output.message_dir.val``.

    The job and the connection created here are deleted even when a later
    step raises, so a failed run does not leave them behind on the server.
    Any exception still propagates to the caller after cleanup.
    """
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)
    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("message_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection named after the current job and block.
    conn_name = "import_m_job%s_blk%s" % (
        settings.GlobalParam["jobId"], settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(
        conn_name=conn_name,
        conn_str=settings.Param.connection_string,
        username=settings.Param.connection_username,
        password=settings.Param.connection_password)

    try:
        # Then, run the sqoop import job.
        fw_ps = {
            "output.storageType": "HDFS",
            "output.outputFormat": "TEXT_FILE",
            "output.outputDirectory": output_dir
        }
        # The literal ${CONDITIONS} token is passed through unchanged.
        job_ps = {
            "table.sql": "select UserId,Description,RefreshDate from Message where ${CONDITIONS}",
            "table.partitionColumn": "UserId"
        }
        job_name = "import job :: username(%s) job %s, block %s" % (
            settings.GlobalParam["userName"],
            settings.GlobalParam["jobId"],
            settings.GlobalParam["blockId"])
        r = sqoop.create_import_job(job_name=job_name,
                                    connection_id=conn_ret["id"],
                                    framework_params=fw_ps,
                                    job_params=job_ps)
        try:
            pp(r)
            sqoop.run_job(r['id'])
            sqoop.wait_job(r['id'])
        finally:
            # Remove the job whether or not it ran to completion.
            sqoop.delete_job(r['id'])
    finally:
        # Finally, delete the connection we created.
        sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.message_dir.val = output_dir
    print("Done")
def main():
    """Run a Sqoop import of the ``Message`` table query into HDFS.

    Creates a temporary Sqoop connection from the connection parameters in
    ``HadoopRuntime().settings.Param``, runs an import job for a fixed
    query over ``Message`` partitioned on ``UserId``, waits for it, then
    stores the HDFS output directory in ``settings.Output.message_dir.val``.

    The job and the connection created here are deleted even when a later
    step raises, so a failed run does not leave them behind on the server.
    Any exception still propagates to the caller after cleanup.
    """
    hr = HadoopRuntime()
    settings = hr.settings
    print(settings)
    hr.clean_working_dir()
    output_dir = hr.get_hdfs_working_dir("message_dir")

    sqoop = MySqoop(settings.Param.Sqoop2Server_Host,
                    int(settings.Param.Sqoop2Server_Port))

    # First, create a connection named after the current job and block.
    conn_name = "import_m_job%s_blk%s" % (
        settings.GlobalParam["jobId"], settings.GlobalParam["blockId"])
    conn_ret = sqoop.create_connection(
        conn_name=conn_name,
        conn_str=settings.Param.connection_string,
        username=settings.Param.connection_username,
        password=settings.Param.connection_password)

    try:
        # Then, run the sqoop import job.
        fw_ps = {
            "output.storageType": "HDFS",
            "output.outputFormat": "TEXT_FILE",
            "output.outputDirectory": output_dir
        }
        # The literal ${CONDITIONS} token is passed through unchanged.
        job_ps = {
            "table.sql": "select UserId,Description,RefreshDate from Message where ${CONDITIONS}",
            "table.partitionColumn": "UserId"
        }
        job_name = "import job :: username(%s) job %s, block %s" % (
            settings.GlobalParam["userName"],
            settings.GlobalParam["jobId"],
            settings.GlobalParam["blockId"])
        r = sqoop.create_import_job(job_name=job_name,
                                    connection_id=conn_ret["id"],
                                    framework_params=fw_ps,
                                    job_params=job_ps)
        try:
            pp(r)
            sqoop.run_job(r['id'])
            sqoop.wait_job(r['id'])
        finally:
            # Remove the job whether or not it ran to completion.
            sqoop.delete_job(r['id'])
    finally:
        # Finally, delete the connection we created.
        sqoop.delete_connection_by_id(conn_ret["id"])

    settings.Output.message_dir.val = output_dir
    print("Done")