Example #1
0
# Connection settings read from the parser (created earlier in the script).
user = parser.get('user')
project_name = parser.get('project_name')
project_id = parser.get('project_id')
main_ip = parser.get('main_ip')

# Commands used as the streaming mapper and reducer.
streaming_mapper = '/bin/cat'
# Fixed: this was '/usr/bin/wc - l'. With the stray space, wc treats "-"
# (stdin) and "l" as two file operands instead of applying the -l option.
streaming_reducer = '/usr/bin/wc -l'
# HDFS paths, appended to "/user/hadoop/" below.
input_hdfs_path = 'input'
output_hdfs_path = 'output-test'

# Ids of the job and the cluster handed to runStreamingJob.
job_id = 'f915cb23-9ab2-4476-8323-6b8381fd69d0'
cluster_id = '01b4a929-2ab2-43e6-8b3a-c6e163fe6f72'

# Timestamp suffix appended to the data source names.
exec_date = time.strftime('%Y%m%d%H%M%S')

# `password` is not assigned in this fragment; it must already be defined.
connector = ConnectionGetter(user, password, project_name, project_id, main_ip)

# Obtain a Keystone token id and use it to build the Sahara helper.
keystone_util = UtilKeystone(connector.keystone())
token_ref_id = keystone_util.getTokenRef(user, password, project_name).id
sahara_util = UtilSahara(connector.sahara(token_ref_id))

# Register the HDFS input and output locations as data sources.
# NOTE(review): the return value of createDataSource is used directly as an
# id here, while the MapReduce snippet reads `.id` from it -- confirm which
# form createDataSource actually returns.
input_ds_name = 'input_' + exec_date
input_ds_url = "/user/hadoop/" + input_hdfs_path
input_ds_id = sahara_util.createDataSource(input_ds_name, input_ds_url, 'hdfs', user, password)

output_ds_name = 'output_' + exec_date
output_ds_url = "/user/hadoop/" + output_hdfs_path
output_ds_id = sahara_util.createDataSource(output_ds_name, output_ds_url, 'hdfs', user, password)

# Launch the streaming job on the cluster with the data sources created above.
sahara_util.runStreamingJob(
    job_id,
    cluster_id,
    streaming_mapper=streaming_mapper,
    streaming_reducer=streaming_reducer,
    input_ds_id=input_ds_id,
    output_ds_id=output_ds_id
)
Example #2
0
# Pick the job id and the map output key/value settings for the requested job.
if job_name == 'Job1':
    job_id = 'job1_id'
    map_output_key = 'map_output_key'
    map_output_value = 'map_output_value'  # don't need the hadoop.org....
elif job_name == 'Job2':
    job_id = 'job2_id'
    map_output_key = 'map_output_key'
    map_output_value = 'map_output_value'
else:
    # Unknown job name: report it and stop with a non-zero exit status.
    # Parenthesised single-argument print behaves the same on Python 2 and 3.
    print('Job doenst exists in the script')
    exit(1)

connector = ConnectionGetter(user, password, project_name, project_id, main_ip)

# Obtain a Keystone token id and use it to build the Sahara helper.
keystone_util = UtilKeystone(connector.keystone())
# Fixed: this line referenced the undefined name `passwordstone_util`.
token_ref_id = keystone_util.getTokenRef(user, password, project_name).id
sahara_util = UtilSahara(connector.sahara(token_ref_id))

# Timestamp suffix appended to the data source names.
exec_date = time.strftime('%Y%m%d%H%M%S')

input_ds_name = 'input_' + file_name + '_' + exec_date
# Fixed: this was assigned to `input_ds_utl`, leaving `input_ds_url`
# undefined when it is read by createDataSource below.
input_ds_url = 'swift://' + input_container + '.sahara/' + file_name

output_ds_name = 'output_' + file_name + '_' + exec_date
output_ds_url = 'swift://' + output_container + '.sahara/' + output_ds_name

# Register the Swift input and output locations as data sources.
input_ds = sahara_util.createDataSource(input_ds_name, input_ds_url, 'swift', user, password)
output_ds = sahara_util.createDataSource(output_ds_name, output_ds_url, 'swift', user, password)

# Launch the MapReduce job, passing the ids of the data sources created above.
sahara_util.runMapReduceJob(job_name, job_id, cluster_id, map_output_key, map_output_value, input_ds.id, output_ds.id)
# Commands used as the streaming mapper and reducer.
streaming_mapper = "/bin/cat"
# Fixed: this was "/usr/bin/wc - l". With the stray space, wc treats "-"
# (stdin) and "l" as two file operands instead of applying the -l option.
streaming_reducer = "/usr/bin/wc -l"
# HDFS paths, appended to "/user/hadoop/" below.
input_hdfs_path = "input"
output_hdfs_path = "output-test"

# Ids of the job and the cluster handed to runStreamingJob.
job_id = "f915cb23-9ab2-4476-8323-6b8381fd69d0"
cluster_id = "01b4a929-2ab2-43e6-8b3a-c6e163fe6f72"

# Timestamp suffix appended to the data source names.
exec_date = time.strftime("%Y%m%d%H%M%S")

# user, password, project_name, project_id and main_ip are not assigned in
# this fragment; they must already be defined.
connector = ConnectionGetter(user, password, project_name, project_id, main_ip)

# Obtain a Keystone token id and use it to build the Sahara helper.
keystone_util = UtilKeystone(connector.keystone())
token_ref_id = keystone_util.getTokenRef(user, password, project_name).id
sahara_util = UtilSahara(connector.sahara(token_ref_id))

# Register the HDFS input and output locations as data sources.
input_ds_name = "input_" + exec_date
input_ds_url = "/user/hadoop/" + input_hdfs_path
input_ds_id = sahara_util.createDataSource(input_ds_name, input_ds_url, "hdfs", user, password)

output_ds_name = "output_" + exec_date
output_ds_url = "/user/hadoop/" + output_hdfs_path
output_ds_id = sahara_util.createDataSource(output_ds_name, output_ds_url, "hdfs", user, password)

# Launch the streaming job on the cluster with the data sources created above.
# NOTE(review): the original call was cut off after `input_ds_id=...` and was
# never closed. It is completed here with `output_ds_id` (created above but
# otherwise unused) -- confirm against the full script.
sahara_util.runStreamingJob(
    job_id,
    cluster_id,
    streaming_mapper=streaming_mapper,
    streaming_reducer=streaming_reducer,
    input_ds_id=input_ds_id,
    output_ds_id=output_ds_id
)
Example #4
0
    file_name = raw_input('file name: ')
	cluster_id = raw_input('cluster id: ')
elif len(sys.argv) == MIN_NUM_ARGS:
    job_name = sys.argv[1]
    file_name = sys.argv[2]
	cluster_id = sys.argv[3]
else:
    askForHelp()
    exit(1)

# NOTE(review): the next line is not valid Python. The text between the
# password prompt and 'configuration.json' appears to have been masked out
# ('******'). The statements that define `parser`, `key`, `image_id` and
# `template_id`, all used below, are not visible in this fragment and must
# be restored from the original script.
password = getpass.getpass('password: '******'configuration.json')
# Connection settings and the network id, read from the parser.
user = parser.get('user')
project_name = parser.get('project_name')
project_id = parser.get('project_id')
main_ip = parser.get('main_ip')
net_id = parser.get('net_id')

# NOTE(review): `key` is passed as the credential here, not the `password`
# name assigned above -- confirm which one is intended.
connector = ConnectionGetter(user, key, project_name, project_id, main_ip)

# Obtain a Keystone token id and use it to build the Sahara helper.
keystone_util = UtilKeystone(connector.keystone())
token_ref_id = keystone_util.getTokenRef(user, key, project_name).id
sahara_util = UtilSahara(connector.sahara(token_ref_id))

# Request a Hadoop cluster named 'hadoop-create' with the given image,
# template and network ids.
cluster_name = 'hadoop-create'
sahara_util.createClusterHadoop(cluster_name, image_id, template_id, net_id)