def send_job(source_script=None, in_directory=None, out_directory=None,
             base_directory='task/', load_from_s3=0, s3_bucket_name=None,
             s3_fetch_path=None, put_to_s3=0, existing_instance=None,
             itype=None, ami=boom_config.DEFAULT_AMI,
             security_group=boom_config.DEFAULT_SECURITY_GROUP,
             ssh_key=boom_config.DEFAULT_SSH_KEY,
             ssh_key_path=boom_config.DEFAULT_SSH_KEY_PATH):
    """ Spins up an instance, deploys the job, then exits """
    load_from_s3 = int(load_from_s3)
    put_to_s3 = int(put_to_s3)
    if out_directory is None:
        print 'You must provide an out_directory'
        sys.exit(1)
    if not out_directory.endswith('/'):
        out_directory += '/'
    out_log_file = base_directory + out_directory + 'shell_log.txt'
    _make_workspace()

    # Prepare the local job files
    f = open(boom_config.TEMPORARY_FOLDER + 'boom_task.py', 'w')
    f.write(generate_script(fetch=load_from_s3, bucket_name=s3_bucket_name,
                            fetch_path=s3_fetch_path, put=put_to_s3,
                            out_path=out_directory, run=True,
                            script_name=source_script))
    f.close()

    user = '******'
    ssh_key_path = _expand_path(ssh_key_path)
    path_to_base_directory = '~/{}'.format(base_directory)
    instance = None

    # When provisioning a spot instance:
    #   res = conn.request_spot_instances(price='0.011', instance_type='t1.micro', image_id='ami-0b9ad862')
    #   res[0] gives the spot reservation, but it has no update() method, so poll with
    #   conn.get_all_spot_instance_requests(res[0].id) until res[0].state == 'active'
    #   (or res[0].status.code == 'fulfilled'), then use res[0].instance_id.
    # (A hedged sketch of this appears after this function.)

    try:
        if not existing_instance:
            instance = provision_instance(itype=itype, ami=ami,
                                          security_group=security_group,
                                          ssh_key=ssh_key)
            print "Waiting for instance to boot"
        else:
            instance = _get_existing_instance(existing_instance)
            print 'Using existing instance {}'.format(existing_instance)

        while instance.state != 'running':
            sys.stdout.write(".")
            time.sleep(5)
            instance.update()
        sys.stdout.write('\n')
    except KeyboardInterrupt:
        print 'Operation cancelled by user. Attempting to terminate instance'
        if instance:
            # This does not always terminate if we are really early in the launch process
            instance.terminate()
        _cleanup_workspace()
        sys.exit(1)

    time.sleep(15)
    print "Instance is running at ip {}".format(instance.ip_address)
    print "Connecting as user {}".format(user)

    # Set up the Fabric environment to connect to the new machine
    env.host_string = instance.ip_address
    env.user = user
    env.key_filename = ssh_key_path

    # The SSH daemon may not be up yet; retry the connection a few times
    attempt = 1
    success = False
    while not success and attempt <= 3:
        try:
            run('uname -a')
            run('pwd')
            success = True
        except NetworkError as e:
            print "Could not connect: {}".format(e)
            print "Retrying"
            attempt += 1
            continue

    if not success:
        print "Could not connect after 3 tries. Aborting"
        _cleanup_workspace()
        sys.exit(1)

    # Send files to the server, replacing any previous job directory
    if exists(base_directory):
        run('rm -R {}'.format(base_directory))
    run('mkdir {}'.format(base_directory))
    run('mkdir {}'.format(base_directory + out_directory))
    print 'Transferring scripts to instance'
    fabput(local_path=_expand_path('./' + boom_config.TEMPORARY_FOLDER + 'boom_task.py'),
           remote_path='~/' + base_directory)
    fabput(local_path=_expand_path('./' + source_script),
           remote_path='~/' + base_directory)

    with cd(path_to_base_directory):
        # Kick off the script inside tmux so it survives the SSH disconnect
        print 'Kicking off the task'
        run("tmux new-session -s boom_job -d")
        # TODO: Does not always seem to be working, but path looks correct
        run("tmux pipe-pane -o -t boom_job 'exec cat >> {}'".format(out_log_file))
        run("tmux send -t boom_job 'python boom_task.py' Enter")

    _cleanup_workspace()
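
# ---------------------------------------------------------------------------
# Hedged sketch: spot-instance provisioning (not used anywhere in this module)
#
# The comment block inside send_job() outlines how a spot instance could be
# requested instead of an on-demand one. The helper below is a minimal,
# illustrative version of that idea against boto's EC2 API. The function name,
# the default bid price / instance type / AMI (taken from the comment), and
# the assumption that `conn` is an already-open boto EC2 connection are all
# assumptions, not part of the existing code. `time` is assumed to be imported
# at module level (send_job already uses it).
def provision_spot_instance(conn, price='0.011', itype='t1.micro',
                            ami='ami-0b9ad862', poll_seconds=10):
    """Request a spot instance and block until the request is fulfilled.

    Returns the boto Instance object, mirroring what provision_instance()
    is assumed to return for on-demand instances.
    """
    requests = conn.request_spot_instances(price=price, image_id=ami,
                                           instance_type=itype)
    request_id = requests[0].id

    # The SpotInstanceRequest returned by request_spot_instances() has no
    # update() method, so re-fetch the request until it is fulfilled
    while True:
        req = conn.get_all_spot_instance_requests(request_ids=[request_id])[0]
        if req.state == 'active' or (req.status and req.status.code == 'fulfilled'):
            break
        time.sleep(poll_seconds)

    # Resolve the fulfilled request to the actual Instance object
    reservations = conn.get_all_instances(instance_ids=[req.instance_id])
    return reservations[0].instances[0]
# ---------------------------------------------------------------------------
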
def put_path(path=None, bucket_name=None, overwrite=0,
             aws_access_key_id=None, aws_secret_access_key=None):
    """ Puts a path to S3

    If the path is a file, puts just the file into the bucket
    If the path is a folder, recursively puts the folder into the bucket
    """
    if bucket_name is None:
        print 'You must provide a bucket name'
        sys.exit(1)

    cb = _progress_cb
    num_cb = 100
    debug = 0
    reduced = True
    grant = None
    headers = {}
    overwrite = int(overwrite)

    conn = connect_s3(aws_access_key_id, aws_secret_access_key)
    b = conn.get_bucket(bucket_name)
    path = _expand_path(path)

    files_to_check_for_upload = []
    existing_keys_to_check_against = []
    prefix = os.getcwd() + '/'
    key_prefix = ''

    # Take inventory of the files to upload
    # For directories, walk recursively
    files_in_bucket = [k.name for k in b.list()]
    if os.path.isdir(path):
        print 'Getting list of existing keys to check against'
        for root, dirs, files in os.walk(path):
            for p in files:
                if p.startswith("."):
                    continue
                full_path = os.path.join(root, p)
                key_name = _get_key_name(full_path, prefix, key_prefix)
                files_to_check_for_upload.append(full_path)
                if key_name in files_in_bucket:
                    existing_keys_to_check_against.append(full_path)
    # For single files, just add the file
    elif os.path.isfile(path):
        full_path = os.path.abspath(path)
        key_name = _get_key_name(full_path, prefix, key_prefix)
        files_to_check_for_upload.append(full_path)
        if key_name in files_in_bucket:
            existing_keys_to_check_against.append(full_path)
    # We are trying to upload something unknown
    else:
        print "I don't know what %s is, so I can't upload it" % path

    print "{} files to upload:".format(len(files_to_check_for_upload))
    pprint(files_to_check_for_upload)
    print "{} existing files already in bucket:".format(len(existing_keys_to_check_against))
    pprint(existing_keys_to_check_against)

    for full_path in files_to_check_for_upload:
        key_name = _get_key_name(full_path, prefix, key_prefix)
        if full_path in existing_keys_to_check_against:
            if not overwrite and b.get_key(key_name):
                print 'Skipping %s as it exists in s3' % full_path
                continue

        print 'Copying %s to %s/%s' % (full_path, bucket_name, key_name)
        # 0-byte files don't work with (and also don't need) multipart upload
        if os.stat(full_path).st_size != 0 and multipart_capable:
            _multipart_upload(bucket_name, aws_access_key_id,
                              aws_secret_access_key, full_path, key_name,
                              reduced, debug, cb, num_cb,
                              grant or 'private', headers)
        else:
            _singlepart_upload(b, key_name, full_path, cb=cb, num_cb=num_cb,
                               policy=grant, reduced_redundancy=reduced,
                               headers=headers)
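
# ---------------------------------------------------------------------------
# Hedged usage note and key-naming illustration (assumptions, not used above)
#
# put_path() coerces `overwrite` to int because Fabric passes task arguments
# as strings on the command line. Assuming this module is loaded as a fabfile,
# an invocation might look like (bucket name and path are placeholders):
#
#     fab put_path:path=./results,bucket_name=my-bucket,overwrite=1
#
# Key names are built relative to the current working directory
# (prefix = os.getcwd() + '/'). _get_key_name() is defined elsewhere in this
# module; the function below only illustrates the mapping that put_path()
# appears to assume, and is not the real helper.
def _example_key_name(full_path, prefix, key_prefix=''):
    """Illustration: '/cwd/results/run1.txt' -> 'results/run1.txt'."""
    relative = full_path[len(prefix):]
    return key_prefix + '/'.join(relative.split(os.sep))
# ---------------------------------------------------------------------------
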