Example #1
def create_job_items_from_directory(job_item, dir_path):
    items_log.info("Creating job items from directory")
    fileslist = []
    for (dirpath, dirnames, filenames) in os.walk(dir_path):
        for f in filenames:
            fileslist.append({
                "filename": f,
                "file_path": os.path.join(dirpath, f),
            })
    for f in fileslist:
        s3.upload_file(f['filename'], f['file_path'], S3_WORKING_INPUT_BUCKET)
        create_job_item(job_item['job_id'], f['filename'],
                        sqs.get_queue(SQS_JOB_ITEMS_QUEUE))
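A hypothetical invocation, assuming the caller holds a job_item dict carrying a job_id and a scratch directory of extracted files (both values below are invented for illustration):

# Hypothetical usage; the job_id and directory are illustrative only.
create_job_items_from_directory({"job_id": "1234"}, "/tmp/job_1234_input")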
Example #2
def cleanup_all(list_of_filenames):
    filelist = [f for f in os.listdir(".") if f.endswith(".swc")]
    loglist = [f for f in os.listdir(".") if f.endswith("log.txt")]
    reconstructlist = [
        f for f in os.listdir(".")
        if f.startswith("tmp_binarized_Reconstruction")
    ]
    filelist.extend(loglist)
    filelist.extend(reconstructlist)
    filelist.extend(list_of_filenames)
    print("Files " + str(filelist))
    for f in filelist:
        try:
            os.remove(os.path.abspath(f))
        except Exception as e:
            items_log.info("File to remove not found " + str(e))
Example #3
def process_next_job_item():
    tasks_log.info("Getting next job_item from queue")
    queue = sqs.get_queue(client_constants.SQS_JOB_ITEMS_QUEUE)
    msg = sqs.get_next_message(queue)
    if msg is None:
        tasks_log.info("No job items found in Queue")
        return
    job_item_key = msg['MessageAttributes']['job_item_key']['StringValue']
    tasks_log.info("Found new job_item " + job_item_key)
    job_item = job_item_manager.get_job_item_doc(job_item_key)
    job_item['attempts'] += 1
    status = job_item_manager.process_job_item(job_item)
    if status == "COMPLETE":
        items_log.info("Deleting completed job_item from queue")
        sqs.delete_message(queue, msg)
    else:
        # We are going to let SQS handle retries
        items_log.info("Leaving job_item in queue")
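process_next_job_item handles at most one message per call, so a worker would invoke it in a loop; a minimal sketch, with the poll interval as an assumption:

# Hypothetical worker loop; the 10-second sleep between polls is an assumption.
import time

while True:
    process_next_job_item()
    time.sleep(10)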
Example #4
def get_timeout(file_bytes, bytes_per_sec, max_time, min_time,
                buffer_multiplier):
    """
    Returns estimate job item runtime w buffer between min and max
    """
    items_log.info("buffer " + str(buffer_multiplier))
    items_log.info("Filesize in MB: " + str(file_bytes / BYTES_PER_MEGABYTE))
    estimated_runtime = file_bytes / bytes_per_sec
    items_log.info("Est Runtime: " + str(int(estimated_runtime)))
    timeout = int(estimated_runtime * buffer_multiplier)
    items_log.info("Est Runtime w buffer: " + str(timeout))
    return max(min_time, min(timeout, max_time))
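A worked example under assumed numbers: a 600 MB file at 1 MB/s estimates 600 seconds, the 1.5x buffer raises that to 900, and the clamp keeps the result inside [min_time, max_time]:

# Hypothetical worked example; every constant below is invented for illustration.
timeout = get_timeout(
    file_bytes=600 * BYTES_PER_MEGABYTE,
    bytes_per_sec=1 * BYTES_PER_MEGABYTE,
    max_time=3600,
    min_time=60,
    buffer_multiplier=1.5)  # 600 s * 1.5 = 900 s, within [60, 3600], so returns 900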
Example #5
def run_job(job, max_runtime):
    items_log.info("Tracing neuron... " + job["input_filename"])
    input_file_path = os.path.abspath(job["input_filename"])
    output_file_path = os.path.abspath(job["output_filename"])
    log_file_path = output_file_path + USER_JOB_LOG_EXT
    logfile = open(log_file_path, "w")
    cmd_args = build_cmd_args(job, input_file_path, output_file_path)
    items_log.info("Running Command: " + " ".join(cmd_args))
    start_time = int(time.time())
    cmd = Command(cmd_args, logfile)
    print "Running " + str(" ".join(cmd_args))
    try:
        status = cmd.run(max_runtime)
        runtime = int(time.time()) - start_time
        if status == "OK":
            ok_msg = "\nTrace complete! Runtime = " + str(runtime) + " seconds"
            logfile.write("\n" + ok_msg)
            items_log.info(ok_msg)
        elif status == "TIMEOUT":
            max_runtime_msg = (
                job["input_filename"]
                + " - "
                + job["plugin"]
                + " Throwing Exception b/c Max Runtime Exceeded: "
                + str(max_runtime)
                + " seconds"
            )
            logfile.write("\n" + max_runtime_msg)
            items_log.info(max_runtime_msg)
            raise MaxRuntimeException(max_runtime_msg)
        else:
            job_failed_msg = (
                "Throwing Exception b/c Job Item Failed: " + input_file_path
            )
            logfile.write("\n" + job_failed_msg)
            items_log.info(job_failed_msg)
            raise Exception(job_failed_msg)
    finally:
        logfile.close()
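run_job depends on a Command wrapper that runs the tracer, streams its output to the log file, and returns "OK" or "TIMEOUT" against max_runtime. A minimal sketch of that pattern using subprocess plus a watcher thread; this class is an assumed illustration, not the project's actual implementation:

# Hypothetical Command wrapper: run a subprocess, send stdout/stderr to a
# log file, and enforce a hard timeout from a watcher thread.
import subprocess
import threading

class Command(object):
    def __init__(self, cmd_args, logfile):
        self.cmd_args = cmd_args
        self.logfile = logfile
        self.process = None

    def run(self, max_runtime):
        def target():
            self.process = subprocess.Popen(
                self.cmd_args, stdout=self.logfile, stderr=subprocess.STDOUT)
            self.process.communicate()

        thread = threading.Thread(target=target)
        thread.start()
        thread.join(max_runtime)
        if thread.is_alive():
            self.process.terminate()  # ran past max_runtime
            thread.join()
            return "TIMEOUT"
        return "OK" if self.process.returncode == 0 else "FAILED"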
Example #6
def process_zip_file(job_item, zip_file_path, max_runtime):
    """
	Unzip compressed file
	Create new job_item record(s)
	Upload new uncompressed file(s) to s3
	"""
    output_dir = os.path.dirname(zip_file_path)
    zip_archive = zipfile.ZipFile(zip_file_path, "r")
    filenames = zip_archive.namelist()
    if len(filenames) > 1:
        items_log.info("found more than 1 file inside .zip")
        # Expand into a directory named after the archive (extension stripped)
        archive_name = os.path.basename(zip_file_path)
        output_dir = os.path.join(
            output_dir, archive_name[:archive_name.rfind(zipper.ZIP_FILE_EXT)])
        zipper.expand_zip_archive(zip_archive, output_dir)
        zip_archive.close()
        create_job_items_from_directory(job_item, output_dir)
        shutil.rmtree(output_dir)
        status = "COMPLETE"
    else:
        items_log.info("found only 1 file inside .zip")
        filename = filenames[0]
        file_path = os.path.join(output_dir, filename)
        zipper.extract_file_from_archive(zip_archive, filename, file_path)
        zip_archive.close()
        job_item['input_filename'] = filename
        job_item['output_filename'] = filename + OUTPUT_FILE_SUFFIXES[
            job_item['plugin']]
        items_log.info("New output filename: " + job_item['output_filename'])
        runtimes = PLUGINS[job_item['plugin']]['runtime']
        max_runtime = timeout.get_timeout_from_file(file_path,
                                                    runtimes['bytes_per_sec'],
                                                    runtimes['max'],
                                                    runtimes['min'])
        status = run_job_item(job_item, max_runtime)
    os.remove(zip_file_path)
    return status
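process_zip_file leans on a zipper module for is_zip_file, expand_zip_archive, extract_file_from_archive, and ZIP_FILE_EXT. A minimal sketch of what such helpers could look like on top of the standard zipfile module; the project's real module may differ:

# Hypothetical zipper module built on the standard library; illustrative only.
import zipfile

ZIP_FILE_EXT = ".zip"

def is_zip_file(file_path):
    # Sniffs the magic bytes, so mis-named archives are still detected
    return zipfile.is_zipfile(file_path)

def expand_zip_archive(zip_archive, output_dir):
    zip_archive.extractall(output_dir)

def extract_file_from_archive(zip_archive, filename, file_path):
    with open(file_path, "wb") as out_file:
        out_file.write(zip_archive.read(filename))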
Example #7
def run_job_item(job_item, max_runtime):
    items_log.info("running job item " + str(job_item))
    local_file_path = os.path.abspath(job_item['input_filename'])
    job_item_status = "ERROR"
    try:
        if zipper.is_zip_file(local_file_path):
            job_item_status = process_zip_file(job_item, local_file_path,
                                               max_runtime)
        else:
            process_non_zip_file(job_item, max_runtime)
            items_log.info("Job Item Succeeded")
            job_item_status = "COMPLETE"
    except MaxRuntimeException as e:
        job_item_status = 'TIMEOUT'
        items_log.error("Job Item Timeout " + str(e) + traceback.format_exc())
    except Exception as e:
        items_log.error("Job Item Error " + traceback.format_exc() + "\n" +
                        str(e))
    finally:
        items_log.info("Job_Item Status: " + job_item_status)
        job_item['status_id'] = get_job_item_status_id(job_item_status)
        save_job_item(job_item)
    return job_item_status
Example #8
def upload_file(file_key, file_path, bucket_name):
    items_log.info("Uploading file: %s" % file_key)
    k = Key(get_bucket(get_connection(), bucket_name))
    k.key = file_key
    k.set_contents_from_filename(file_path)
    items_log.info("Upload complete!")
Example #9
def download_file(file_key, file_path, bucket_name):
    items_log.info("Downloading file: %s" % file_key)
    k = Key(get_bucket(get_connection(), bucket_name))
    k.key = file_key
    k.get_contents_to_filename(file_path)
    items_log.info("Downloading complete!")