Example #1
0
def update_jobs_created():
	"""
	Move every job currently in the CREATED status to IN_PROGRESS.

	For each such job the new status id is committed first, and the
	"job created" email is sent afterwards.
	"""
	for new_job in job_manager.get_jobs_by_status("CREATED"):
		tasks_log.info("Found new job")
		in_progress_id = job_manager.get_job_status_id("IN_PROGRESS")
		new_job.status_id = in_progress_id
		# Persist the status change before notifying anyone about the job.
		db.commit()
		email_manager.send_job_created_email(new_job)
Example #2
0
def update_jobs_created():
    """
    Move every job currently in the CREATED status to IN_PROGRESS.

    For each such job the new status id is committed first, and the
    "job created" email is sent afterwards.
    """
    for new_job in job_manager.get_jobs_by_status("CREATED"):
        tasks_log.info("Found new job")
        in_progress_id = job_manager.get_job_status_id("IN_PROGRESS")
        new_job.status_id = in_progress_id
        # Persist the status change before notifying anyone about the job.
        db.commit()
        email_manager.send_job_created_email(new_job)
Example #3
0
def update_fleet():
	# Calls fleet_manager.update_fleet_capacity() repeatedly while `count`
	# stays below FLEET_UPDATE_MAX_RUNS.
	# NOTE(review): this excerpt stops at the bare `finally:` below, so the
	# code that advances `count` is not visible here - presumably it lives in
	# the missing finally body; confirm against the full file.
	count = 0
	while count < FLEET_UPDATE_MAX_RUNS:
		try:
			tasks_log.info("Update Jobs and JobItems Fleets. Attempt " + str(count))
			fleet_manager.update_fleet_capacity()
		# Python 2-only `except X, name` form; `err` is not used by the handler.
		except Exception, err:
			# Failures are logged with their traceback rather than propagated.
			tasks_log.error(traceback.format_exc())
		finally:
Example #4
0
def poll_job_items_queue():
	# Calls process_next_job_item() repeatedly while `count` stays below
	# POLL_JOB_ITEMS_MAX_RUNS.
	# NOTE(review): this excerpt stops at the bare `finally:` below, so the
	# code that advances `count` is not visible here - presumably it lives in
	# the missing finally body; confirm against the full file.
	count = 0
	while count < POLL_JOB_ITEMS_MAX_RUNS:
		try:
			tasks_log.info("Polling job_items queue " + str(count))
			process_next_job_item()
		# Python 2-only `except X, name` form; `err` is not used by the handler.
		except Exception, err:
			# Failures are logged with their traceback rather than propagated.
			tasks_log.error(traceback.format_exc())
		finally:
Example #5
0
def update_fleet():
    # Calls fleet_manager.update_fleet_capacity() repeatedly while `count`
    # stays below FLEET_UPDATE_MAX_RUNS.
    # NOTE(review): this excerpt stops at the bare `finally:` below, so the
    # code that advances `count` is not visible here - presumably it lives in
    # the missing finally body; confirm against the full file.
    count = 0
    while count < FLEET_UPDATE_MAX_RUNS:
        try:
            tasks_log.info("Update Jobs and JobItems Fleets. Attempt " +
                           str(count))
            fleet_manager.update_fleet_capacity()
        # Python 2-only `except X, name` form; `err` is not used by the handler.
        except Exception, err:
            # Failures are logged with their traceback rather than propagated.
            tasks_log.error(traceback.format_exc())
        finally:
Example #6
0
def poll_jobs_queue():
	# Runs update_jobs_created() followed by update_jobs_in_progress()
	# repeatedly while `count` stays below POLL_JOBS_MAX_RUNS.
	# NOTE(review): this excerpt stops at the bare `finally:` below, so the
	# code that advances `count` is not visible here - presumably it lives in
	# the missing finally body; confirm against the full file.
	count = 0
	while count < POLL_JOBS_MAX_RUNS:
		try:
			tasks_log.info("Polling jobs created + in-progress queues " + str(count))
			update_jobs_created()
			# Not reached in a given pass if update_jobs_created() raises.
			update_jobs_in_progress()
		# Python 2-only `except X, name` form; `err` is not used by the handler.
		except Exception, err:
			# Failures are logged with their traceback rather than propagated.
			tasks_log.error(traceback.format_exc())
		finally:
Example #7
0
def poll_jobs_queue():
    # Runs update_jobs_created() followed by update_jobs_in_progress()
    # repeatedly while `count` stays below POLL_JOBS_MAX_RUNS.
    # NOTE(review): this excerpt stops at the bare `finally:` below, so the
    # code that advances `count` is not visible here - presumably it lives in
    # the missing finally body; confirm against the full file.
    count = 0
    while count < POLL_JOBS_MAX_RUNS:
        try:
            tasks_log.info("Polling jobs created + in-progress queues " +
                           str(count))
            update_jobs_created()
            # Not reached in a given pass if update_jobs_created() raises.
            update_jobs_in_progress()
        # Python 2-only `except X, name` form; `err` is not used by the handler.
        except Exception, err:
            # Failures are logged with their traceback rather than propagated.
            tasks_log.error(traceback.format_exc())
        finally:
Example #8
0
def add_job_item_containers(count):
	"""
	Try to start `count` job-item tasks, one per round.

	Every round asks get_available_instance_w_most_containers for an
	instance in ECS_JOB_ITEMS_CLUSTER. When one is returned,
	ECS_JOB_ITEMS_TASK is started on it; otherwise the failure is logged.
	Each round ends with a two second sleep, whether or not a task was
	started.
	"""
	remaining = count
	while remaining > 0:
		target = get_available_instance_w_most_containers(
			ECS_JOB_ITEMS_CLUSTER, JOB_ITEM_CONTAINERS_PER_INSTANCE)
		if not target:
			tasks_log.info("Attempt To Add Container Failed. No available instances found.")
		else:
			tasks_log.info("JobItem - Adding Container To Instance " + target)
			ecs.start_task_on_instance(ECS_JOB_ITEMS_CLUSTER, ECS_JOB_ITEMS_TASK, target)
		time.sleep(2)
		remaining -= 1
Example #9
0
def calculate_optimal_job_item_container_capacity(queue, itms_per_cntr):
	"""
	Work out how many job-item containers the current queue backlog calls for.

	Reads the size of the SQS queue named `queue`, hands it together with
	`itms_per_cntr` to calculate_job_item_containers, logs a situation
	report and returns that function's result.
	"""
	now = datetime.datetime.utcnow()
	# The real last-update lookup (job_manager.get_last_updated_job().last_updated)
	# is disabled; a second utcnow() call stands in for it.
	last_update = datetime.datetime.utcnow()
	elapsed = now - last_update
	elapsed_seconds = int(elapsed.total_seconds())

	queue_url = sqs.get_queue(queue).url
	backlog = sqs.get_queue_size(queue_url)
	optimal_containers = calculate_job_item_containers(backlog, itms_per_cntr)

	template = ("SITUATION:\nCurrentTime: %s\nLast_Updated: %s\nSecondsSinceLastUpdate: "
		"%s\nJobItemQueueSize: %s\nOptimalContainers: %s")
	report = template % (now, last_update, elapsed_seconds, backlog, optimal_containers)
	tasks_log.info(report)
	return optimal_containers
Example #10
0
def add_job_item_containers(count):
    """
    Try to start `count` job-item tasks, one per round.

    Every round asks get_available_instance_w_most_containers for an
    instance in ECS_JOB_ITEMS_CLUSTER. When one is returned,
    ECS_JOB_ITEMS_TASK is started on it; otherwise the failure is logged.
    Each round ends with a two second sleep, whether or not a task was
    started.
    """
    remaining = count
    while remaining > 0:
        target = get_available_instance_w_most_containers(
            ECS_JOB_ITEMS_CLUSTER, JOB_ITEM_CONTAINERS_PER_INSTANCE)
        if not target:
            tasks_log.info(
                "Attempt To Add Container Failed. No available instances found."
            )
        else:
            tasks_log.info("JobItem - Adding Container To Instance " + target)
            ecs.start_task_on_instance(ECS_JOB_ITEMS_CLUSTER,
                                       ECS_JOB_ITEMS_TASK, target)
        time.sleep(2)
        remaining -= 1
Example #11
0
def calculate_optimal_job_item_container_capacity(queue, itms_per_cntr):
    """
    Work out how many job-item containers the current queue backlog calls for.

    Reads the size of the SQS queue named `queue`, hands it together with
    `itms_per_cntr` to calculate_job_item_containers, logs a situation
    report and returns that function's result.
    """
    now = datetime.datetime.utcnow()
    # The real last-update lookup (job_manager.get_last_updated_job().last_updated)
    # is disabled; a second utcnow() call stands in for it.
    last_update = datetime.datetime.utcnow()
    elapsed = now - last_update
    elapsed_seconds = int(elapsed.total_seconds())

    queue_url = sqs.get_queue(queue).url
    backlog = sqs.get_queue_size(queue_url)
    optimal_containers = calculate_job_item_containers(backlog, itms_per_cntr)

    template = (
        "SITUATION:\nCurrentTime: %s\nLast_Updated: %s\nSecondsSinceLastUpdate: "
        "%s\nJobItemQueueSize: %s\nOptimalContainers: %s")
    report = template % (now, last_update, elapsed_seconds, backlog,
                         optimal_containers)
    tasks_log.info(report)
    return optimal_containers
Example #12
0
def update_jobs_fleet_containers(cluster, service, max_cnts, min_cnts, cooldown):
	"""
	Step the jobs service one container towards its optimal capacity.

	The service capacity is raised by one when the optimum is higher and the
	current count is below `max_cnts`, lowered by one when the optimum is
	lower and the current count is above `min_cnts`, and left alone otherwise.

	Returns the capacity that was requested (or the unchanged current one).
	"""
	current = ecs.get_service_capacity(cluster, service)
	optimal = calculate_optimal_job_container_capacity(cooldown)
	tasks_log.info("JobsContainers - Current:%s Optimal:%s" % (current, optimal))

	if optimal > current and current < max_cnts:
		tasks_log.info("Jobs - Increasing Container Capacity")
		grown = current + 1
		ecs.set_service_capacity(cluster, service, grown)
		return grown

	if optimal < current and current > min_cnts:
		tasks_log.info("Jobs - Reducing Container Capacity")
		shrunk = current - 1
		ecs.set_service_capacity(cluster, service, shrunk)
		return shrunk

	tasks_log.info("Jobs - Leaving Container Capacity Unchanged")
	return current
Example #13
0
def update_jobs_fleet_instances(optimal_cnts, autoscaling_group, max_inst, min_inst, cnts_per_inst):
	"""
	Step the jobs autoscaling group towards its optimal instance count.

	Calls autoscaling.increase_capacity when the optimum is higher and the
	current count is below `max_inst`, autoscaling.decrease_capacity when the
	optimum is lower and the current count is above `min_inst`, and does
	nothing otherwise.

	Returns current + 1, current - 1 or the unchanged current count
	respectively.
	"""
	current = autoscaling.get_capacity(autoscaling_group)
	optimal = calculate_optimal_job_instance_capacity(optimal_cnts, cnts_per_inst)
	tasks_log.info("JobsInstances - Current:%s Optimal:%s" % (current, optimal))

	if optimal > current and current < max_inst:
		tasks_log.info("Jobs - Increasing Instance Capacity")
		autoscaling.increase_capacity(autoscaling_group)
		return current + 1

	if optimal < current and current > min_inst:
		tasks_log.info("Jobs - Reducing Instance Capacity")
		autoscaling.decrease_capacity(autoscaling_group)
		return current - 1

	tasks_log.info("Jobs - Leaving Instance Capacity Unchanged")
	return current
Example #14
0
def update_jobs_fleet_containers(cluster, service, max_cnts, min_cnts,
                                 cooldown):
    """
    Step the jobs service one container towards its optimal capacity.

    The service capacity is raised by one when the optimum is higher and the
    current count is below `max_cnts`, lowered by one when the optimum is
    lower and the current count is above `min_cnts`, and left alone otherwise.

    Returns the capacity that was requested (or the unchanged current one).
    """
    current = ecs.get_service_capacity(cluster, service)
    optimal = calculate_optimal_job_container_capacity(cooldown)
    tasks_log.info("JobsContainers - Current:%s Optimal:%s" %
                   (current, optimal))

    if optimal > current and current < max_cnts:
        tasks_log.info("Jobs - Increasing Container Capacity")
        grown = current + 1
        ecs.set_service_capacity(cluster, service, grown)
        return grown

    if optimal < current and current > min_cnts:
        tasks_log.info("Jobs - Reducing Container Capacity")
        shrunk = current - 1
        ecs.set_service_capacity(cluster, service, shrunk)
        return shrunk

    tasks_log.info("Jobs - Leaving Container Capacity Unchanged")
    return current
Example #15
0
def update_jobs_fleet_instances(optimal_cnts, autoscaling_group, max_inst,
                                min_inst, cnts_per_inst):
    """
    Step the jobs autoscaling group towards its optimal instance count.

    Calls autoscaling.increase_capacity when the optimum is higher and the
    current count is below `max_inst`, autoscaling.decrease_capacity when the
    optimum is lower and the current count is above `min_inst`, and does
    nothing otherwise.

    Returns current + 1, current - 1 or the unchanged current count
    respectively.
    """
    current = autoscaling.get_capacity(autoscaling_group)
    optimal = calculate_optimal_job_instance_capacity(optimal_cnts,
                                                      cnts_per_inst)
    tasks_log.info("JobsInstances - Current:%s Optimal:%s" %
                   (current, optimal))

    if optimal > current and current < max_inst:
        tasks_log.info("Jobs - Increasing Instance Capacity")
        autoscaling.increase_capacity(autoscaling_group)
        return current + 1

    if optimal < current and current > min_inst:
        tasks_log.info("Jobs - Reducing Instance Capacity")
        autoscaling.decrease_capacity(autoscaling_group)
        return current - 1

    tasks_log.info("Jobs - Leaving Instance Capacity Unchanged")
    return current
Example #16
0
def update_job_items_fleet_containers(cluster, queue, max_cntrs, min_cntrs, itms_per_cntr):
	"""
	Slow scale up: start one extra job-item container per call when the
	queue calls for more containers than are currently running.

	Compares the number of tasks in `cluster` with the optimum computed from
	`queue` and `itms_per_cntr`, then scales up, scales down or leaves the
	capacity alone, bounded by `max_cntrs` / `min_cntrs`.

	Returns `optimal_containers` after a scale-up or when nothing changes,
	and `current_containers - 1` after a scale-down.
	"""
	current_containers = ecs.get_total_tasks_in_cluster(cluster)
	optimal_containers = calculate_optimal_job_item_container_capacity(queue, itms_per_cntr)
	tasks_log.info("JobItemsContainers - Current:%s Optimal:%s" % (str(current_containers),
		str(optimal_containers)))
	if optimal_containers > current_containers and current_containers < max_cntrs:
		tasks_log.info("JobItems - Increasing Container Capacity")
		# add_job_item_containers takes the NUMBER of containers to start, not
		# a target total. Passing current_containers+1 started that many extra
		# tasks per call, which is not a slow scale-up and could overshoot
		# max_cntrs. Start exactly one.
		add_job_item_containers(1)
		return optimal_containers
	elif optimal_containers < current_containers and current_containers > min_cntrs:
		tasks_log.info("JobItems - Reducing Container Capacity")
		# NOTE(review): remove_job_item_containers is not visible here - confirm
		# whether it expects a target total (as passed) or a count to remove.
		remove_job_item_containers(current_containers-1)
		return current_containers-1
	else:
		tasks_log.info("JobItems - Leaving Container Capacity Unchanged")
		return optimal_containers
Example #17
0
def update_job_items_fleet_instances(autoscaling_grp, max_insts, min_insts, cntrs_per_inst, cntrs):
	"""
	Step the job-items autoscaling group towards its optimal instance count.

	Calls autoscaling.increase_capacity when the optimum is higher and the
	current count is below `max_insts`, autoscaling.decrease_capacity when
	the optimum is lower and the current count is above `min_insts`, and
	does nothing otherwise.

	Returns current + 1, current - 1 or the unchanged current count
	respectively.
	"""
	current = autoscaling.get_capacity(autoscaling_grp)
	optimal = calculate_optimal_job_item_instance_capacity(cntrs_per_inst, cntrs)
	tasks_log.info("JobItemsInstances: Current:%s Optimal:%s" % (current, optimal))

	if optimal > current and current < max_insts:
		tasks_log.info("JobItems - Increasing Instance Capacity")
		autoscaling.increase_capacity(autoscaling_grp)
		return current + 1

	if optimal < current and current > min_insts:
		tasks_log.info("JobItems - Reducing Instance Capacity")
		autoscaling.decrease_capacity(autoscaling_grp)
		return current - 1

	tasks_log.info("JobItems - Leaving Instance Capacity Unchanged")
	return current
Example #18
0
def update_job_items_fleet_instances(autoscaling_grp, max_insts, min_insts,
                                     cntrs_per_inst, cntrs):
    """
    Step the job-items autoscaling group towards its optimal instance count.

    Calls autoscaling.increase_capacity when the optimum is higher and the
    current count is below `max_insts`, autoscaling.decrease_capacity when
    the optimum is lower and the current count is above `min_insts`, and
    does nothing otherwise.

    Returns current + 1, current - 1 or the unchanged current count
    respectively.
    """
    current = autoscaling.get_capacity(autoscaling_grp)
    optimal = calculate_optimal_job_item_instance_capacity(
        cntrs_per_inst, cntrs)
    tasks_log.info("JobItemsInstances: Current:%s Optimal:%s" %
                   (current, optimal))

    if optimal > current and current < max_insts:
        tasks_log.info("JobItems - Increasing Instance Capacity")
        autoscaling.increase_capacity(autoscaling_grp)
        return current + 1

    if optimal < current and current > min_insts:
        tasks_log.info("JobItems - Reducing Instance Capacity")
        autoscaling.decrease_capacity(autoscaling_grp)
        return current - 1

    tasks_log.info("JobItems - Leaving Instance Capacity Unchanged")
    return current
Example #19
0
def update_job_items_fleet_containers(cluster, queue, max_cntrs, min_cntrs,
                                      itms_per_cntr):
    """
    Slow scale up: start one extra job-item container per call when the
    queue calls for more containers than are currently running.

    Compares the number of tasks in `cluster` with the optimum computed from
    `queue` and `itms_per_cntr`, then scales up, scales down or leaves the
    capacity alone, bounded by `max_cntrs` / `min_cntrs`.

    Returns `optimal_containers` after a scale-up or when nothing changes,
    and `current_containers - 1` after a scale-down.
    """
    current_containers = ecs.get_total_tasks_in_cluster(cluster)
    optimal_containers = calculate_optimal_job_item_container_capacity(
        queue, itms_per_cntr)
    tasks_log.info("JobItemsContainers - Current:%s Optimal:%s" %
                   (str(current_containers), str(optimal_containers)))
    if optimal_containers > current_containers and current_containers < max_cntrs:
        tasks_log.info("JobItems - Increasing Container Capacity")
        # add_job_item_containers takes the NUMBER of containers to start, not
        # a target total. Passing current_containers + 1 started that many
        # extra tasks per call, which is not a slow scale-up and could
        # overshoot max_cntrs. Start exactly one.
        add_job_item_containers(1)
        return optimal_containers
    elif optimal_containers < current_containers and current_containers > min_cntrs:
        tasks_log.info("JobItems - Reducing Container Capacity")
        # NOTE(review): remove_job_item_containers is not visible here - confirm
        # whether it expects a target total (as passed) or a count to remove.
        remove_job_item_containers(current_containers - 1)
        return current_containers - 1
    else:
        tasks_log.info("JobItems - Leaving Container Capacity Unchanged")
        return optimal_containers
Example #20
0
def calculate_optimal_job_container_capacity(cooldown):
    """
    Decide whether the jobs service should run one container or none.

    Returns 0 only when no job is IN_PROGRESS or CREATED and the value from
    get_seconds_since_last_job_run(cooldown) exceeds `cooldown`; returns 1
    in every other case.
    """
    idle_seconds = get_seconds_since_last_job_run(cooldown)
    job_count = len(job_manager.get_jobs_by_status("IN_PROGRESS"))
    job_count += len(job_manager.get_jobs_by_status("CREATED"))
    tasks_log.info("SITUATION:\nSecondsSinceLastJobRun: %s\nJobCount: %s" %
                   (idle_seconds, job_count))

    if job_count != 0:
        tasks_log.info("found jobs in flight")
        return 1

    tasks_log.info("no jobs in flight")
    # Keep the container alive until the cooldown has fully elapsed.
    if idle_seconds > cooldown:
        return 0
    return 1
Example #21
0
def calculate_optimal_job_container_capacity(cooldown):
	"""
	Decide whether the jobs service should run one container or none.

	Returns 0 only when no job is IN_PROGRESS or CREATED and the value from
	get_seconds_since_last_job_run(cooldown) exceeds `cooldown`; returns 1
	in every other case.
	"""
	idle_seconds = get_seconds_since_last_job_run(cooldown)
	job_count = len(job_manager.get_jobs_by_status("IN_PROGRESS"))
	job_count += len(job_manager.get_jobs_by_status("CREATED"))
	tasks_log.info("SITUATION:\nSecondsSinceLastJobRun: %s\nJobCount: %s" %
		(idle_seconds, job_count))

	if job_count != 0:
		tasks_log.info("found jobs in flight")
		return 1

	tasks_log.info("no jobs in flight")
	# Keep the container alive until the cooldown has fully elapsed.
	if idle_seconds > cooldown:
		return 0
	return 1
Example #22
0
def process_next_job_item():
	"""
	Take one message from the job-items SQS queue and process its job item.

	Returns immediately when the queue yields no message. Otherwise the job
	item document named by the message's `job_item_key` attribute is loaded,
	its `attempts` counter is incremented and it is handed to
	job_item_manager.process_job_item. The message is deleted only when the
	resulting status is "COMPLETE".
	"""
	tasks_log.info("Getting next job_item from queue")
	job_items_queue = sqs.get_queue(client_constants.SQS_JOB_ITEMS_QUEUE)
	message = sqs.get_next_message(job_items_queue)
	if message is None:
		tasks_log.info("No job items found in Queue")
		return

	key_attribute = message['MessageAttributes']['job_item_key']
	job_item_key = key_attribute['StringValue']
	tasks_log.info("Found new job_item " + job_item_key)

	job_item = job_item_manager.get_job_item_doc(job_item_key)
	job_item['attempts'] += 1
	outcome = job_item_manager.process_job_item(job_item)

	if outcome != "COMPLETE":
		# The message is left in place; retries are delegated to SQS.
		items_log.info("Leaving job_item in queue")
		return

	items_log.info("Deleting completed job_item from queue")
	sqs.delete_message(job_items_queue, message)
		
Example #23
0
def calculate_optimal_job_item_instance_capacity(cntrs_per_inst, opt_cntrs):
	"""
	Return how many instances are needed to hold `opt_cntrs` containers at
	`cntrs_per_inst` containers per instance, rounded up to a whole instance.
	The inputs and the result are logged as a situation report.
	"""
	# float() forces true division so a partial instance is rounded up, not dropped.
	instances_needed = float(opt_cntrs) / cntrs_per_inst
	optimal_instances = int(math.ceil(instances_needed))
	template = "SITUATION:\nOptimalJobItemContainers: %s\nJobItemContainerPerInstance: %s\nOptimalJobItemInstances: %s"
	tasks_log.info(template % (opt_cntrs, cntrs_per_inst, optimal_instances))
	return optimal_instances
Example #24
0
def calculate_optimal_job_item_instance_capacity(cntrs_per_inst, opt_cntrs):
    """
    Return how many instances are needed to hold `opt_cntrs` containers at
    `cntrs_per_inst` containers per instance, rounded up to a whole instance.
    The inputs and the result are logged as a situation report.
    """
    # float() forces true division so a partial instance is rounded up, not dropped.
    instances_needed = float(opt_cntrs) / cntrs_per_inst
    optimal_instances = int(math.ceil(instances_needed))
    template = "SITUATION:\nOptimalJobItemContainers: %s\nJobItemContainerPerInstance: %s\nOptimalJobItemInstances: %s"
    tasks_log.info(template % (opt_cntrs, cntrs_per_inst, optimal_instances))
    return optimal_instances