def machine_allocator():
    """Thread body: poll forever, starting the AWS machine pool when work
    is queued and tearing it down once everything has drained.

    Mutates the module-level ``slots``/``free_slots``/``machines`` lists
    that the scheduler shares; never returns.
    """
    global slots
    global free_slots
    global machines
    global work_list
    global run_list
    while True:
        # start all machines if we don't have any but have work queued
        if len(work_list) and not len(machines):
            rd_print(None, "Starting machines.")
            machines = []
            # get_machines can return an empty list while instances are
            # still booting; keep retrying until at least one is up
            while not machines:
                machines = awsremote.get_machines(args.max_machines, args.awsgroup)
            for machine in machines:
                slots.extend(machine.get_slots())
            free_slots.extend(slots)
            time.sleep(60 * 10)  # don't shut down for at least 10 minutes
        # stop all machines if nothing is running
        slots_busy = any(slot.busy for slot in slots)
        if not slots_busy and not len(work_list) and not len(run_list):
            rd_print(None, "Stopping all machines.")
            machines = []
            slots = []
            free_slots = []
            awsremote.stop_machines(args.awsgroup)
        time.sleep(60)
def machine_allocator_tick():
    """Periodic tornado callback: reconcile the local machine/slot lists
    against what AWS currently reports, then reschedule itself in 60s.

    Mutates the module-level ``slots``/``free_slots``/``machines`` lists
    shared with the scheduler.
    """
    global slots
    global free_slots
    global machines
    global work_list
    global run_list
    # start all machines if we don't have any but have work queued
    if len(work_list) and not len(machines):
        rd_print(None, "Starting machines.")
        #awsgroup.start_machines(args.max_machines, args.awsgroup)
    # stop all machines if nothing is running
    slots_busy = any(slot.busy for slot in slots)
    if not slots_busy and not len(work_list) and not len(run_list):
        rd_print(None, "Stopping all machines.")
        machines = []
        slots = []
        free_slots = []
        #awsremote.stop_machines(args.awsgroup)
    try:
        updated_machines = awsremote.get_machines(args.max_machines, args.awsgroup)
    except Exception:
        # AWS query failed; don't crash the loop -- just try again next tick
        tornado.ioloop.IOLoop.current().call_later(60, machine_allocator_tick)
        return
    print(updated_machines)
    # Drop machines that no longer exist. Iterate over a snapshot:
    # removing from `machines` while iterating it skips the element
    # that follows each removal.
    for m in list(machines):
        matching = [um for um in updated_machines if um.host == m.host]
        if len(matching) == 0:
            rd_print(None, "Machine disappeared: " + m.get_name())
            for s in m.slots:
                slots.remove(s)
                try:
                    free_slots.remove(s)
                except ValueError:
                    # slot was busy, so it was not on the free list
                    pass
            machines.remove(m)
    # Add machines that appeared since the last tick.
    for um in updated_machines:
        print(um, um.get_name())
        matching = [m for m in machines if m.host == um.host]
        if len(matching) == 0:
            rd_print(None, "Machine appeared: " + um.get_name())
            new_slots = um.get_slots()
            slots.extend(new_slots)
            free_slots.extend(new_slots)
            machines.append(um)
    tornado.ioloop.IOLoop.current().call_later(60, machine_allocator_tick)
def machine_allocator_tick():
    """Periodic tornado callback: reconcile the local machine/slot lists
    against what AWS currently reports, then reschedule itself in 60s.

    Mutates the module-level ``slots``/``free_slots``/``machines`` lists
    shared with the scheduler.
    """
    global slots
    global free_slots
    global machines
    global work_list
    global run_list
    # start all machines if we don't have any but have work queued
    if len(work_list) and not len(machines):
        rd_print(None, "Starting machines.")
        #awsgroup.start_machines(args.max_machines, args.awsgroup)
    # stop all machines if nothing is running
    slots_busy = any(slot.busy for slot in slots)
    if not slots_busy and not len(work_list) and not len(run_list):
        rd_print(None, "Stopping all machines.")
        machines = []
        slots = []
        free_slots = []
        #awsremote.stop_machines(args.awsgroup)
    try:
        updated_machines = awsremote.get_machines(args.max_machines, args.awsgroup)
    except Exception:
        # AWS query failed; don't crash the loop -- just try again next tick
        tornado.ioloop.IOLoop.current().call_later(60, machine_allocator_tick)
        return
    print(updated_machines)
    # Drop machines that no longer exist. Iterate over a snapshot:
    # removing from `machines` while iterating it skips the element
    # that follows each removal.
    for m in list(machines):
        matching = [um for um in updated_machines if um.host == m.host]
        if len(matching) == 0:
            rd_print(None, "Machine disappeared: " + m.get_name())
            for s in m.slots:
                slots.remove(s)
                try:
                    free_slots.remove(s)
                except ValueError:
                    # slot was busy, so it was not on the free list
                    pass
            machines.remove(m)
    # Add machines that appeared since the last tick.
    for um in updated_machines:
        print(um, um.get_name())
        matching = [m for m in machines if m.host == um.host]
        if len(matching) == 0:
            rd_print(None, "Machine appeared: " + um.get_name())
            new_slots = um.get_slots()
            slots.extend(new_slots)
            free_slots.extend(new_slots)
            machines.append(um)
    tornado.ioloop.IOLoop.current().call_later(60, machine_allocator_tick)
print(GetTime(), '0 out of', total_num_of_jobs, 'finished.')

#how many AWS instances do we want to spin up?
#The assumption is each machine can deal with 18 threads,
#so up to 18 jobs, use 1 machine, then up to 64 use 2, etc...
#Floor division is required: this file uses Python 3 print(), where
#plain '/' yields a float, and a fractional instance count would be
#passed straight into get_machines().
num_instances_to_use = (31 + total_num_of_jobs) // 18

#...but lock AWS to a max number of instances
max_num_instances_to_use = 15
if num_instances_to_use > max_num_instances_to_use:
    print(GetTime(), 'Ideally, we should use', num_instances_to_use,
          'AWS instances, but the max is', max_num_instances_to_use, '.')
    num_instances_to_use = max_num_instances_to_use

machines = awsremote.get_machines(num_instances_to_use, aws_group_name)

#set up our instances and their free job slots
for machine in machines:
    machine.setup()

#by doing the machines in the inner loop,
#we end up with heavy jobs split across machines better
for i in range(0, 32):
    for machine in machines:
        free_slots.append(Slot(machine))

#Make a list of the bits of work we need to do.
#We pack the stack ordered by filesize ASC, quality ASC (aka. -v DESC)
#so we pop the hardest encodes first,
#for more efficient use of the AWS machines' time.
#...but lock AWS to a max number of instances max_num_instances_to_use = int(args.machines) if num_instances_to_use > max_num_instances_to_use: rd_print('Ideally, we should use', num_instances_to_use, 'instances, but the max is', max_num_instances_to_use, '.') num_instances_to_use = max_num_instances_to_use machines = [] if args.machineconf: machineconf = json.load(open(args.machineconf, 'r')) for m in machineconf: machines.append(sshslot.Machine(m['host'], m['user'], m['cores'])) else: while not machines: machines = awsremote.get_machines(num_instances_to_use, aws_group_name) slots = [] #set up our instances and their free job slots for machine in machines: machine.setup(args.codec) slots.extend(machine.get_slots()) #Make a list of the bits of work we need to do. #We pack the stack ordered by filesize ASC, quality ASC (aka. -v DESC) #so we pop the hardest encodes first, #for more efficient use of the AWS machines' time. if args.individual: video_filenames = args.set else: