Esempio n. 1
0
 def __init__(self, vm_limit, azure_config, skip_setup, local):
     """
     Initialise counters, workflow/schedule containers, the manager machine
     and, when requested, the Azure experiment.

     :param vm_limit: machine limit supplied by the user
     :param azure_config: Azure settings; when falsy no experiment is created
     :param skip_setup: forwarded to AzureExperiment as ``skip_setup``
     :param local: when truthy no experiment is created, even with a config
     """
     # Limits and cost bookkeeping.
     self.vm_limit = vm_limit  # user input
     self.budget = 0
     self.timestamp = datetime.now()
     self.cost_pred = 0
     self.wf_end = None

     # Rescheduling state.
     self.jobs_terminated = False
     self.last_resched = None

     # Workflow description, log source and the schedule being maintained.
     self.workflow = Workflow()
     self.logwatcher = LogWatcher()
     self.schedule = Schedule()

     # The manager is always the first machine in the list.
     manager_machine = Machine()
     manager_machine.status = MachineStatus.manager
     manager_machine.condor_slot = 'manager'
     self.machines = [manager_machine]

     # Zero-length, already-completed 'boot' entry for the manager.
     started = self.timestamp
     boot = ScheduleEntry(Job('boot', None), manager_machine, started, started)
     boot.real_start = started
     boot.real_end = started
     boot.status = EntryStatus.completed
     self.schedule.add_entry_host(boot, manager_machine)

     self.local = local
     if not azure_config or local:
         # No cloud back-end: leave every Azure-related attribute unset.
         self.exp = None
         self.master_addr = None
         self.user = None
         return

     hostname = socket.gethostname()
     self.exp = AzureExperiment(azure_config, skip_setup=skip_setup, name=hostname)
     self.master_addr = socket.gethostbyname(hostname)
     self.user = azure_config.admin_username
Esempio n. 2
0
def e_entry(job, machine, machine_entries_list, ready_at):
    """
    Build the earliest ScheduleEntry for ``job`` on ``machine``.

    The entry starts no sooner than ``ready_at`` and lasts ``job.pduration``.
    ``machine_entries_list`` is walked front to back (it is expected to be
    ordered); the job is placed in the first gap between two consecutive
    entries where it ends strictly before the next entry starts, otherwise
    after the last entry. The gap before the first entry is never considered.

    :param job: job to place; only ``pduration`` is read here
    :param machine: machine the entry is created for
    :param machine_entries_list: entries already on the machine
    :param ready_at: earliest moment the job may start
    :return: a new ScheduleEntry (not added to any schedule here)
    """
    # Idle machine: start as soon as the job is ready.
    if len(machine_entries_list) == 0:
        return ScheduleEntry(job, machine, ready_at, ready_at + job.pduration)

    entries = iter(machine_entries_list)
    previous = next(entries)
    for following in entries:
        start = max(previous.end(), ready_at)
        end = start + job.pduration
        # Fits in the gap between `previous` and `following`?
        if end < following.start():
            return ScheduleEntry(job, machine, start, end)
        previous = following

    # No gap was large enough: append after the last entry.
    start = max(previous.end(), ready_at)
    return ScheduleEntry(job, machine, start, start + job.pduration)
Esempio n. 3
0
 def sync_machines(self):
     """
     Register a Machine for every condor slot that is not tracked yet.

     For each slot returned by ``condor_slots()`` that no machine in
     ``self.machines`` carries, a running Machine is created together with a
     completed 'boot' entry whose log spans ``self.creation_timestamp`` to
     ``self.timestamp``. The machine is appended to ``self.machines`` and the
     entry to ``self.entries``.
     """
     # Collect the known slots once instead of rebuilding the list for every
     # slot; it is extended below so a repeated slot name is still skipped.
     known_slots = [m.condor_slot for m in self.machines]
     for slot in condor_slots():
         if slot in known_slots:
             continue

         machine = Machine()
         machine.status = MachineStatus.running
         machine.condor_slot = slot

         boot_entry = ScheduleEntry(Job('boot', None), machine, None, None)
         boot_entry.log[LogKey.real_start] = self.creation_timestamp
         boot_entry.log[LogKey.real_end] = self.timestamp
         boot_entry.status = EntryStatus.completed

         self.entries.append(boot_entry)
         self.machines.append(machine)
         known_slots.append(slot)
         # One pre-formatted argument: parses and prints the same text under
         # the Python 2 print statement and the Python 3 print function.
         print("++Machine %s" % (slot,))
Esempio n. 4
0
 def sync_machines(self):
     """
     Add a Machine (plus its completed 'boot' entry) for each new condor slot.

     A slot is new when no machine in ``self.machines`` has it as
     ``condor_slot``. The boot entry's log records ``self.creation_timestamp``
     as real start and ``self.timestamp`` as real end. New machines go to
     ``self.machines`` and their boot entries to ``self.entries``.
     """
     # Built once and kept up to date inside the loop, rather than being
     # recomputed from self.machines for every slot.
     tracked_slots = [m.condor_slot for m in self.machines]
     for slot in condor_slots():
         if slot in tracked_slots:
             continue

         machine = Machine()
         machine.status = MachineStatus.running
         machine.condor_slot = slot

         boot_job = Job('boot', None)
         boot_entry = ScheduleEntry(boot_job, machine, None, None)
         boot_entry.log[LogKey.real_start] = self.creation_timestamp
         boot_entry.log[LogKey.real_end] = self.timestamp
         boot_entry.status = EntryStatus.completed

         self.entries.append(boot_entry)
         self.machines.append(machine)
         tracked_slots.append(slot)
         # Formatted into a single string so the line is valid, and prints
         # identically, on both Python 2 and Python 3.
         print("++Machine %s" % (slot,))
Esempio n. 5
0
    def __init__(self):
        """
        Create the initial state: workflow, log watcher, the manager machine
        and an already-completed 'boot' entry for it.
        """
        self.workflow = Workflow()
        now = datetime.now()
        self.creation_timestamp = now
        self.timestamp = now
        self.logwatcher = LogWatcher()

        # The manager is the first tracked machine, under the slot 'local'.
        local_manager = Machine()
        local_manager.status = MachineStatus.manager
        local_manager.condor_slot = 'local'
        self.machines = [local_manager]

        # Boot entry with no planned start/end, stamped with the creation time.
        boot = ScheduleEntry(Job('boot', None), local_manager, None, None)
        boot.real_start = boot.real_end = now
        boot.status = EntryStatus.completed
        self.entries = [boot]
        # Starts empty; presumably maps a condor id to its entry - confirm
        # against the code that fills it.
        self.entries_cid = {}
Esempio n. 6
0
 def __init__(self):
     """
     Set up the workflow, the log watcher, the machine list (manager only)
     and the entry list (a single completed 'boot' entry).
     """
     self.workflow = Workflow()
     created_at = datetime.now()
     self.creation_timestamp = created_at
     self.timestamp = created_at
     self.logwatcher = LogWatcher()

     # Manager machine, registered under the condor slot name 'local'.
     manager_machine = Machine()
     manager_machine.status = MachineStatus.manager
     manager_machine.condor_slot = 'local'
     self.machines = [manager_machine]

     # Completed boot entry: no planned times, real times = creation time.
     boot_job = Job('boot', None)
     boot = ScheduleEntry(boot_job, manager_machine, None, None)
     boot.real_start = created_at
     boot.real_end = created_at
     boot.status = EntryStatus.completed
     self.entries = [boot]
     # Empty lookup table; presumably condor id -> entry (verify with users).
     self.entries_cid = {}
Esempio n. 7
0
def get_nmax(workflow, machines, schedule, vm_limit, timestamp, local):
    """
    Get the max number of machines that can be used by a workflow,
    based on the current state of the workflow execution.

    The search runs on a working machine list (``list(machines)``) and a
    working schedule (``Schedule(schedule)``). Jobs are taken in
    ``workflow.ranked_jobs`` order; a fresh machine (with a boot entry of
    ``vm_boottime``) is offered whenever the previously offered one was used,
    until ``vm_limit`` is reached.

    :param workflow: workflow structure
    :param machines: list of allocated machines
    :param schedule: state of the execution; jobs found in
        ``schedule.entries`` are skipped
    :param vm_limit: maximum length the machine list may reach
    :param timestamp: barrier timestamp
    :param local: when truthy, a 1 second boot time is used instead of
        VM_BOOTTIME
    :return n: max number of machines that can be used
    """
    vm_boottime = timedelta(seconds=1) if local else VM_BOOTTIME

    TIMER.tick('before get_nmax')
    _machines = list(machines)
    _schedule = Schedule(schedule)

    # insertion policy
    need_new_host = True
    new_machine = None
    # Entries without a job (e.g. the boot entries created below) are ignored.
    done_jobs = [e.job for e in schedule.entries if e.job is not None]
    for job in workflow.ranked_jobs:
        if job in done_jobs:
            continue

        if need_new_host:
            # last machine added was used, so we need to add a new one
            # if there's still room to add it, else we stop
            if len(_machines) >= vm_limit:
                break
            new_machine = Machine()
            boot_entry = ScheduleEntry(None, new_machine, timestamp, timestamp + vm_boottime)
            _schedule.add_entry_host(boot_entry, new_machine)
            _machines.append(new_machine)

        # schedule with the new machine available
        new_entry = earliest_entry(job, _machines, _schedule, timestamp)
        _schedule.add_entry_host(new_entry, new_entry.host)
        # Only offer another machine once the latest one has been used.
        need_new_host = new_entry.host == new_machine

    # last machine added wasn't used: it is always the final list element
    if not need_new_host:
        _machines = _machines[:-1]

    TIMER.tick('after get_nmax')
    return len(_machines)
    def __str__(self):
        """
        Return a user-friendly string of the timetable.

        Every entry in self.arrival is rendered with its own string form and
        followed by a newline; an empty timetable gives ''.

        Extends from ScheduleEntry.__str__
        @rtype: str
        """
        # A single join avoids repeated string concatenation, and the loop
        # variable no longer shadows the ScheduleEntry class name.
        return ''.join(str(entry) + '\n' for entry in self.arrival)
Esempio n. 9
0
    def sync_jobs(self):
        """
        Apply the pending log events to the schedule entries.

        For every event from ``self.logwatcher.nexts()`` the entry registered
        under the event id in ``self.entries_cid`` is looked up, or created
        and registered (also in ``self.entries``) when missing. The event
        timestamp is stored in ``entry.log`` under the event key.

        An ``execute`` event marks the entry as executing. A
        ``job_terminated`` event marks it completed and asks
        ``condor_history`` for the workflow id, DAG job id and slot. If a
        matching job exists in ``self.workflow.jobs`` the entry gets that job
        and the machine owning the slot, falling back to
        ``self.machines[0]`` when no machine has that slot.
        """
        for le in self.logwatcher.nexts():
            entry = self.entries_cid.get(le.id)
            if entry is None:
                entry = ScheduleEntry(condor_id=le.id)
                self.entries.append(entry)
                self.entries_cid[le.id] = entry
                # Single pre-formatted argument: same output under the
                # Python 2 print statement and the Python 3 print function.
                print("++Job %s" % (le.id,))

            entry.log[le.event] = le.timestamp

            if le.event == LogKey.execute:
                entry.status = EntryStatus.executing
            elif le.event == LogKey.job_terminated:
                entry.status = EntryStatus.completed
                wf_id, dag_job_id, slot = condor_history(le.id)

                job = next(
                    (j for j in self.workflow.jobs
                     if j.dag_job_id == dag_job_id and j.wf_id == wf_id), None)
                if job:
                    entry.job = job
                    entry.host = next(
                        (m for m in self.machines if m.condor_slot == slot),
                        self.machines[0])
                    print("--Job %s %s %s"
                          % (le.id, dag_job_id, entry.host.condor_slot))
Esempio n. 10
0
 def sync_jobs(self):
     """
     Consume new log events and update the matching schedule entries.

     Each event from ``self.logwatcher.nexts()`` is attached to the entry
     stored in ``self.entries_cid`` under the event id; unknown ids get a
     new ScheduleEntry that is added to ``self.entries`` and
     ``self.entries_cid``. The event timestamp is recorded in ``entry.log``.

     ``execute`` events set the status to executing. ``job_terminated``
     events set it to completed and look the job up through
     ``condor_history``. When the job is found in ``self.workflow.jobs`` the
     entry's job and host are filled in; the host defaults to
     ``self.machines[0]`` if no machine carries the reported slot.
     """
     for le in self.logwatcher.nexts():
         entry = self.entries_cid.get(le.id)
         if entry is None:
             entry = ScheduleEntry(condor_id=le.id)
             self.entries.append(entry)
             self.entries_cid[le.id] = entry
             # Formatted into one string so the statement is valid, and
             # prints identically, on both Python 2 and Python 3.
             print("++Job %s" % (le.id,))

         entry.log[le.event] = le.timestamp

         if le.event == LogKey.execute:
             entry.status = EntryStatus.executing
         elif le.event == LogKey.job_terminated:
             entry.status = EntryStatus.completed
             wf_id, dag_job_id, slot = condor_history(le.id)

             job = next((j for j in self.workflow.jobs if j.dag_job_id == dag_job_id and j.wf_id == wf_id), None)
             if job:
                 entry.job = job
                 entry.host = next((m for m in self.machines if m.condor_slot == slot), self.machines[0])
                 print("--Job %s %s %s" % (le.id, dag_job_id, entry.host.condor_slot))
    def add_arrival(self, csv_file_name):
        """
        To add all arrivals from a given csv file at once.

        The file is read with pandas as UTF-8. For every row one
        ScheduleEntry is appended to self.arrival, built from the
        'route_long_name', 'trip_headsign' and 'arrival_time' columns.

        @type csv_file_name: str
        @param csv_file_name: a str that points to the csv file needed
        @rtype: None

        >>> path = "E:/Textbook/4th Year/CSC148/tokyo trains/"
        >>> weekday ='weekday schedule.csv'
        >>> train = Timetable()
        >>> train.add_arrival(path + weekday)
        """

        whole_schedule = pandas.read_csv(csv_file_name, encoding='utf8')
        # `row` is an index label; named so it does not shadow builtin id().
        for row in whole_schedule.index:
            self.arrival.append(
                ScheduleEntry(route=whole_schedule['route_long_name'][row],
                              direction=whole_schedule['trip_headsign'][row],
                              arrival_time=whole_schedule['arrival_time'][row]))
Esempio n. 12
0
def sched_cost_n(workflow, machines, schedule, n, timestamp, local):
    """
    Return the cost used by n machines from timestamp until end of execution.

    A working schedule is built with ``Schedule(schedule)`` and a working
    machine list of n machines is derived from ``machines``:

    * fewer than n machines: new machines are added, each with a 'boot'
      entry lasting the VM boot time;
    * n or more machines: the manager (``machines[0]``) is kept together
      with the n - 1 other machines whose last scheduled entry ends latest.

    The workflow is then scheduled on that set and the cost is predicted.

    :param workflow: workflow passed to ``sched_wf``
    :param machines: allocated machines; ``machines[0]`` is the manager
    :param schedule: current schedule; ``schedule.entries_host`` is read to
        rank the existing machines
    :param n: number of machines to plan with, manager included
    :param timestamp: moment from which the planning starts
    :param local: when truthy, a 1 second boot time is used instead of
        VM_BOOTTIME
    :return: tuple ``(working schedule, predicted cost)``
    """
    vm_boottime = timedelta(seconds=1) if local else VM_BOOTTIME

    # existing machines
    _schedule = Schedule(schedule)

    if len(machines) < n:
        _machines = list(machines)
        # new machine + boot
        for _ in range(n - len(machines)):
            machine = Machine()
            _machines.append(machine)
            boot_job = Job('boot', None)
            boot_job.pduration = vm_boottime
            boot_entry = ScheduleEntry(boot_job, machine, timestamp, timestamp + vm_boottime)
            _schedule.add_entry_host(boot_entry, machine)
    else:
        # don't use spare machines: rank the non-manager machines by the end
        # of their last entry, latest first, and drop the rest.
        workers = sorted(machines[1:], key=lambda m: schedule.entries_host[m][-1].end(), reverse=True)
        # The manager counts towards n, so only n - 1 workers are kept.
        # The previous code kept n workers plus the manager, i.e. n + 1
        # machines whenever len(machines) > n, which made
        # n == len(machines) - 1 and n == len(machines) plan with the very
        # same machine set.
        _machines = [machines[0]] + workers[:max(n - 1, 0)]

    TIMER.tick("before sched")
    sched_wf(workflow, _machines, _schedule, timestamp)
    TIMER.tick("after sched")

    _schedule.fix_machines()
    # The predicted workflow end is not needed here, only the cost.
    cost_pred, _ = sched_cost_pred(_machines, _schedule, timestamp)
    return _schedule, cost_pred