Example #1
0
    def get_init_state(self, current_timestamp, df):
        """Seed the waiting queue and the running list from a job log.

        Rows selected as "queued" are cut into time slices of at most
        ``self.quantum`` each; the slices of all queued jobs are then
        interleaved (slice 0 of every job, then slice 1 of every job, ...)
        and appended to ``self.waiting_queue``.  Rows selected as "running"
        are given nodes from ``Algorithm.get_available_nodes`` and appended
        to ``self.running_list``.

        Args:
            current_timestamp: reference time compared against the
                ``ctime`` / ``start`` / ``end`` columns.
            df: pandas DataFrame providing the columns ``ctime``, ``start``,
                ``end``, ``jobname`` and ``nhosts``.
        """
        # get queued jobs
        # NOTE(review): this window keeps rows with ctime >= now and
        # start < now; a queued job would normally satisfy ctime <= now < start.
        # Left unchanged -- confirm the intended comparison direction.
        queued_df = df.loc[(df['ctime'] >= current_timestamp)
                           & (df['start'] < current_timestamp)].copy()
        queued_df = queued_df.reset_index()
        tmp_queue_list = []
        for _, row in queued_df.iterrows():
            job_id = str(uuid.uuid4())
            job_args = {
                'create_time': row['ctime'],
                'job_name': row['jobname'],
                'start_time': None,
                'total_run_time': row['end'] - row['start'],
                'required_n_nodes': int(row['nhosts']),
                'used_nodes': [],
                'remained_running_time': None,
                'job_id': job_id
            }
            duration = int(row['end'] - row['start'])
            quantum = int(self.quantum)
            # Floor division keeps the slice count an int on Python 3 as well;
            # a float here would make range() below raise TypeError.
            num_full_slices = duration // quantum
            last_slice = duration % quantum

            # All slices of one job share the same job_id.
            slices = []
            job_args['remained_running_time'] = self.quantum
            for _ in range(num_full_slices):
                slices.append(Job(**job_args))
            # A job shorter than one quantum still gets a single slice.
            if num_full_slices == 0 or last_slice > 0:
                job_args['remained_running_time'] = last_slice
                slices.append(Job(**job_args))
            tmp_queue_list.append(slices)

        if tmp_queue_list:
            # Interleave by slice index: range() needs the *length* of the
            # longest slice list, and the inner loop must walk the lists
            # themselves rather than use them as indices.
            longest = max(len(slices) for slices in tmp_queue_list)
            for i in range(longest):
                for slices in tmp_queue_list:
                    if i < len(slices):
                        self.waiting_queue.append(slices[i])
        else:
            self.waiting_queue = []

        # get running jobs
        # NOTE(review): this window keeps rows with start >= now and
        # end < now, yet remained_running_time below is end - now, which is
        # only positive when end > now.  Left unchanged -- confirm intent.
        run_df = df.loc[(df['start'] >= current_timestamp)
                        & (df['end'] < current_timestamp)].copy()
        run_df = run_df.reset_index()
        for _, row in run_df.iterrows():
            job_args = {
                'create_time': row['ctime'],
                'job_name': row['jobname'],
                'start_time': row['start'],
                'total_run_time': row['end'] - row['start'],
                'required_n_nodes': int(row['nhosts']),
                'used_nodes': [],
                'remained_running_time': row['end'] - current_timestamp
            }
            available_nodes = Algorithm.get_available_nodes(self)
            # NOTE(review): strict '>' skips a job that needs exactly the
            # number of available nodes -- confirm whether '>=' is intended.
            if len(available_nodes) > job_args['required_n_nodes']:
                job_args['used_nodes'] = available_nodes[
                    0:job_args['required_n_nodes']]
                self.running_list.append(Job(**job_args))
                # presumably marks the chosen nodes as occupied; verify
                # against Algorithm.use_nodes.
                Algorithm.use_nodes(self, job_args['used_nodes'])
Example #2
0
def parse(year, semester):
    """Register one ``dp_parse`` job per link found on a course index page.

    The page ``/view-dept/<year>/<semester>/everything`` is downloaded and
    every anchor located inside a table row (CSS selector ``tr a``) is turned
    into an absolute URL and handed to ``Job.add_job`` under the ``'thu'``
    key.

    Args:
        year: value placed in the year segment of the URL (converted with
            ``str``).
        semester: value placed in the semester segment of the URL (converted
            with ``str``).
    """
    job = Job()
    index_url = ''.join(('http://course.thu.edu.tw/view-dept/', str(year),
                         '/', str(semester), '/everything'))
    response = requests.get(index_url)
    domain = 'http://course.thu.edu.tw'
    soup = BeautifulSoup(response.text, 'lxml')
    for anchor in soup.select('tr a'):
        # hrefs are joined onto the site root as-is.
        job.add_job('thu', dp_parse, domain + anchor['href'])
Example #3
0
def data_parse(url):
    """Download a listing page and register a ``body_parse`` job per entry.

    Each element matching the CSS class ``.r-ent`` is inspected; the ``href``
    of its first ``<a>`` tag is appended to ``https://www.ptt.cc`` and passed
    to ``Job.add_job`` under the ``'ptt'`` key.  Entries that contain no
    ``<a>`` tag are skipped instead of raising ``IndexError``.

    Args:
        url: address of the listing page to fetch.  The request is sent with
            the cookie ``over18=1``.
    """
    job = Job()
    html = requests.get(url=url, cookies={'over18': '1'}).text

    soup = BeautifulSoup(html, 'lxml')
    for entry in soup.select('.r-ent'):
        anchors = entry.select('a')
        if not anchors:
            # Nothing to follow for this entry.
            continue
        job.add_job('ptt', body_parse, 'https://www.ptt.cc' + anchors[0]['href'])
 def createJobFromImageFiles(self, filenames=None):
     """
     Create a job containing all images to convert
     @param: filenames List of images files; None (the default) is treated
             as an empty list
     @return: a Job instance containing one sub-job per image file
     """
     # The default is None, which cannot be iterated; fall back to "no files"
     # so the call returns an empty master job instead of raising TypeError.
     if filenames is None:
         filenames = []
     masterJob = Job()
     for filename in filenames:
         job = self.createJobFromImageFile(filename)
         masterJob.addJob(job)
     return masterJob
Example #5
0
def parse(start,end):
    """Register one ``data_parse`` job per page of 30 rows between start and end.

    Args:
        start: first row offset (inclusive).
        end: row offset to stop before.  When it is ``-1`` the search
            endpoint is queried once and the ``records`` field of its JSON
            reply (thousands separators removed) is used instead.
    """
    job = Job()
    if end == -1:
        count_url = 'https://rent.591.com.tw/home/search/rsList?is_new_list=1&type=1&kind=0&searchtype=1&region=1'
        request_headers = {'Host': 'rent.591.com.tw','User-Agent':'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36'}
        response = requests.get(count_url, headers=request_headers)
        listing = json.loads(response.text)
        end = int(listing['records'].replace(",", ""))
    for offset in range(start, end, 30):
        # An offset of 0 is sent as firstRow=1.
        first_row = 1 if offset == 0 else offset
        page_url = 'https://rent.591.com.tw/home/search/rsList?is_new_list=1&type=1&kind=0&searchtype=1&region=1&firstRow='+str(first_row)+'&totalRows='+str(end)
        job.add_job('s591', data_parse, page_url)
    def createJobFromMediaDirectory(self,
                                    mediaDirectoryPath,
                                    videoFileExtensions,
                                    imageFileExtensions):
        """
        Build a master job converting every supported media file found
        below a directory (walked recursively, bottom-up).
        @param: mediaDirectoryPath Path to media directory
        @param: videoFileExtensions Collection of video file extensions,
                compared against the lower-cased extension without its dot
        @param: imageFileExtensions Collection of image file extensions,
                compared the same way
        @return: a Job instance holding the video job and the image job.
        """
        # Full path of every file below the (absolute) media directory.
        allPaths = [
            os.path.join(root, name)
            for root, _dirs, files in os.walk(
                os.path.abspath(mediaDirectoryPath), topdown=False)
            for name in files
        ]

        videoPaths = []
        imagePaths = []
        for path in allPaths:
            stem, dottedExtension = os.path.splitext(path)
            extension = dottedExtension[1:].strip().lower()
            stemBasename = basename(stem.strip())
            # Files with an empty base name or no extension are ignored.
            if not stemBasename or not extension:
                continue
            # Video extensions take precedence when a suffix is in both lists.
            if extension in videoFileExtensions:
                videoPaths.append(path)
            elif extension in imageFileExtensions:
                imagePaths.append(path)

        masterJob = Job()
        masterJob.addJob(self.createJobFromVideoFiles(videoPaths))
        masterJob.addJob(self.createJobFromImageFiles(imagePaths))
        return masterJob
Example #7
0
    def create(self, context_lib_name, payload):
        """Build a fully wired ``Job`` for the given payload.

        A context object is created through ``self.creator``; job settings
        for ``context.name`` / ``context.event`` are then read from the JSON
        file ``JOB_CONFIG`` and merged into the ``Job`` keyword arguments.
        When the file cannot be read or parsed, or holds no entry for this
        name/event pair, the job is built from the context alone.  Finally a
        runner and a payload provider are created and attached to the job.

        Args:
            context_lib_name: library name passed to ``self.creator`` for the
                context.
            payload: payload passed to ``self.creator`` for the context.

        Returns:
            The configured ``Job`` instance.
        """
        context = self.creator(kind='context',
                               lib_name=context_lib_name,
                               name='Context',
                               payload=payload)
        kwargs = {'context': context}
        logging.info(
            f'create new job for {context.name}, event {context.event}')
        try:
            with open(JOB_CONFIG) as f:
                kwargs.update(json.load(f)[context.name][context.event])
        except (OSError, ValueError, LookupError, TypeError):
            # Best effort: a missing/unreadable file (OSError), invalid JSON
            # (ValueError), an absent name/event entry (LookupError) or an
            # unexpected JSON shape (TypeError) all fall back to the default
            # job.  KeyboardInterrupt/SystemExit are no longer swallowed.
            logging.info('use default job')

        job = Job(**kwargs)

        runner = self.creator(kind='runner',
                              lib_name=job.runner_lib,
                              name='Runner')
        job.set_runner(runner)

        payload_provider = self.creator(kind='job_payload',
                                        lib_name=job.payload_lib,
                                        name='JobPayloadProvider')
        job.set_payload_provider(payload_provider)

        return job
Example #8
0
 def __init__(self, num_nodes, log_df, epoch_start, epoch_end):
     """
     Load the jobs created inside [epoch_start, epoch_end) into a FIFO log
     queue (ordered by 'ctime') and set up empty scheduling state.
     @param: num_nodes Number of nodes; pool keys are 'b0' .. 'b<num_nodes-1>'
     @param: log_df DataFrame with columns ctime, start, end, jobname, nhosts
     @param: epoch_start Lower bound on ctime (inclusive)
     @param: epoch_end Upper bound on ctime (exclusive)
     """
     self.log_queue = Queue.Queue()
     in_epoch = ((log_df['ctime'] >= epoch_start)
                 & (log_df['ctime'] < epoch_end))
     epoch_jobs = log_df.loc[in_epoch].copy()
     epoch_jobs = epoch_jobs.reset_index().sort_values('ctime')
     for _, entry in epoch_jobs.iterrows():
         # A job that has not started yet still has its whole run time left.
         run_time = entry['end'] - entry['start']
         self.log_queue.put(Job(create_time=entry['ctime'],
                                job_name=entry['jobname'],
                                start_time=None,
                                total_run_time=run_time,
                                required_n_nodes=int(entry['nhosts']),
                                used_nodes=[],
                                remained_running_time=run_time))
     self.waiting_queue = []
     self.running_list = []
     # Every node starts with the value 0.
     self.node_pool = {'b{}'.format(str(i)): 0 for i in range(num_nodes)}
Example #9
0
import os

from job.job import Job
from local_windows.local_windows import LocalWindows

# Script entry point: only runs when the file is executed directly, not when
# it is imported.
if __name__=='__main__':
    # Build the job from the 'input.json' argument and run today's job.
    job=Job('input.json')
    job.run_today_job()
Example #10
0
def page_parse(board, start, end):
    """Register one ``data_parse`` job per index page of a board.

    Args:
        board: board name inserted into the ``/bbs/<board>/`` URL segment.
        start: first index page number (converted with ``int``).
        end: last index page number, inclusive (converted with ``int``).
    """
    job = Job()
    first_page, last_page = int(start), int(end)
    for page_number in range(first_page, last_page + 1):
        page_url = 'https://www.ptt.cc/bbs/' + board + '/index%d.html' % page_number
        job.add_job('ptt', data_parse, page_url)