Beispiel #1
0
def project_stats(project_id):
    """Render the statistics page for one project.

    The project is looked up with ``get_or_404`` purely as an existence
    check (the returned row is discarded), ``project_id`` is stored in the
    session, and the collected statistics plus the current local time are
    handed to the ``project_stats.html`` template.
    """
    Project.query.get_or_404(project_id)
    session['project_id'] = project_id

    # Only the hourly run stats are queried with project_id; the
    # pending/running lookups receive just the status name.
    context = {
        'run_stats': JobExecution.list_run_stats_by_hours(project_id),
        'pending_stats': JobExecution.list_jobs_stats('pending'),
        'running_stats': JobExecution.list_jobs_stats('running'),
    }
    context['time'] = datetime.datetime.now().ctime()
    return render_template("project_stats.html", **context)
Beispiel #2
0
    def sync_job_status(self, project, log_timeout=30):
        """Copy job states reported by each spider service into the database.

        For every entry in ``self.spider_service_instances`` the job list of
        ``project.project_name`` is fetched and matched, by service job id,
        against the executions returned by
        ``JobExecution.list_uncomplete_job()``:

        * jobs reported as running move a PENDING execution to RUNNING and
          record its start time;
        * jobs reported as finished mark any not-yet-FINISHED execution as
          FINISHED, record start/end time, and try to extract the raw stats
          block from the tail of the job log.

        The session is committed once per service instance.

        :param project: object exposing ``project_name``.
        :param log_timeout: timeout in seconds passed to ``requests.get``
            when downloading a finished job's log. Without a timeout an
            unresponsive log server would block the sync indefinitely.
        :raises requests.exceptions.RequestException: propagated unchanged
            when the log download fails or times out; the commit for the
            current service instance is then not reached.
        """
        for spider_service_instance in self.spider_service_instances:
            job_status = spider_service_instance.get_job_list(
                project.project_name)
            # Index the still-open executions by the id the service knows.
            executions_by_service_id = {
                job_execution.service_job_execution_id: job_execution
                for job_execution in JobExecution.list_uncomplete_job()
            }

            # running
            for job_execution_info in job_status[SpiderStatus.RUNNING]:
                job_execution = executions_by_service_id.get(
                    job_execution_info['id'])
                if job_execution and job_execution.running_status == SpiderStatus.PENDING:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.running_status = SpiderStatus.RUNNING

            # finished
            for job_execution_info in job_status[SpiderStatus.FINISHED]:
                job_execution = executions_by_service_id.get(
                    job_execution_info['id'])
                if job_execution and job_execution.running_status != SpiderStatus.FINISHED:
                    job_execution.start_time = job_execution_info['start_time']
                    job_execution.end_time = job_execution_info['end_time']
                    job_execution.running_status = SpiderStatus.FINISHED

                    # Bound the wait on the log server so one stalled
                    # daemon cannot hang the whole sync.
                    res = requests.get(self.log_url(job_execution),
                                       timeout=log_timeout)
                    res.encoding = 'utf8'
                    # Only the last 4096 characters of the log are searched
                    # for the stats block.
                    raw = res.text[-4096:]
                    match = re.findall(job_execution.RAW_STATS_REGEX, raw,
                                       re.DOTALL)
                    if match:
                        job_execution.raw_stats = match[0]
                        job_execution.process_raw_stats()

            db.session.commit()
Beispiel #3
0
 def start_spider(self, job_instance):
     """Start ``job_instance`` on one or more daemons and record each run.

     ``job_instance.spider_arguments`` is parsed as a comma-separated list
     of ``key=value`` pairs. The number of daemons to start on depends on
     the priority: HIGHEST uses the daemon count, HIGH uses half of it
     (rounded down), anything else uses one; the result is never below one.

     If the parsed arguments contain ``daemon``, the job is started only on
     the service instance whose ``server`` equals that value (the last
     match wins; no match means nothing is started). Otherwise daemons are
     drawn with ``random.choice``; draws are independent, so the same
     daemon may be picked more than once.

     A ``JobExecution`` row is added and committed for every started run.

     :param job_instance: object exposing ``project_id``, ``spider_name``,
         ``spider_arguments``, ``priority`` and ``id``.
     :raises ValueError: if an argument entry contains no ``=``.
     :raises IndexError: from ``random.choice`` when there are no service
         instances and no ``daemon`` argument was given.
     """
     project = Project.query.get(job_instance.project_id)
     spider_name = job_instance.spider_name
     arguments = {}
     if job_instance.spider_arguments:
         # Split on the first "=" only, so a value may itself contain "="
         # (e.g. a URL with a query string) instead of raising ValueError.
         arguments = dict(
             pair.split("=", 1)
             for pair in job_instance.spider_arguments.split(","))

     daemon_size = len(self.spider_service_instances)
     threshold = 0
     if job_instance.priority == JobPriority.HIGH:
         threshold = daemon_size // 2
     if job_instance.priority == JobPriority.HIGHEST:
         threshold = daemon_size
     # Always start at least one run.
     threshold = max(threshold, 1)

     candidates = self.spider_service_instances
     leaders = []
     if 'daemon' in arguments:
         for candidate in candidates:
             if candidate.server == arguments['daemon']:
                 leaders = [candidate]
     else:
         # TODO optimize some better func to vote the leader
         for _ in range(threshold):
             leaders.append(random.choice(candidates))

     for leader in leaders:
         service_job_id = leader.start_spider(project.project_name,
                                              spider_name, arguments)
         job_execution = JobExecution()
         job_execution.project_id = job_instance.project_id
         job_execution.service_job_execution_id = service_job_id
         job_execution.job_instance_id = job_instance.id
         job_execution.create_time = datetime.datetime.now()
         job_execution.running_on = leader.server
         db.session.add(job_execution)
         db.session.commit()
Beispiel #4
0
 def get(self, project_id):
     """Return whatever ``JobExecution.list_jobs`` yields for ``project_id``."""
     jobs = JobExecution.list_jobs(project_id)
     return jobs
Beispiel #5
0
def job_dashboard(project_id):
    """Render the job dashboard for one project.

    The project is looked up with ``get_or_404`` as an existence check,
    ``project_id`` is remembered in the session, and the result of
    ``JobExecution.list_jobs`` is passed to ``job_dashboard.html`` as
    ``job_status``.
    """
    Project.query.get_or_404(project_id)
    session['project_id'] = project_id
    job_status = JobExecution.list_jobs(project_id)
    return render_template("job_dashboard.html", job_status=job_status)
Beispiel #6
0
def service_stats(project_id):
    """Render the server statistics page for one project.

    The project is looked up with ``get_or_404`` as an existence check,
    ``project_id`` is remembered in the session, and the hourly run
    statistics are passed to ``server_stats.html`` as ``run_stats``.
    """
    Project.query.get_or_404(project_id)
    session['project_id'] = project_id
    return render_template(
        "server_stats.html",
        run_stats=JobExecution.list_run_stats_by_hours(project_id),
    )