Code Example #1
File: app.py Project: cesard90/optix-demo
def save_job(content):
    job = Job()
    job.name = content['name']
    session.add(job)
    session.commit()
    # Save tasks & task queue
    tasks = content['tasks']
    for t in tasks:
        save_task(t, job.id)
    return 'ok'
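This handler leans on module-level state that the snippet doesn't show. A minimal sketch of the scaffolding it assumes, purely as a guess at the project's setup (the Job/Task models, the shared session, and save_task; all column names here are hypothetical):

# Hypothetical scaffolding for the example above, not from the project.
from sqlalchemy import create_engine, Column, Integer, String, ForeignKey
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()

class Job(Base):
    __tablename__ = 'jobs'
    id = Column(Integer, primary_key=True)
    name = Column(String)

class Task(Base):
    __tablename__ = 'tasks'
    id = Column(Integer, primary_key=True)
    job_id = Column(Integer, ForeignKey('jobs.id'))
    name = Column(String)

engine = create_engine('sqlite:///jobs.db')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()

def save_task(task_dict, job_id):
    # Persist one task row under the given job (assumed dict shape).
    task = Task(job_id=job_id, name=task_dict.get('name'))
    session.add(task)
    session.commit()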
Code Example #2
File: main.py Project: victorsavu3/mufibu
def submit_job():
	session = Session()

	job = Job()

	job.entity_id = request.form['entity_id']
	job.person_id = request.form['person_id']
	job.type = request.form['type']

	session.add(job)

	session.commit()

	return redirect(url_for('item', id=request.form['entity_id']))
Code Example #3
File: main.py Project: dimpu/python-job-bot
def save_job(page):
    for result in page.find_all('div', attrs={'class': 'result'}):
        job = result.find('a', attrs={'class': 'turnstileLink'})
        title = job.text
        url = site_url + job.get('href')
        company = result.find('span', attrs={'class': 'company'}).text
        location = result.find('span', attrs={'class': 'location'}).text
        summary = result.find('span', attrs={'class': 'summary'}).text
        Job.create(title=title,
                   url=url,
                   company=company,
                   location=location,
                   summary=summary,
                   applied=False)
    next_page(page)
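For context, a hedged sketch of how this scraper could be driven end to end with requests and BeautifulSoup; site_url, next_page, and the Job model live elsewhere in the project, so the versions below (including the pagination selector) are assumptions:

# Hypothetical driver for save_job above; selectors and URLs are guesses.
import requests
from bs4 import BeautifulSoup

site_url = 'https://www.indeed.com'

def next_page(page):
    # Follow a "next" pagination link if present (assumed selector), which
    # makes save_job and next_page mutually recursive across result pages.
    link = page.find('a', attrs={'aria-label': 'Next'})
    if link:
        html = requests.get(site_url + link.get('href')).text
        save_job(BeautifulSoup(html, 'html.parser'))

def run(query, location):
    html = requests.get(site_url + '/jobs',
                        params={'q': query, 'l': location}).text
    save_job(BeautifulSoup(html, 'html.parser'))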
Code Example #4
File: handlers.py Project: laalaguer/gae_heartbeat
def get(self):
    active = self.request.get('active', None)
    if active == 'true':
        jobs = Job.query_whole_by_active(True)  # an iterator
    elif active == 'false':
        jobs = Job.query_whole_by_active(False)  # an iterator
    else:
        jobs = Job.query_whole()
    json_dict = {'jobs': []}
    for job in jobs:
        # we don't want to include date objects
        json_dict['jobs'].append(job.to_dict(exclude=['add_date']))

    self.response.charset = 'utf-8'
    self.response.content_type = 'application/json'
    self.response.out.write(json.dumps(json_dict, ensure_ascii=False,
                                       indent=2, sort_keys=True).encode('utf-8'))
Code Example #5
def init(job):
    time.sleep(1)
    driver.get(job.url)
    if driver.find_elements_by_class_name("indeed-apply-button"):
        elem = driver.find_elements_by_class_name("indeed-apply-button")[0]
        elem.click()
        time.sleep(1)
        iframe = driver.find_element_by_xpath(
            "//*[@class='indeed-apply-bd']/iframe")
        driver.switch_to_frame(iframe)
        time.sleep(1)
        iframe = driver.find_element_by_xpath("/html/body/iframe")
        driver.switch_to_frame(iframe)
        complete_step_one(job)
        # driver.close()
    else:
        # No apply button found: mark this job as applied so it is skipped.
        Job.update(applied=True).where(Job.id == job.id).execute()
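One caveat: driver.switch_to_frame and the find_element(s)_by_* helpers used here were deprecated in Selenium 3 and removed in Selenium 4, so on a current driver the equivalent calls look roughly like this:

# Selenium 4 equivalents of the calls used above.
from selenium.webdriver.common.by import By

buttons = driver.find_elements(By.CLASS_NAME, 'indeed-apply-button')
iframe = driver.find_element(By.XPATH, "//*[@class='indeed-apply-bd']/iframe")
driver.switch_to.frame(iframe)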
Code Example #6
def main():
    for job in Job.select().where(Job.applied == False):
        print(job.title)
        print(job.applied)
        print(job.url)
        try:
            init(job)
        except Exception:
            # retry once; a second failure will propagate
            init(job)
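The except/retry above gives each job exactly one second attempt and lets a second failure kill the whole loop. A bounded-retry variant that logs the failure and moves on to the next job might look like this (a sketch, not the project's code):

# Hypothetical bounded-retry loop; retry count and logging are assumptions.
import logging

def main():
    for job in Job.select().where(Job.applied == False):
        for attempt in range(3):
            try:
                init(job)
                break  # success: stop retrying this job
            except Exception:
                logging.exception('init failed for %s (attempt %d)',
                                  job.url, attempt + 1)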
Code Example #7
File: handlers.py Project: laalaguer/gae_heartbeat
def get(self, hash_id):
    jobs = Job.query_by_hash(str(hash_id))  # an iterator
    json_dict = {'jobs': []}
    for job in jobs:
        # we don't want to include date objects
        json_dict['jobs'].append(job.to_dict(exclude=['add_date']))

    self.response.charset = 'utf-8'
    self.response.content_type = 'application/json'
    self.response.out.write(json.dumps(json_dict, ensure_ascii=False,
                                       indent=2, sort_keys=True).encode('utf-8'))
Code Example #8
def scrape_jobs(workspace, job_dict, session, api, result):
    log.debug(f"Scraping job, id: {job_dict['job_id']}")
    settings = job_dict.get("settings", {})
    job = Job(
        job_id=job_dict["job_id"],
        created_time=to_time(job_dict["created_time"]),
        name=settings["name"],
        workspace_id=workspace.id,
        max_concurrent_runs=settings["max_concurrent_runs"],
        timeout_seconds=settings["timeout_seconds"],
        email_notifications=settings["email_notifications"],
        # TODO: determine how we should handle the diff between new/existing
        # clusters
        new_cluster=(settings.get(
            "new_cluster",
            {"cluster_id": settings.get("existing_cluster_id")})),
        schedule_quartz_cron_expression=(settings.get(
            "schedule", {}).get("quartz_cron_expression")),
        schedule_timezone_id=settings.get("schedule", {}).get("timezone_id"),
        task_type=get_task_type(settings),
        notebook_path=settings.get("notebook_task", {}).get("notebook_path"),
        notebook_revision_timestamp=(settings.get(
            "notebook_task", {}).get("revision_timestamp")),
    )

    if "creator_user_name" in job_dict:
        job.creator_user_name = job_dict.get("creator_user_name")

    session.merge(job)
    result.num_jobs += 1
    job_runs_response = api.jobs.list_runs(job_id=job_dict["job_id"],
                                           limit=120)
    job_runs = job_runs_response.get("runs", [])
    log.debug(f"Scraping job runs for job_id: {job_dict['job_id']}")
    for job_run in job_runs:
        scrape_job_run(workspace, job_run, session, result)
    log.debug(f"Finished job_run scraping for job_id: {job_dict['job_id']}. "
              f"Runs scraped: {len(job_runs)}")
Code Example #9
File: server.py Project: ParkerDiamond/OpenMM-PubSub
def post_job():

    ''' Users will need to provide their username and password in
        the payload in order to post a job. They also need to specify
        the payout amount which cannot exceed the amount in their account.'''

    try:
        data = request.form
        username = data['username']
        password = data['password']

        user = User.query.filter((User.username == username) & (User.password == password)).first()
        if user is None:
            raise ValueError("Invalid username or password")
        account = Account.query.filter(Account.id == user.account_id).first()

        est_hours = float(data['est_hours'])
        payout = float(data['payout'])  # form fields arrive as strings

        if account.balance < payout:
            raise ValueError("Account balance is less than payout")
        else:
            account.balance -= payout

        job_files = request.files['job_files']
        if job_files:
            handle, placeholder = tempfile.mkstemp(dir=app.config['JOB_DIR'])
            os.close(handle)
            os.remove(os.path.join(app.config['JOB_DIR'], placeholder))
            job_files.save(os.path.join(app.config['JOB_DIR'], placeholder))
                
            newJob = Job(id=int(hash(placeholder)),
                         files=placeholder,
                         est_hours=float(est_hours),
                         payout=float(payout))
            db.session.add(newJob)
            db.session.commit()

        response = jsonify({'Status':'Success'})
        response.status_code = 200
        return response
    except (ValueError,KeyError) as ex:
        response = jsonify({'Error': str(ex)})
        response.status_code = 400
        return response
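A hedged client-side call against this endpoint using requests; the /job path is an assumption, since the route decorator is not shown above:

# Hypothetical client call; the actual route path is not shown above.
import requests

resp = requests.post(
    'http://localhost:5000/job',  # assumed route
    data={'username': 'alice', 'password': 'secret',
          'est_hours': '2.5', 'payout': '10.0'},
    files={'job_files': open('job.tar.gz', 'rb')},
)
print(resp.status_code, resp.json())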
Code Example #10
File: handlers.py Project: laalaguer/gae_heartbeat
    def post(self, hash_id):
        jsonstring = self.request.body
        payload = None
        try:
            payload = json.loads(jsonstring)
            if payload['success'] is True:
                if not payload['response']:
                    raise Exception('need response - a json object')
            elif payload['success'] is False:
                if not (payload['will_retry'] or payload['fail_reason']):
                    raise Exception('need will_retry, fail_reason')
            else:
                raise Exception('success parameter is either boolean true or false')
        except Exception as ex:
            self.error(400)
            self.response.out.write('Your Data Error, ' + str(ex))
            return  # invalid payload: stop before querying the datastore

        # step 1, get the job in the queue, but if not found, error 404
        try:
            jobs = Job.query_by_hash(str(hash_id)) # an iterator.
            if len(jobs):
                for each in jobs: # modify the job status according to the request
                    # step 2, stuff the job status with new data here.
                    if payload['success']:
                        each.success = True
                        each.will_retry = False
                        each.response = payload['response']
                    else:
                        each.success = False
                        each.will_retry = payload['will_retry']
                        each.fail_reason = payload['fail_reason']
                        each.fail_times = each.fail_times + 1 # add one to the failure times
                    each.put() # store it into database
                    self.response.out.write(each.public_hash_id)
            else:
                self.error(404)
                self.response.out.write('Job Not Found')
        except Exception as ex:
            self.error(500)
            self.response.out.write('Database Query Error ' + str(ex))
Code Example #11
File: scraper.py Project: kmate/dac
def scrape_jobs(workspace, job_dict, session, api, result):
    log.debug(f"Scraping job, id: {job_dict['job_id']}")
    settings = job_dict.get("settings", {})
    job = Job(
        job_id=job_dict["job_id"],
        created_time=to_time(job_dict["created_time"]),
        name=settings.get("name", "Untitled"),
        workspace_id=workspace.id,
        max_concurrent_runs=settings.get("max_concurrent_runs", 1),
        timeout_seconds=settings.get("timeout_seconds"),
        email_notifications=settings.get("email_notifications", []),
        new_cluster=settings.get("new_cluster"),
        existing_cluster_id=settings.get("existing_cluster_id"),
        task_type=get_task_type(settings),
        task_parameters=settings.get(get_task_type(settings).lower(), {})
    )

    if "creator_user_name" in job_dict:
        job.creator_user_name = job_dict.get("creator_user_name")

    if "schedule" in settings:
        schedule = settings.get("schedule", {})
        job.schedule_quartz_cron_expression = schedule.get("quartz_cron_expression")
        job.schedule_timezone_id = schedule.get("timezone_id")

    if job.task_type == 'notebook':
        task = settings.get("notebook_task", {})
        job.notebook_path = task.get("notebook_path")
        job.notebook_revision_timestamp = task.get("revision_timestamp")

    session.merge(job)
    result.num_jobs += 1
    job_runs = query_paginated(api.jobs.list_runs,
                               {'job_id': job_dict["job_id"]},
                               'runs')

    log.debug(f"Scraping job runs for job_id: {job_dict['job_id']}")
    for job_run in job_runs:
        scrape_job_run(workspace, job_run, session, result)
    log.debug(f"Finished job_run scraping for job_id: {job_dict['job_id']}. "
              f"Runs scraped: {len(job_runs)}")
Code Example #12
File: queue.py Project: dyens/travian
def save_to_db(self):
    Job.clear()
    for i in self.queue:
        job = Job(i[0], i[1], i[2])
        session.add(job)
    session.commit()
Code Example #13
def send_task_2_worker(task_id):
    """
    定时任务响应函数,负责把任务按账号拆解成job, 并发送给最适合的队列
    :param task_id: 任务id
    :return: 成功返回True, 失败返回False
    """
    try:
        jobs = []
        time_it_beg = datetime.datetime.now()
        db_scoped_session = ScopedSession()
        task = db_scoped_session.query(
            Task.category, Task.configure, Task.limit_counts,
            Task.succeed_counts,
            Task.scheduler).filter(Task.id == task_id).first()
        if not task:
            logger.error(
                'send_task_2_worker can not find the task, id={}. '.format(
                    task_id))
            return False

        category, task_configure, limit_counts, succeed_counts, sch_id = task

        sch_mode = db_scoped_session.query(
            Scheduler.mode).filter(Scheduler.id == sch_id).first()

        # 对于周期性任务,每次产生的job会严格控制, 但对于一次性任务, 用户指定多少个账号,就用多少个账号
        if sch_mode[0] in [1, 2]:
            if limit_counts:
                # If the task's succeed count already exceeds the target, or succeed
                # count plus running jobs exceeds 120% of the target, stop producing jobs.
                if succeed_counts >= int(limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task already finished, task id={}, succeed jobs({}) >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, limit_counts))
                    return True

                task_running_jobs = db_scoped_session.query(Job).filter(
                    and_(Job.task == task_id,
                         Job.status == 'running')).count()
                if task_running_jobs + succeed_counts >= int(
                        limit_counts * 1.2):
                    logger.warning(
                        'send_task_2_worker ignore, task will finish, task id={}, succeed jobs({})+running jobs({})  >= limit counts({})*1.2'
                        .format(task_id, succeed_counts, task_running_jobs,
                                limit_counts))
                    return True

                # If a task has too many running jobs backed up, temporarily stop producing new ones.
                if task_running_jobs >= 10000:
                    logger.warning(
                        'task({}) jobs num={} has reached jobs limit 10000'.
                        format(task_id, task_running_jobs))
                    return True

        # Look up the processor function that handles this task's category.
        tcg = db_scoped_session.query(TaskCategory.processor).filter(
            TaskCategory.category == category).first()
        if not tcg:
            return False

        # Each task category maps to exactly one processor.
        task_processor = tcg[0]
        if not task_processor:
            logger.error(
                'Task(id={}) have no processor, ignore processing.'.format(
                    task_id))
            return False

        logger.info(
            '---------send_task_2_worker task id={}. --------'.format(task_id))

        # Fetch all accounts bound to this task.
        res = db_scoped_session.query(TaskAccountGroup.account_id).filter(
            TaskAccountGroup.task_id == task_id).all()
        account_ids = [x[0] for x in res]
        accounts = db_scoped_session.query(
            Account.id, Account.status, Account.account, Account.password,
            Account.email, Account.email_pwd, Account.gender,
            Account.phone_number, Account.birthday, Account.national_id,
            Account.name, Account.active_area, Account.active_browser,
            Account.profile_path,
            Account.configure).filter(Account.id.in_(account_ids)).all()

        # agents = db_scoped_session.query(Agent.id, Agent.active_area).filter(Agent.status != -1).order_by(Agent.status).all()

        # A task spans multiple accounts; split it into jobs per account first.
        real_accounts_num = 0
        for acc in accounts:
            acc_id, status, account, password, email, email_pwd, gender, phone_number, birthday, national_id, name, \
            active_area, active_browser_id, profile_path, account_configure = acc

            if status == 'invalid':
                logger.warning(
                    'account status in invalid. task id={}, account id={}'.
                    format(task_id, acc_id))
                continue

            area = db_scoped_session.query(Area).filter(
                Area.id == active_area).first()
            queue_name = 'default'
            area_id = None
            if area:
                area_id, queue_name = area.id, area.name
            else:
                logger.warning(
                    'There have no optimal agent for task, task id={}, account id={}, account area={}'
                    .format(task_id, acc_id, active_area))

            active_browser = db_scoped_session.query(FingerPrint.value).filter(
                FingerPrint.id == active_browser_id).first()

            if get_system_args()["force_display"] == 0:
                headless = get_environment() == 'pro'
            else:
                headless = False
            # Build the parameters required to execute the task.
            inputs = {
                'system': {
                    'headless': headless
                },
                'task': {
                    'task_id': task_id,
                    'configure':
                    json.loads(task_configure) if task_configure else {},
                },
                'account': {
                    'account': account,
                    'password': password,
                    'status': status,
                    'email': email,
                    'email_pwd': email_pwd,
                    'gender': gender,
                    'phone_number': phone_number,
                    'birthday': birthday,
                    'national_id': national_id,
                    'name': name,
                    'active_area': active_area,
                    'active_browser':
                    json.loads(active_browser[0]) if active_browser else {},
                    'profile_path': profile_path,
                    'configure':
                    json.loads(account_configure) if account_configure else {}
                }
            }

            celery_task_name = "tasks.tasks.{}".format(task_processor)
            real_accounts_num += 1

            track = app.send_task(celery_task_name,
                                  args=(inputs, ),
                                  queue=queue_name,
                                  routing_key=queue_name)

            logger.info(
                '-----send sub task to worker, celery task name={}, area id={}, queue={}, '
                'task id={}, account id={}, track id={}'.format(
                    celery_task_name, area_id, queue_name, task_id, acc_id,
                    track.id))

            job = Job()
            job.task = task_id
            job.account = acc_id
            job.area = area_id
            job.status = 'running'
            job.track_id = track.id
            job.start_time = datetime.datetime.now()
            jobs.append(job)

            if sch_mode[0] in [1, 2]:
                # If running jobs plus the jobs produced this round exceed the user's
                # target, break and stop producing jobs; the next scheduling cycle re-checks.
                total_running_jobs = task_running_jobs + real_accounts_num
                if (limit_counts and total_running_jobs >= int(
                        limit_counts * 1.2)) or total_running_jobs >= 10000:
                    logger.warning(
                        'task({}) total running jobs num({}) is already more than limit counts({})*1.2'
                        .format(task_id, total_running_jobs, limit_counts))
                    break

        # Mark the task as running.
        # The number of accounts actually usable varies per round with each account's status.
        db_scoped_session.query(Task).filter(and_(Task.id == task_id, Task.status.in_(['new', 'pending'])))\
            .update({Task.status: "running", Task.start_time: datetime.datetime.now(),
                     Task.real_accounts_num: real_accounts_num, Task.last_update: datetime.datetime.now()}, synchronize_session=False)

        if jobs:
            db_scoped_session.add_all(jobs)

        db_scoped_session.commit()

        logger.info(
            '----send_task_2_worker send task {}, produce jobs={}, used {} seconds. '
            .format(task_id, real_accounts_num,
                    (datetime.datetime.now() - time_it_beg).seconds))
    except Exception as e:
        logger.exception(
            'send_task_2_worker exception task id={}, e={}'.format(task_id, e))
        db_scoped_session.rollback()
    finally:
        ScopedSession.remove()

    return True
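On the worker side, each task_processor stored in TaskCategory is expected to be a Celery task registered as tasks.tasks.<processor> and taking the inputs dict built above. A minimal hedged sketch of such a worker (broker URL, task name, and return shape are all assumptions):

# Hypothetical worker-side processor; names and return shape are assumptions.
from celery import Celery

app = Celery('tasks', broker='redis://localhost:6379/0')

@app.task(name='tasks.tasks.example_processor')
def example_processor(inputs):
    account = inputs['account']
    headless = inputs['system']['headless']
    configure = inputs['task']['configure']
    # ... drive a browser session for this account here ...
    return {'task_id': inputs['task']['task_id'], 'status': 'succeed'}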