def save_task(cls, name, category_id, creator_id, scheduler_id, account_ids, **kwargs):
    """Create a Task row, then one TaskAccountGroup row per account id.

    Args:
        cls: Not used by the body.
        name: Stored on ``Task.name``.
        category_id: Stored on ``Task.category``.
        creator_id: Stored on ``Task.creator``.
        scheduler_id: Stored on ``Task.scheduler``.
        account_ids: Sized iterable of account ids; its length seeds both
            ``real_accounts_num`` and ``accounts_num``.
        **kwargs: Extra attribute values. A key is applied only when the Task
            object already has an attribute with that name, and it is applied
            after the values above, so it can override them.

    Returns:
        The Task instance that was added and committed.
    """
    new_task = Task()
    new_task.name = name
    new_task.category = category_id
    new_task.creator = creator_id
    new_task.scheduler = scheduler_id

    account_total = len(account_ids)
    new_task.real_accounts_num = account_total
    new_task.accounts_num = account_total

    for attr_name in kwargs:
        if hasattr(new_task, attr_name):
            setattr(new_task, attr_name, kwargs[attr_name])

    new_task.last_update = datetime.datetime.now()
    db_session.add(new_task)
    # The task is committed first; its id is read below for the link rows.
    db_session.commit()

    # account_ids is only a list of ids, so it cannot be assigned to
    # task.accounts directly; each account gets its own link row instead.
    for account_id in account_ids:
        link = TaskAccountGroup()
        link.task_id = new_task.id
        link.account_id = account_id
        db_session.add(link)
    db_session.commit()
    return new_task
def add_job(cls, job):
    """Add ``job`` to the session and commit it, if it is a Job instance.

    Args:
        cls: Not used by the body.
        job: Candidate object to persist.

    Returns:
        True when ``job`` is a Job and was committed; False otherwise, in
        which case the session is left untouched.
    """
    if not isinstance(job, Job):
        return False
    db_session.add(job)
    db_session.commit()
    return True
def add_account(cls, account):
    """Add ``account`` to the session and commit it, if it is an Account.

    Args:
        cls: Not used by the body.
        account: Candidate object to persist.

    Returns:
        True when ``account`` is an Account instance and was committed;
        False otherwise, in which case nothing is added.
    """
    if not isinstance(account, Account):
        return False
    db_session.add(account)
    db_session.commit()
    return True
def set_job_by_track_ids(cls, track_ids, values):
    """Bulk-update Job rows selected by track id and report the unmatched ids.

    For every Job whose ``track_id`` is in ``track_ids``, the entry
    ``values[job.track_id]`` supplies the new ``status`` and, optionally,
    ``result`` and ``traceback`` (both default to ``''``). A job is only
    modified when the new status differs from its current one.

    Unlike the previous implementation, the caller's ``track_ids`` object is
    never mutated, and several jobs sharing one track id no longer abort the
    whole update.

    Args:
        cls: Not used by the body.
        track_ids: Iterable of track ids to look up.
        values: Mapping of track id -> dict with keys ``status``, ``result``
            and ``traceback``.

    Returns:
        A new list. On success it holds the ids from ``track_ids`` for which
        no Job row was found. If anything fails while updating or committing,
        the transaction is rolled back and all input ids are returned.
    """
    requested_ids = list(track_ids)
    jobs = db_session.query(Job).filter(Job.track_id.in_(requested_ids)).all()
    matched_ids = set()
    try:
        for job in jobs:
            matched_ids.add(job.track_id)
            # NOTE(review): a job with no entry in `values` falls back to an
            # empty dict, so its status becomes None - confirm this is wanted.
            value = values.get(job.track_id, {})
            new_status = value.get('status')
            new_result = value.get('result', '')
            new_traceback = value.get('traceback', '')
            if job.status != new_status:
                # The first transition to 'running' marks the start time.
                if new_status == 'running':
                    job.start_time = datetime.datetime.now()
                if new_status in ['succeed', 'failed']:
                    job.end_time = datetime.datetime.now()
                job.result = new_result
                job.traceback = new_traceback
                job.status = new_status
        db_session.commit()
    except Exception:
        logger.exception('set_job_by_track_ids catch exception.')
        db_session.rollback()
        return requested_ids
    return [track_id for track_id in requested_ids if track_id not in matched_ids]
def save_account_category(cls, category, name=''):
    """Create and commit an AccountCategory row.

    Args:
        cls: Not used by the body.
        category: Stored on ``AccountCategory.category``.
        name: Stored on ``AccountCategory.name``; defaults to ``''``.

    Returns:
        The AccountCategory instance that was added and committed.
    """
    account_category = AccountCategory()
    for attr_name, attr_value in (('category', category), ('name', name)):
        setattr(account_category, attr_name, attr_value)
    db_session.add(account_category)
    db_session.commit()
    return account_category
def update_task_name(cls, parameter):
    """Attach MainUrl rows to a spider task, or detach them from it.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the keys
            ``task_id`` (converted with ``int``),
            ``main_url_pids`` (string holding a Python expression that yields
            the pids to touch; ``""`` means "nothing to do"), and
            ``operation`` (``"import"`` sets ``spider_name`` to the task id,
            ``"remove"`` resets it to 0; any other value changes no rows).

    Returns:
        A dict with ``code`` and ``message``: code ``"200"`` after a
        successful commit, ``"404"`` when one of the handled database errors
        occurs, ``"202"`` when ``main_url_pids`` is the empty string.

    Raises:
        Exceptions other than the handled database errors still propagate
        (for example AttributeError when a pid matches no MainUrl row), but
        the session is now closed in that case as well.
    """
    spider_name = int(parameter['task_id'])
    main_url_pids = parameter['main_url_pids']
    operation = str(parameter['operation'])
    if main_url_pids == "":
        return {"code": "202", "message": "并没有移除数据"}

    # Value written to MainUrl.spider_name for each supported operation.
    spider_name_by_operation = {"import": spider_name, "remove": 0}
    try:
        if operation in spider_name_by_operation:
            new_spider_name = spider_name_by_operation[operation]
            # SECURITY: eval() executes arbitrary code. If main_url_pids can
            # come from an untrusted request, replace this with a safe parser
            # such as ast.literal_eval or json.loads.
            for main_url_pid in eval(main_url_pids):
                main_url = db_session.query(MainUrl).filter(
                    MainUrl.pid == main_url_pid).first()
                main_url.spider_name = new_spider_name
        db_session.commit()
        return {"code": "200", "message": "更新成功"}
    except (SqlalchemyIntegrityError, PymysqlIntegrityError, InvalidRequestError):
        db_session.rollback()
        return {"code": "404", "message": "更新失败"}
    finally:
        # Always release the session, including when an unexpected exception
        # escapes the handlers above.
        db_session.close()
def add_scheduler(cls, scheduler):
    """Add ``scheduler`` to the session and commit it, if it is a Scheduler.

    Args:
        cls: Not used by the body.
        scheduler: Candidate object to persist.

    Returns:
        True when ``scheduler`` is a Scheduler instance and was committed;
        False otherwise, in which case nothing is added.
    """
    if not isinstance(scheduler, Scheduler):
        return False
    db_session.add(scheduler)
    db_session.commit()
    return True
def save_finger_print(cls, name, value):
    """Create and commit a FingerPrint row.

    Args:
        cls: Not used by the body.
        name: Stored on ``FingerPrint.name``.
        value: Stored on ``FingerPrint.value``.

    Returns:
        The FingerPrint instance that was added and committed.
    """
    finger_print = FingerPrint()
    finger_print.name = name
    finger_print.value = value

    db_session.add(finger_print)
    db_session.commit()
    return finger_print
def opt_db():
    """Insert two batches of Job rows while holding ``db_lock``.

    The first batch (track ids 3000-5999) is built as Job objects and saved
    with ``add_all``; the second (track ids 8000-9999) is written through a
    single ``insert()`` on the Job table. Each batch is committed inside its
    own ``with db_lock`` section, so the lock is released even when the
    insert or commit raises. Previously a failure left the lock held.
    """
    print(datetime.datetime.now())
    jobs = []
    for x in range(3000, 6000):
        job = Job()
        job.task = 2
        job.account = 2
        job.agent = 1
        job.status = 'running_th'
        job.track_id = '{}'.format(x)
        job.start_time = datetime.datetime.now()
        jobs.append(job)

    with db_lock:
        print('thead dbs1={}'.format(db_session))
        db_session.add_all(jobs)
        db_session.commit()

    with db_lock:
        print('thead dbs2={}'.format(db_session))
        db_session.execute(Job.__table__.insert(), [{
            'task': 1,
            'account': 3,
            'agent': 1,
            'status': 'ffff',
            'track_id': '{}'.format(x),
            'start_time': datetime.datetime.now()
        } for x in range(8000, 10000)])
        db_session.commit()
def test_db():
    """Run ``opt_db`` in a thread while inserting Job rows from this thread.

    A thread running ``opt_db`` is started (and not joined). This thread then
    builds Job objects for track ids 10010-19999 and, while holding
    ``db_lock``, adds them, issues a 1000-row ``insert()`` on the Job table
    (track ids 0-999), commits and closes the session. Timestamps and the
    session object are printed along the way.

    The lock is taken with ``with`` so it is released even if the database
    work raises. Previously a failure left the lock held, which would block
    the ``opt_db`` thread forever.
    """
    import threading

    th = threading.Thread(target=opt_db)
    th.start()

    jobs = []
    for x in range(10010, 20000):
        job = Job()
        job.task = 2
        job.account = 2
        job.agent = 1
        job.status = 'running11'
        job.track_id = '{}'.format(x)
        job.start_time = datetime.datetime.now()
        jobs.append(job)

    with db_lock:
        print(datetime.datetime.now())
        # The ORM objects are not committed separately; they are written by
        # the single commit that follows the bulk insert.
        db_session.add_all(jobs)
        db_session.execute(Job.__table__.insert(), [{
            'task': 1,
            'account': 3,
            'agent': 1,
            'status': 'ffff',
            'track_id': '{}'.format(x),
            'start_time': datetime.datetime.now()
        } for x in range(1000)])
        db_session.commit()
        print(db_session)
        db_session.close()
    print(datetime.datetime.now())
def add_all(cls, datas):
    """Save ``datas`` in one commit, falling back to item-by-item saves.

    Args:
        cls: Object providing ``add_one``, used for the fallback path.
        datas: Iterable of objects handed to ``db_session.add_all``. It is
            iterated a second time if the bulk commit fails.

    Returns:
        None.

    Raises:
        Whatever ``cls.add_one`` raises during the fallback.
    """
    try:
        db_session.add_all(datas)
        db_session.commit()
    except Exception:
        # A failed flush/commit leaves the session unusable until rollback()
        # is called, so reset it before retrying the items one by one.
        db_session.rollback()
        for data in datas:
            cls.add_one(data)
def add_task(cls, task):
    """Stamp ``last_update`` on ``task`` and commit it, if it is a Task.

    Args:
        cls: Not used by the body.
        task: Candidate object to persist.

    Returns:
        True when ``task`` is a Task instance and was committed; False
        otherwise, in which case the object is not modified or added.
    """
    if not isinstance(task, Task):
        return False
    task.last_update = datetime.datetime.now()
    db_session.add(task)
    db_session.commit()
    return True
def save_user_category(cls, category, name, description):
    """Create and commit a UserCategory row.

    Args:
        cls: Not used by the body.
        category: Stored on ``UserCategory.category``.
        name: Stored on ``UserCategory.name``.
        description: Stored on ``UserCategory.description``.

    Returns:
        The UserCategory instance that was added and committed.
    """
    user_category = UserCategory()
    field_values = (
        ('category', category),
        ('name', name),
        ('description', description),
    )
    for attr_name, attr_value in field_values:
        setattr(user_category, attr_name, attr_value)
    db_session.add(user_category)
    db_session.commit()
    return user_category
def save_agent(cls, area, status=0, config=''):
    """Create and commit an Agent row.

    Args:
        cls: Not used by the body.
        area: Stored on ``Agent.active_area``.
        status: Stored on ``Agent.status``; defaults to 0.
        config: Stored on ``Agent.config``; defaults to ``''``.

    Returns:
        The Agent instance that was added and committed.
    """
    new_agent = Agent()
    new_agent.status = status
    new_agent.active_area = area
    new_agent.config = config

    db_session.add(new_agent)
    db_session.commit()
    return new_agent
def set_job_result(cls, job_id, result):
    """Store ``result`` on the Job whose id equals ``job_id``.

    Args:
        cls: Not used by the body.
        job_id: Value compared against ``Job.id``.
        result: New value for ``Job.result``.

    Returns:
        True when a matching job was found and the change committed; False
        when the query returned nothing.
    """
    matching_job = db_session.query(Job).filter(Job.id == job_id).first()
    if not matching_job:
        return False
    matching_job.result = result
    db_session.commit()
    return True
def save_user(cls, category=0, enable_tasks='', token=''):
    """Create and commit a User row.

    Args:
        cls: Not used by the body.
        category: Stored on ``User.category``; defaults to 0.
        enable_tasks: Stored on ``User.enable_tasks``; defaults to ``''``.
        token: Stored on ``User.token``; defaults to ``''``.

    Returns:
        The User instance that was added and committed.
    """
    new_user = User()
    field_values = (
        ('category', category),
        ('enable_tasks', enable_tasks),
        ('token', token),
    )
    for attr_name, attr_value in field_values:
        setattr(new_user, attr_name, attr_value)
    db_session.add(new_user)
    db_session.commit()
    return new_user
def set_aps_status(cls, aps_id, status):
    """Set ``status`` on the first TaskAccountGroup whose ``aps_id`` matches.

    Args:
        cls: Not used by the body.
        aps_id: Value compared against ``TaskAccountGroup.aps_id``.
        status: New value for the row's ``status``.

    Returns:
        True when a matching row was found and the change committed; False
        when the query returned nothing.
    """
    group_query = db_session.query(TaskAccountGroup)
    group = group_query.filter(TaskAccountGroup.aps_id == aps_id).first()
    if not group:
        return False
    group.status = status
    db_session.commit()
    return True
def set_task_result(cls, task_id, result):
    """Store ``result`` on a Task and refresh its ``last_update`` timestamp.

    Args:
        cls: Not used by the body.
        task_id: Value compared against ``Task.id``.
        result: New value for ``Task.result``.

    Returns:
        True when a matching task was found and the change committed; False
        when the query returned nothing.
    """
    matching_task = db_session.query(Task).filter(Task.id == task_id).first()
    if not matching_task:
        return False
    matching_task.result = result
    matching_task.last_update = datetime.datetime.now()
    db_session.commit()
    return True
def add_one(cls, parameter):
    """Create a MainUrl row from ``parameter`` and commit it.

    The new row gets ``status`` 0 and an empty ``remark``.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the keys ``address``, ``webSite`` and
            ``sort``.

    Returns:
        None.

    Raises:
        KeyError: If a required key is missing (raised before the session is
            touched).
        Any exception raised by the add/commit; the session is still closed
        before it propagates.
    """
    mainurl = MainUrl()
    mainurl.address = parameter['address']
    mainurl.webSite = parameter['webSite']
    mainurl.status = 0
    mainurl.remark = ""
    mainurl.sort = parameter['sort']
    try:
        db_session.add(mainurl)
        db_session.commit()
    finally:
        # Release the session even when the commit fails, so a failed insert
        # does not leave it open.
        db_session.close()
def add_one(cls, parameter):
    """Create a SpiderTask row from ``parameter`` and commit it.

    The new row gets ``status`` 0, ``creater`` ``'admin'`` and a
    ``create_time`` string holding the current local time formatted as
    ``YYYY-MM-DD HH:MM:SS``.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the key ``task_name``.

    Returns:
        ``{"code": "200", "message": "succeed"}`` after a successful commit.

    Raises:
        KeyError: If ``task_name`` is missing (raised before the session is
            touched).
        Any exception raised by the add/commit; the session is still closed
        before it propagates.
    """
    spider_task = SpiderTask()
    spider_task.task_name = parameter['task_name']
    spider_task.status = 0
    spider_task.creater = 'admin'
    spider_task.create_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    try:
        db_session.add(spider_task)
        db_session.commit()
    finally:
        # Release the session even when the commit fails, so a failed insert
        # does not leave it open.
        db_session.close()
    return {"code": "200", "message": "succeed"}
def save_account(cls, account, password, category, owner, **kwargs):
    """Create and commit an Account row.

    Args:
        cls: Not used by the body.
        account: Stored on ``Account.account``.
        password: Stored on ``Account.password``.
        category: Stored on ``Account.category``.
        owner: Stored on ``Account.owner``.
        **kwargs: Extra attribute values. A key is applied only when the
            Account object already has an attribute with that name, and it is
            applied after the values above, so it can override them.

    Returns:
        The Account instance that was added and committed.
    """
    record = Account()
    record.account = account
    record.password = password
    record.category = category
    record.owner = owner

    for attr_name in kwargs:
        if not hasattr(record, attr_name):
            continue
        setattr(record, attr_name, kwargs[attr_name])

    db_session.add(record)
    db_session.commit()
    return record
def save_scheduler(cls, mode=0, interval=600, start_date=None, end_date=None):
    """Create and commit a Scheduler row.

    Args:
        cls: Not used by the body.
        mode: Stored on ``Scheduler.mode``; defaults to 0.
        interval: Stored on ``Scheduler.interval``; defaults to 600.
        start_date: Stored on ``Scheduler.start_date``. When omitted (or
            None) the current time at call time is used. The previous default
            of ``datetime.datetime.now()`` in the signature was evaluated
            only once, when the function was defined, so every defaulted call
            received that same stale timestamp.
        end_date: Stored on ``Scheduler.end_date``; defaults to None.

    Returns:
        The Scheduler instance that was added and committed.
    """
    if start_date is None:
        start_date = datetime.datetime.now()
    sch = Scheduler()
    sch.mode = mode
    sch.interval = interval
    sch.start_date = start_date
    sch.end_date = end_date
    db_session.add(sch)
    db_session.commit()
    return sch
def save_jobs(cls, jobs):
    """Add every job to the session and commit them together under ``db_lock``.

    Args:
        cls: Not used by the body.
        jobs: Iterable of Job objects and/or dicts. A dict is converted with
            ``Job().dict2Job``. A job whose status is ``'running'`` gets its
            ``start_time`` set to the current time before being added.

    Returns:
        True once the commit has succeeded.

    Raises:
        Any exception raised by the commit. The lock is released before it
        propagates; previously a failing commit left ``db_lock`` held.
    """
    for job in jobs:
        if isinstance(job, dict):
            job = Job().dict2Job(job)
        if job.status == 'running':
            job.start_time = datetime.datetime.now()
        db_session.add(job)
    with db_lock:
        db_session.commit()
    return True
def save_task_category(cls, category, name, processor, configure='', description=''):
    """Create and commit a TaskCategory row.

    Args:
        cls: Not used by the body.
        category: Stored on ``TaskCategory.category``.
        name: Stored on ``TaskCategory.name``.
        processor: Stored on ``TaskCategory.processor``.
        configure: Stored on ``TaskCategory.configure``; defaults to ``''``.
        description: Stored on ``TaskCategory.description``; defaults to
            ``''``.

    Returns:
        The TaskCategory instance that was added and committed.
    """
    task_category = TaskCategory()
    field_values = (
        ('category', category),
        ('name', name),
        ('processor', processor),
        ('configure', configure),
        ('description', description),
    )
    for attr_name, attr_value in field_values:
        setattr(task_category, attr_name, attr_value)
    db_session.add(task_category)
    db_session.commit()
    return task_category
def set_job_status(cls, job_id, status):
    """Change the status of the Job whose id equals ``job_id``.

    Nothing is written when the job already has the requested status.
    Otherwise ``start_time`` is stamped when the status becomes
    ``'running'``, ``end_time`` is stamped when it becomes a terminal status,
    and the change is committed.

    Args:
        cls: Not used by the body.
        job_id: Value compared against ``Job.id``.
        status: New status string.

    Returns:
        True when a matching job was found (whether or not it changed); False
        when the query returned nothing.
    """
    # The track-id based setters in this file treat 'succeed'/'failed' as
    # terminal, while this method only knew 'success'/'failure'. Both
    # spellings are accepted so end_time is stamped whichever is used.
    # NOTE(review): confirm the canonical spelling and drop the other pair.
    terminal_statuses = ('success', 'failure', 'succeed', 'failed')

    job = db_session.query(Job).filter(Job.id == job_id).first()
    if not job:
        return False
    if job.status != status:
        # The first transition to 'running' marks the start time.
        if status == 'running':
            job.start_time = datetime.datetime.now()
        if status in terminal_statuses:
            job.end_time = datetime.datetime.now()
        job.status = status
        db_session.commit()
    return True
def set_job_by_track_id(cls, track_id, status, result='', traceback=''):
    """Update status, result and traceback of the Job matching ``track_id``.

    Nothing is written when the job already has the requested status.
    Otherwise ``start_time`` is stamped for ``'running'``, ``end_time`` is
    stamped for ``'succeed'`` or ``'failed'``, the three fields are updated
    and the change is committed.

    Args:
        cls: Not used by the body.
        track_id: Value compared against ``Job.track_id``.
        status: New status string.
        result: New value for ``Job.result``; defaults to ``''``.
        traceback: New value for ``Job.traceback``; defaults to ``''``.

    Returns:
        True when a matching job was found (whether or not it changed); False
        when the query returned nothing.
    """
    matching_job = db_session.query(Job).filter(Job.track_id == track_id).first()
    if not matching_job:
        return False

    status_changed = matching_job.status != status
    if status_changed:
        # The first transition to 'running' marks the start time.
        if status == 'running':
            matching_job.start_time = datetime.datetime.now()
        if status in ['succeed', 'failed']:
            matching_job.end_time = datetime.datetime.now()
        matching_job.result = result
        matching_job.traceback = traceback
        matching_job.status = status
        db_session.commit()
    return True
def update_mainurl(cls, parameter):
    """Update the optional fields of the MainUrl row selected by ``pid``.

    Any of ``remark``, ``status`` and ``rule`` present in ``parameter`` is
    copied onto the row; missing keys are skipped. All changes are written in
    a single commit. The previous version committed and closed the session
    after each field, which detached the row so that only the first supplied
    field was saved, and it hid every error behind bare ``except: pass``.

    The update stays best-effort: a missing row is a no-op and a database
    failure is logged and rolled back rather than raised.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the required key ``pid`` and the optional
            keys ``remark``, ``status`` and ``rule``.

    Returns:
        None.

    Raises:
        KeyError: If ``pid`` is missing from ``parameter``.
    """
    import logging

    pid = parameter['pid']
    mainurl = db_session.query(MainUrl).filter(MainUrl.pid == pid).first()
    try:
        if mainurl is None:
            # Nothing to update; the original silently did nothing as well.
            return
        for field in ('remark', 'status', 'rule'):
            try:
                new_value = parameter[field]
            except KeyError:
                continue
            setattr(mainurl, field, new_value)
        db_session.commit()
    except Exception:
        logging.getLogger(__name__).exception(
            'update_mainurl failed for pid=%s', pid)
        db_session.rollback()
    finally:
        db_session.close()
def delete_one(cls, parameter):
    """Delete the MainUrl row whose ``pid`` equals ``parameter["pid"]``.

    If no such row exists the call is a no-op. Previously ``None`` was handed
    to ``db_session.delete``, which raised and left the session open.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the key ``pid``.

    Returns:
        None.

    Raises:
        KeyError: If ``pid`` is missing from ``parameter``.
        Any exception raised by the delete/commit; the session is still
        closed before it propagates.
    """
    maininfo = db_session.query(MainUrl).filter(
        MainUrl.pid == parameter["pid"]).first()
    try:
        if maininfo is not None:
            db_session.delete(maininfo)
            db_session.commit()
    finally:
        db_session.close()
def delete_one(cls, parameter):
    """Delete the SpiderTask row whose ``id`` equals ``parameter["id"]``.

    If no such row exists the call is a no-op. Previously ``None`` was handed
    to ``db_session.delete``, which raised and left the session open.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the key ``id``.

    Returns:
        None.

    Raises:
        KeyError: If ``id`` is missing from ``parameter``.
        Any exception raised by the delete/commit; the session is still
        closed before it propagates.
    """
    spider_task = db_session.query(SpiderTask).filter(
        SpiderTask.id == parameter["id"]).first()
    try:
        if spider_task is not None:
            db_session.delete(spider_task)
            db_session.commit()
    finally:
        db_session.close()
def update_status(cls, parameter):
    """Set the status of the SpiderTask selected by ``parameter["id"]``.

    Args:
        cls: Not used by the body.
        parameter: Mapping with the keys ``id`` and ``status``; both values
            are converted with ``int``.

    Returns:
        None.

    Raises:
        KeyError: If ``id`` or ``status`` is missing.
        ValueError: If either value cannot be converted to int.
        AttributeError: If no SpiderTask row matches the id (unchanged from
            the original behaviour).

        For errors raised after the lookup, the session is now closed before
        the exception propagates; previously it was left open.
    """
    spider_task = db_session.query(SpiderTask).filter(
        SpiderTask.id == int(parameter["id"])).first()
    try:
        spider_task.status = int(parameter["status"])
        db_session.commit()
    finally:
        db_session.close()