def post_images():
    """Expand queued posting items into push tasks, then run pending pushes.

    Phase 1: every item polled at (Posting, Queued) gets one SecondaryTask per
    push entry of each of its channels, and is moved to (Posting, Pending).

    Phase 2: each task returned by ``SecondaryTask.poll_tasks(20)`` is acquired
    and pushed through the client for its target service. A task is closed
    when the push succeeds or when its poll counter has reached the client's
    push limit; it is released when the push raises. Once
    ``SecondaryTask.task_done`` is true for the item, the item is moved to
    (Cleaning, Queued).
    """
    # Phase 1: fan each queued item out into per-channel, per-pipe push tasks.
    for queued in ItemInfo.poll_status(TaskStage.Posting, TaskStatus.Queued):
        for channel in ItemInfo.get_channels(queued):
            for pipe in config.pipeline[channel].push:
                SecondaryTask.add_task(
                    queued.service,
                    queued.item_id,
                    pipe.service,
                    pipe.config,
                    channel,
                )
        ItemInfo.set_status(
            queued.service,
            queued.item_id,
            TaskStage.Posting,
            TaskStatus.Pending,
        )

    # Phase 2: work through the pollable push tasks.
    for task in SecondaryTask.poll_tasks(20):
        stype, item_id, ptype, conf, ch, poll_counter = task
        task_key = (stype, item_id, ptype, conf, ch)

        SecondaryTask.acquire_task(*task_key)
        print((stype.value, item_id), '=>', (ptype.value, conf))
        item = ItemInfo.get_item(stype, item_id)

        # NOTE(review): the task was acquired above but is neither released
        # nor closed on this path -- confirm that is intended.
        if not service_exists(ptype, conf):
            continue

        client = get_service(ptype, conf)
        if poll_counter >= client.push_limit():
            # Too many attempts: give up on this task.
            print("Failed to push item.")
            SecondaryTask.close_task(*task_key)
        else:
            # Copy every stored image into its own in-memory buffer.
            images = [BytesIO(fh.read()) for fh in ItemInfo.get_images(item)]
            if item.attachment_urls:
                images.extend(
                    BytesIO(fh.read())
                    for fh in ItemInfo.get_attachment_images(item)
                )
            converted_username = pull_services[item.service].convert_username(
                item.source_id
            )
            try:
                client.push_item(item, images, ch, converted_username)
            except Exception:
                # Log and hand the task back so it can be polled again.
                traceback.print_exc()
                SecondaryTask.release_task(*task_key)
            else:
                SecondaryTask.close_task(*task_key)

        if SecondaryTask.task_done(stype, item_id):
            print("Post Done", (item.service, item.item_id))
            ItemInfo.set_status(
                item.service,
                item.item_id,
                TaskStage.Cleaning,
                TaskStatus.Queued,
            )
def run_schedule(task):
    """Run the scheduled job registered under *task* forever, until Ctrl-C.

    ``task_conf[task]`` supplies ``(minutes, func)``. The function is wrapped
    with ``try_to_run``, the database connection is opened, pending items are
    cleaned once, and then the wrapped function is invoked repeatedly with a
    pause of ``minutes`` minutes between runs.

    Args:
        task: Key into ``task_conf`` selecting the job to run.

    Returns:
        None, once a KeyboardInterrupt stops the loop.
    """
    minutes, func = task_conf[task]
    guarded = partial(try_to_run, func)
    connect_db()
    ItemInfo.clean_pending_items()

    interval_seconds = minutes * 60
    while True:
        try:
            guarded()
            # The sleep lives inside the try so that Ctrl-C pressed while
            # waiting (where the loop spends most of its time) also ends the
            # loop cleanly instead of escaping as an unhandled exception.
            time.sleep(interval_seconds)
        except KeyboardInterrupt:
            break
def _add_item(pipeline_name):
    """Queue the item behind a submitted URL for fetching in a pipeline.

    Reads the ``url`` form field and offers it to each pull entry of the named
    pipeline in order. The first service whose ``parse_item_id`` returns a
    non-None id gets an index entry registered for this pipeline and set to
    (Fetching, Queued); the response then redirects to the pipeline page.

    Args:
        pipeline_name: Key of the pipeline in the loaded configuration.

    Returns:
        A redirect to ``/pipeline/<pipeline_name>`` on success, otherwise the
        string ``"Unknown URL"``.
    """
    submitted_url = request.form.get("url")
    pull_entries = load_config().pipeline[pipeline_name].pull

    for entry in pull_entries:
        parsed_id = pull_services[entry.service].parse_item_id(submitted_url)
        if parsed_id is not None:
            index_item = IndexItem(service=entry.service, item_id=parsed_id)
            ItemInfo.add_index(index_item, [pipeline_name])
            ItemInfo.set_status(
                entry.service,
                parsed_id,
                TaskStage.Fetching,
                TaskStatus.Queued,
            )
            return redirect(f"/pipeline/{pipeline_name}")

    # No pull service recognised the URL.
    return "Unknown URL"
def _failure_browser(item_type, item_stage):
    """Render the failures page for one service type and task stage.

    Args:
        item_type: Raw value converted with ``ServiceType(...)``.
        item_stage: Raw value converted with ``TaskStage(...)``.

    Returns:
        The rendered ``failures.jinja2`` template, given the matching failed
        items plus the converted stage and service type.
    """
    service_type = ServiceType(item_type)
    stage = TaskStage(item_stage)
    failed_items = ItemInfo.get_failures(service=service_type, stage=stage)
    return render_template(
        'failures.jinja2',
        items=failed_items,
        stage=stage,
        item_type=service_type,
    )
def update_subs():
    """Poll every subscription source and queue items not seen before.

    For each ``(service type, function)`` entry in ``subscribe_services``
    whose service type appears in the loaded ``config.api``, a service object
    is built from the first configuration listed for that type. For every
    subscription of that entry:

    * items from ``subscribe_index`` that do not exist yet are added as index
      entries and set to (Fetching, Queued);
    * items from ``subscribe_full`` that do not exist yet are added as full
      items and set to (Downloading, Queued).
    """
    cfg = load_config()
    for (stype, sfunc), service_cls in subscribe_services.items():
        if stype not in cfg.api:
            continue

        # Only the first configured API entry of this service type is used.
        first_conf = list(cfg.api[stype].values())[0]
        service = service_cls(first_conf)

        for name, channels in SubscribeSource.get_subs(stype, sfunc):
            # Index-only results still need their details fetched.
            for found in service.subscribe_index(name):
                if ItemInfo.exists(found.service, found.item_id):
                    continue
                ItemInfo.add_index(found, channels)
                print(stype.value, sfunc, name, found)
                ItemInfo.set_status(
                    found.service,
                    found.item_id,
                    TaskStage.Fetching,
                    TaskStatus.Queued,
                )

            # Full results can go straight to the download stage.
            for found in service.subscribe_full(name):
                if ItemInfo.exists(found.service, found.item_id):
                    continue
                ItemInfo.add_item(found, channels)
                print(stype.value, sfunc, name, found)
                ItemInfo.set_status(
                    found.service,
                    found.item_id,
                    TaskStage.Downloading,
                    TaskStatus.Queued,
                )
def _pipeline(pipeline_name):
    """Render the overview page of a single pipeline.

    For every subscribe entry of the pipeline, collects a tuple of
    ``(service type value, function name, subscriptions, subscription count,
    options)``, where each subscription is ``(name, title, url)`` as reported
    by the matching entry of ``subscribe_services``.

    Args:
        pipeline_name: Key of the pipeline in the loaded configuration.

    Returns:
        The rendered ``pipeline.jinja2`` template, given the pipeline name,
        the collected subscriptions and the overall status counts.
    """
    pipeline = load_config().pipeline[pipeline_name]

    subs = []
    for sub in pipeline.subscribe:
        key = sub.service
        entries = []
        for name, _channels in SubscribeSource.get_subs_by_channel(
                *key, pipeline_name):
            handler = subscribe_services[key]
            entries.append(
                (name, handler.get_title(name), handler.get_url(name))
            )
        options = subscribe_services[key].options()
        subs.append((key[0].value, key[1], entries, len(entries), options))

    status = ItemInfo.count_status()
    return render_template(
        'pipeline.jinja2',
        pipeline_name=pipeline_name,
        subs=subs,
        status=status,
    )
def _index():
    """Render the landing page listing all pipelines and the status counts.

    Returns:
        The rendered ``index.jinja2`` template, given the ``(name, pipeline)``
        pairs from the loaded configuration and ``ItemInfo.count_status()``.
    """
    cfg = load_config()
    pipeline_pairs = cfg.pipeline.items()
    status_counts = ItemInfo.count_status()
    return render_template(
        'index.jinja2',
        pipelines=pipeline_pairs,
        status=status_counts,
    )
def download_images():
    """Download and store the images of every item queued for downloading.

    Each item polled at (Downloading, Queued), limited by
    ``config.limit.download``, is set to (Downloading, Pending) while it is
    processed. Every image URL is downloaded, re-saved as PNG into an
    in-memory buffer and stored, with a one-second pause between URLs. When
    the item has attachment URLs, every file extracted from them is converted
    the same way and stored under a running index.

    On any exception the traceback is printed and the item goes back to
    (Downloading, Queued); otherwise it advances to (Posting, Queued).
    Finally ``ItemInfo.abandon_tasks`` is called for the downloading stage.
    """
    queued_items = ItemInfo.poll_status(
        TaskStage.Downloading,
        TaskStatus.Queued,
        limit=config.limit.download,
    )
    for item in queued_items:
        service = get_service(item.service)
        ItemInfo.set_status(
            item.service,
            item.item_id,
            TaskStage.Downloading,
            TaskStatus.Pending,
        )
        try:
            for image_url in item.image_urls:
                print(image_url)
                downloaded = service.download_item_image(item, image_url)
                picture = Image.open(downloaded)
                with BytesIO() as png:
                    picture.save(png, format="PNG")
                    png.seek(0)  # rewind so the consumer reads from the start
                    ItemInfo.save_image(item, image_url, png)
                time.sleep(1)

            if item.attachment_urls:
                # Flatten all extracted files lazily; the index keeps counting
                # across attachment URLs.
                extracted_files = (
                    member
                    for attachment_url in item.attachment_urls
                    for member in service.extract_attachments(
                        item, attachment_url)
                )
                for index, member in enumerate(extracted_files):
                    picture = Image.open(member)
                    with BytesIO() as png:
                        picture.save(png, format="PNG")
                        png.seek(0)
                        ItemInfo.save_attachment_image(item, index, png)
        except Exception:
            traceback.print_exc()
            # Put the item back in the queue so it can be retried.
            next_state = (TaskStage.Downloading, TaskStatus.Queued)
        else:
            next_state = (TaskStage.Posting, TaskStatus.Queued)
        ItemInfo.set_status(item.service, item.item_id, *next_state)

    # NOTE(review): presumably moves items that stayed queued past 20 polls to
    # the failed state -- confirm against ItemInfo.abandon_tasks.
    ItemInfo.abandon_tasks(
        TaskStage.Downloading,
        TaskStatus.Queued,
        20,
        TaskStage.Downloading,
        TaskStatus.Failed,
    )
def update_index():
    """Fetch full details for every index entry queued for fetching.

    Each entry polled at (Fetching, Queued), limited by
    ``config.limit.fetch``, is set to (Fetching, Pending) and pulled through
    its service. The outcome decides the next state:

    * the pull raised: the traceback is printed and the entry goes back to
      (Fetching, Queued);
    * the pull returned None: the entry becomes (Fetching, Failed);
    * otherwise the full item is stored and the entry advances to
      (Downloading, Queued).

    The loop pauses one second after each entry. Finally
    ``ItemInfo.abandon_tasks`` is called for the fetching stage.
    """
    queued_entries = ItemInfo.poll_status_index(
        TaskStage.Fetching,
        TaskStatus.Queued,
        limit=config.limit.fetch,
    )
    for entry in queued_entries:
        service = get_service(entry.service)
        ItemInfo.set_status(
            entry.service,
            entry.item_id,
            TaskStage.Fetching,
            TaskStatus.Pending,
        )
        try:
            full_item = service.pull_item(
                IndexItem(entry.service, entry.item_id)
            )
        except Exception:
            traceback.print_exc()
            # Requeue so the fetch is attempted again later.
            next_state = (TaskStage.Fetching, TaskStatus.Queued)
        else:
            print(entry, full_item)
            if full_item is None:
                next_state = (TaskStage.Fetching, TaskStatus.Failed)
            else:
                ItemInfo.add_item(full_item, [])
                next_state = (TaskStage.Downloading, TaskStatus.Queued)
        ItemInfo.set_status(entry.service, entry.item_id, *next_state)
        time.sleep(1)

    # NOTE(review): presumably moves entries that stayed queued past 20 polls
    # to the failed state -- confirm against ItemInfo.abandon_tasks.
    ItemInfo.abandon_tasks(
        TaskStage.Fetching,
        TaskStatus.Queued,
        20,
        TaskStage.Fetching,
        TaskStatus.Failed,
    )
def clean_cache():
    """Clean the cache of every item queued for cleaning and mark it done.

    Each item polled at (Cleaning, Queued) is passed to
    ``ItemInfo.clean_cache`` and then set to (Done, Queued).
    """
    pending_cleanup = ItemInfo.poll_status(
        TaskStage.Cleaning, TaskStatus.Queued
    )
    for finished in pending_cleanup:
        ItemInfo.clean_cache(finished)
        ItemInfo.set_status(
            finished.service,
            finished.item_id,
            TaskStage.Done,
            TaskStatus.Queued,
        )
def _reset_failure():
    """Retry a failed item and return to the failures page it was listed on.

    Reads the ``service``, ``stage`` and ``item_id`` form fields, converts the
    first two with ``ServiceType`` and ``TaskStage``, and asks
    ``ItemInfo.retry_failure`` to retry the item.

    Returns:
        A redirect to ``/failures/<service value>/<stage value>``.
    """
    form = request.form
    service_type = ServiceType(form.get("service"))
    task_stage = TaskStage(form.get("stage"))
    failed_id = form.get("item_id")

    ItemInfo.retry_failure(service_type, failed_id)

    return redirect(f'/failures/{service_type.value}/{task_stage.value}')