async def x(index):
    """Drain matching items once, retrying a bounded number of times.

    Every name other than ``index`` (``from_``, ``to_``, ``retries_``,
    ``db``, ``M``, ``FIELD_NAME``, ``limit``, ``processed``, ``sleep_time``,
    ``sleep_coef``, ``Log``) comes from an enclosing scope that is not
    visible here.

    One pass iterates ``db.choose(...)``; each item is logged, counted in
    ``processed[(from_, to_)]`` and followed by a ``sleep_time`` pause.
    A pass that produced at least one item ends the coroutine after
    logging "STOP".  An empty pass consumes one retry and sleeps
    ``sleep_time * sleep_coef``; when no retries remain the coroutine
    returns silently.
    """
    log = Log(f"{from_}=>{to_}#{index}")
    attempts_left = retries_

    while True:
        got_item = False

        async for item in db.choose(M, {FIELD_NAME: from_}, {FIELD_NAME: to_}, limit_=limit):
            log.info(item=item)
            processed[(from_, to_)] += 1
            await asyncio.sleep(sleep_time)
            got_item = True

        if got_item:
            # A productive pass finishes the job.
            log.important("STOP")
            return

        if attempts_left == 0:
            # Retry budget exhausted: give up without the "STOP" message.
            return

        attempts_left -= 1
        # Report the ordinal number of the retry about to start.
        log.warning(retry=retries_ - attempts_left)
        await asyncio.sleep(sleep_time * sleep_coef)
Exemple #2
0
class _Tasks:
    def __init__(self):
        self.log = Log(self.__class__.__name__)

    async def remove_tasks(self, tasks: Dict[asyncio.Task, Any]):
        assert isinstance(tasks, dict)

        finished = tuple(task for task in tasks.keys() if task.done())
        for task in finished:
            if task.cancelled():
                self.log.warning("Task was cancelled: ", task=task)
            else:
                try:
                    result = await task
                    if result:
                        self.log.debug("Task say:", task=task, result=result)
                except RuntimeError:
                    raise
                except Exception:
                    self.log.exception("Task shout:", task=task)

            del tasks[task]

        return tasks
 def add_warning(self, text, *args, **kwargs):
     """Emit a warning through ``Log``, prefixed with this object's name.

     The message is formatted as ``"<name_prefix> <name>: <text>"``; any
     extra positional and keyword arguments are forwarded unchanged to
     ``Log.warning``.
     """
     message = f'{self.name_prefix!s} {self.name!s}: {text!s}'
     Log.warning(message, *args, **kwargs)
    def get_load_avg(self):
        """Check the first ``os.getloadavg()`` value against ``self.max_avg``.

        When the value is at or above the limit, a warning is logged and a
        ``load_alert`` event carrying the value, the limit and the current
        timestamp is fired.  Nothing is returned.
        """
        current_load = os.getloadavg()[0]

        if not current_load >= self.max_avg:
            # Below the limit: nothing to report.
            return

        Log.warning('Average at %f (max: %d)' % (current_load, self.max_avg))
        self.fire(load_alert(current_load, self.max_avg, time.time()))
Exemple #5
0
class VK:
    """Asynchronous VK API client built on a shared aiohttp session.

    Requests are spaced out by an adaptive delay (``threshold``): it grows
    when the API answers "too many requests" and shrinks slightly after
    every successful call.  ``warm_up()`` must be awaited before the first
    request and ``shutdown()`` once the client is no longer needed.
    """

    def __init__(self, config_vk):
        self.log = Log("VK")

        self.config = config_vk

        # Query parameters merged into every request.
        self.additional_params = {
            'access_token': self.config.token,
            'lang': 'ru',
            'v': "5.103"
        }

        # Comma-separated profile fields requested by `persons_info`.
        self.person_fields = ",".join([
            "first_name",
            "last_name",
            "deactivated",
            "verified",
            "sex",
            "bdate",
            "city",  # TODO: https://vk.com/dev/places.getCityById
            "country",  # TODO: https://vk.com/dev/places.getCountryById
            "home_town",
            "photo_400_orig",
            "online",
            "has_mobile",
            "contacts",
            "education",
            "universities",
            "schools",
            "last_seen",
            "occupation",
            "hidden",
        ])

        # Comma-separated fields requested by `group_info`.
        self.group_info_fields = ",".join([
            'id', 'name', 'type', 'photo_200', 'city', 'description', 'place'
        ])

        # No extra post fields are requested (empty string).
        self.post_fields = ",".join([])

        # Created lazily by `warm_up()`.
        self.session: aiohttp.ClientSession = None

        # Timestamp of the most recent request and the current minimal
        # spacing between requests, both in seconds.
        self.last_call = 0
        self.threshold = 1 / 3

        self._update_token = None

        # Serializes the pre-request throttling check.
        self.query_lock = asyncio.Lock()

        self.stats = VKStats('vk', "VK API")

        self.auth_lock = asyncio.Lock()

    async def warm_up(self):
        """Create the HTTP session; must be awaited before any API call."""
        self.session = aiohttp.ClientSession()

    async def call_method(self, method, **params):
        """Call API ``method`` with ``params`` and return its ``response``.

        The request is retried for as long as the API reports "too many
        requests", increasing the throttling delay (capped at 1 second)
        each time.

        Returns:
            The ``response`` member of the decoded JSON answer, or ``None``
            when the API reported one of the errors that are only logged
            (invalid session, private profile, deleted or banned profile).

        Raises:
            VKError: for every other API error.
            AssertionError: if ``warm_up()`` was not awaited first, or the
                answer contains neither ``error`` nor ``response``.
        """
        self.log.debug(method=method, params=params)

        self.stats.call_methods_count += 1

        try:
            while True:
                assert self.session is not None, "call `await .warm_up()` first"
                async with self.query_lock:
                    # Throttle: wait if the previous request was sent less
                    # than `threshold` seconds ago.
                    if time() - self.threshold < self.last_call:
                        self.log.deep("Sleep", threshold=self.threshold)
                        await asyncio.sleep(self.threshold)

                self.last_call = time()
                self.stats.queries += 1
                # `async with` guarantees the response (and its pooled
                # connection) is released even if decoding the body fails.
                async with self.session.get(
                        url=f"{self.config['api_host']}{method}",
                        params={
                            **params,
                            **self.additional_params
                        },
                        timeout=10) as response:
                    self.stats.by_type[method] += 1

                    result = await response.json()

                if 'error' in result:
                    self.stats.errors += 1
                    vk_error = VKError(result['error'])
                    if vk_error.error_code == VKError.TOO_MANY_REQUESTS:
                        self.stats.errors_too_many += 1
                        # Back off by 10 %, but never beyond one second.
                        self.threshold *= 1.1
                        if self.threshold > 1:
                            self.threshold = 1
                        self.log.warning("Too many requests",
                                         threshold=self.threshold)
                        continue

                    # The next three errors are logged only; leaving the
                    # loop makes the method return None.
                    if vk_error.error_code == VKError.INVALID_SESSION:
                        self.log.warning(
                            "Need auth. Please run <app> <config> auth")
                        break

                    if vk_error.error_code == VKError.PROFILE_PRIVATE:
                        self.log.warning("Profile private",
                                         method=method,
                                         params=params)
                        break

                    if vk_error.error_code == VKError.DELETED_OR_BANNED:
                        self.log.warning("Profile deleted or banned",
                                         method=method,
                                         params=params)
                        break

                    if vk_error.error_code == VKError.RATE_LIMIT_REACHED:
                        self.log.important("RATE LIMIT")

                    raise vk_error
                else:
                    self.stats.success += 1
                    assert 'response' in result
                    # Success: cautiously shorten the delay again.
                    self.threshold *= 0.991
                    return result['response']
        finally:
            self.stats.call_methods_count -= 1
            self.stats.threshold = self.threshold

    async def persons_info(self, *user_ids) -> Sequence[VKPerson]:
        """Fetch profiles for ``user_ids`` via ``users.get``.

        Returns a list of ``VKPerson``; the list is empty when
        ``call_method`` handled an API error and returned ``None``.
        """
        answer = await self.call_method("users.get",
                                        user_ids=",".join(map(str, user_ids)),
                                        fields=self.person_fields)

        if answer is None:
            # The error was already logged by call_method; treat it as
            # "no data", the same way `_offsetter` does.
            return []

        return [VKPerson(**user_info) for user_info in answer]

    async def me(self) -> VKPerson:
        """Return the profile of the user configured as ``config.user_id``."""
        return (await self.persons_info(self.config.user_id))[0]

    async def group_info(self, group_id) -> VKGroup:
        """Fetch one group via ``groups.getById`` and wrap it in ``VKGroup``."""
        answer = await self.call_method("groups.getById",
                                        group_id=group_id,
                                        fields=self.group_info_fields)
        # NOTE(review): call_method returns None for handled API errors, in
        # which case len() below raises TypeError - confirm desired handling.
        assert len(answer) == 1
        group = answer[0]
        return VKGroup(**group)

    async def person_posts(self, person_id, count):
        """Return up to ``count`` wall posts of ``person_id`` as a list."""
        return [post async for post in self._posts_count(person_id, count)]

    async def person_posts_iter(self, person_id, count=None):
        """Yield wall posts of ``person_id`` (all of them if ``count`` is None)."""
        async for post in self._posts_count(person_id, count):
            yield post

    async def group_posts_iter(self, group_id, count=None):
        """Yield wall posts of a group.

        The id is negated before being used as ``owner_id`` (presumably
        VK's convention for community walls - verify against API docs).
        """
        async for post in self._posts_count(-group_id, count):
            yield post

    async def comments_iter(self, owner_id, post_id, count=None):
        """Yield ``VKComment`` objects for a post via ``wall.getComments``.

        Each comment gets ``post_id`` and ``owner_id`` attached.
        """
        async for raw_data in self._offsetter(
                count,
                dict(
                    method='wall.getComments',
                    owner_id=owner_id,
                    post_id=post_id,
                    need_likes=1,
                    preview_length=0,
                    extended=0,
                    thread_items_count=10,
                )):
            comment = VKComment(**raw_data)
            comment.post_id = post_id
            comment.owner_id = owner_id
            yield comment

    async def group_posts(self, group_id, count=None, from_ts=None):
        """Return wall posts of a group as a list.

        Raises:
            ValueError: if both ``count`` and ``from_ts`` are given.

        NOTE(review): ``from_ts`` is only validated here; it does not
        influence which posts are fetched.
        """
        if count is not None and from_ts is not None:
            raise ValueError("Use one of attribute: `count` or `from_ts`")

        return [post async for post in self._posts_count(-group_id, count)]

    async def _posts_count(self, owner_id, count):
        """Yield ``VKPost`` objects from ``wall.get`` for ``owner_id``."""
        async for post in self._offsetter(
                count,
                dict(method="wall.get",
                     owner_id=owner_id,
                     fields=self.post_fields)):
            yield VKPost(**post)

    async def _offsetter(self, count, params):
        """Page through an offset-based API method and yield raw items.

        Args:
            count: maximum number of items to yield; ``None`` means all.
            params: keyword arguments for ``call_method`` (must include
                ``method``); ``count`` and ``offset`` are added per page.

        Pages hold at most 100 items.  Iteration ends silently when
        ``call_method`` returns ``None``.

        Raises:
            ValueError: if ``count`` is less than 1.
            VKError: re-raised after being logged.
        """
        # TODO: Can be optimized! Use asyncio.gather after first query, Luke!
        if count is None:
            count = float("+inf")

        if count < 1:
            raise ValueError(f"{count=} must be more than 0")

        offset = 0
        items_count = count

        while offset < items_count:
            to_download = min(items_count - offset, 100)

            try:
                answer = await self.call_method(**params,
                                                count=to_download,
                                                offset=offset)
            except VKError:
                self.log.exception(params=params)
                raise

            if answer is None:
                # Good error in call_method
                return

            # The total reported by the API may be smaller than requested.
            items_count = min(count, answer['count'])

            if to_download != len(answer['items']):
                # A short page is only suspicious when more items should
                # have been available.
                if to_download < items_count:
                    self.log.warning("Downloaded items count:",
                                     wanted=to_download,
                                     actual=len(answer['items']))

            offset += to_download

            for item in answer['items']:
                yield item

    async def group_user_ids(self, group_id, count=None) -> Sequence[int]:
        """Return the items yielded by ``group_participants_iter`` as a list."""
        users = []
        async for user_id in self.group_participants_iter(group_id, count):
            users.append(user_id)

        return users

    async def group_participants_iter(self, group_id, count=None):
        """Yield items from ``groups.getMembers`` for ``group_id``."""
        async for user_id in self._offsetter(
                count, dict(method="groups.getMembers", group_id=group_id)):
            yield user_id

    async def shutdown(self):
        """Close the HTTP session if one was created."""
        if self.session:
            await self.session.close()
Exemple #6
0
class BaseWork:
    """Base class for a pipeline: ``input`` -> ``process`` -> ``update``.

    Subclasses override ``input`` (async generator of items), ``process``
    (async generator of results for one item) and ``update`` (consumer of
    one result).  Calling the instance runs the whole pipeline: warm-up,
    the input and result cycles side by side, then shutdown.

    Class attributes:
        MUTE_EXCEPTION: when true, an exception from the main cycle is
            logged and swallowed; otherwise it is re-raised.
        PARALLEL: value handed to ``TasksManager``.
        INPUT_RETRIES: number of empty input passes tolerated before
            stopping; ``0`` means ``input`` is consumed exactly once.
        WAIT_COEF: multiplier for the pause between empty input passes.
    """

    start_time = time()

    MUTE_EXCEPTION = True

    PARALLEL = 10
    INPUT_RETRIES = 0
    WAIT_COEF = 1

    need_stop = False

    # Shared across all instances: last index handed out per class name.
    work_ids: Dict[str, int] = {}

    def __init__(self):
        work_name = self.__class__.__name__
        self.log = Log(work_name)

        self.log.debug("Register work")
        # Indices start at 0 for the first instance of each class.
        self.work_ids.setdefault(work_name, -1)
        self.work_ids[work_name] += 1
        work_id = f"{work_name}_{self.work_ids[work_name]}"
        self.log.debug("Work registered", work_id=work_id)

        self.stat = Stats(work_id, work_name)

        self.log.debug("Run task manager")
        self.task_manager = TasksManager(self.PARALLEL)
        self.tasks: List[TaskInfo] = []

        self.state = "Base class initialized"

    @property
    def state(self):
        """Human-readable state, stored in ``self.stat.state``."""
        return self.stat.state

    @state.setter
    def state(self, value):
        self.stat.state = value
        self.log.debug(self.state)

    async def warm_up(self):
        """Hook run before the main cycle; no-op by default."""
        pass

    async def input(self):
        """Async generator of input items; must be overridden."""
        # NOTE(review): as written, the base implementation yields one None
        # item before raising.
        yield
        raise NotImplementedError()

    async def process(self, item):
        """Async generator of results for ``item``; must be overridden."""
        # NOTE(review): as written, the base implementation yields one None
        # result before raising.
        yield
        raise NotImplementedError()

    async def update(self, result):
        """Consume one result of ``process``; must be overridden."""
        raise NotImplementedError()

    async def shutdown(self):
        """Hook run after the main cycle; no-op by default."""
        pass

    async def __call__(self):
        """Run the pipeline: warm up, main cycle, shutdown.

        ``shutdown()`` and the ``finished_time`` stamp are performed even
        when the main cycle fails and the exception is re-raised (or the
        coroutine is cancelled), so resources acquired in ``warm_up()``
        are not leaked.  The final "Finished" state is only set when no
        exception propagates.
        """
        self.state = "🔥 Warming up"
        await self.warm_up()
        self.stat._start_time = time()

        try:
            await self.main_cycle()
        except Exception:
            self.log.exception("MAIN CYCLE")
            if not self.MUTE_EXCEPTION:
                raise
        finally:
            self.stat.finished_time = time()

            self.state = "🛑 Shutdown"
            await self.shutdown()

        self.state = "🏁 Finished"

    async def main_cycle(self):
        """Run the input and result cycles concurrently."""
        self.state = "⌛️ Ready to start"
        await asyncio.gather(self._input_cycle(), self._result_cycle())

    async def _result_cycle(self):
        """Feed results from the task manager to ``update``.

        Polls with a one-second timeout and stops when the task manager
        hands out a ``TasksManager.Finish`` instance.
        """
        while True:
            try:
                result = await asyncio.wait_for(self.task_manager.take(), 1)
            except asyncio.TimeoutError:
                continue

            if isinstance(result, TasksManager.Finish):
                break

            await self.update(result)

            self.stat.updated_items += 1

    async def _input_cycle(self):
        """Feed items from ``input`` to the task manager until told to stop.

        ``self.stat.retries`` doubles as a marker: it is set to ``None``
        whenever a pass produced at least one item, and otherwise counts
        consecutive empty passes.  After the loop the task manager is
        stopped.
        """
        self.stat.retries = 0

        while not self.need_stop:
            self.state = "🔎 Wait for new item"

            async for item in self.input():
                self.stat.input_items += 1
                await self.task_manager.put(self._run_task(TaskInfo(item)))
                self.stat.retries = None

            if self.INPUT_RETRIES == 0:
                # Need to run only one time
                self.need_stop = True
                continue

            if self.stat.retries is None:
                # Item found
                self.stat.retries = 0
                # Yield control once before polling the input again.
                await asyncio.sleep(0)
                continue

            if self.stat.retries >= self.INPUT_RETRIES:
                self.log.warning("Too many retries, i'm done",
                                 retries=self.stat.retries)
                self.need_stop = True
                continue

            # Retry logic
            self.stat.retries += 1
            self.state = f"🔎 Wait items, repeat №{self.stat.retries}"
            # The pause grows linearly with the number of empty passes.
            await asyncio.sleep(self.stat.retries * self.WAIT_COEF)

        await self.task_manager.stop()

    async def _run_task(self, info: TaskInfo):
        """Async generator that processes one item and yields its results.

        Registers ``info`` in ``self.tasks`` for the duration of the run,
        keeps its status text and the counters in ``self.stat`` up to date
        and, if ``info.processed_callback`` is set, awaits it once
        processing is complete.
        """
        self.tasks.append(info)

        info.update("🎬 Task started")

        info.update("🛠 Processing")

        async for result in self.process(info.item):
            self.stat.returned_items += 1
            info.update(f"🛠 {repr(result)}")
            yield result
            info.update("🛠 Processing")

        self.stat.processed_items += 1
        info.update("✅ Finish processing")

        if info.processed_callback:
            info.update("🤙 Run callback")

            self.log.info("Run processed callback",
                          processed_callback=info.processed_callback)
            await info.processed_callback

        self.stat.finished_items += 1

        info.update("🏁 Task finished")

        self.tasks.remove(info)

    async def take_error(self):
        """Return the next error reported by the task manager."""
        return await self.task_manager.take_error()