예제 #1
0
def feedback_entity_task(entity_data: Dict[str, Any], entity_type: str):
    """
    Feed collected entity information back into the data store.

    A PutError / UpdateError that ErrorInspector classifies as a Dynamo
    throughput error is counted and logged instead of being propagated.
    Any other PutError / UpdateError is re-raised unchanged.

    :param entity_data: The entity we're feeding back to the system
    :param entity_type: Type of the entity, a string representation
    """
    try:
        feedback_entity(entity_data, entity_type)
    except (PutError, UpdateError) as ex:
        # Only throughput errors are tolerated; everything else bubbles up.
        if not ErrorInspector.is_dynamo_throughput_error(ex):
            raise

        metric_name = feedback_entity_task.__name__ + '.throughput_exceptions'
        metric_tags = {
            'entity_type': entity_type,
            'ad_account_id': determine_ad_account_id(entity_data, entity_type),
        }
        Measure.counter(metric_name, tags=metric_tags).increment()
        logger.info(str(ex))
예제 #2
0
def _fetch_job_report(job_id: str) -> Optional[JobReport]:
    """
    Look up the job report stored under ``job_id``.

    When the report shows at least PERMANENTLY_FAILING_JOB_THRESHOLD
    consecutive failures, a counter is incremented and a warning is logged;
    the report is still returned.

    :param job_id: Identifier of the job whose report is requested
    :return: The report, or None when JobReport.get raises DoesNotExist
    """
    try:
        job_report = JobReport.get(job_id)
        consecutive_fails = job_report.fails_in_row

        # A missing / zero failure count never qualifies as permanently failing.
        permanently_failing = bool(consecutive_fails) and consecutive_fails >= PERMANENTLY_FAILING_JOB_THRESHOLD
        if permanently_failing:
            Measure.counter('permanently_failing_job').increment()
            logger.warning(
                f'[permanently-failing-job] Job with id {job_id} failed {consecutive_fails}'
                f' times in a row.'
            )

        return job_report
    except DoesNotExist:
        return None
예제 #3
0
def iter_entities_per_page_id(
    page_id: str, fields: List[str] = None, page_entity_types: List[str] = None
) -> Generator[Dict[str, Any], None, None]:
    """
    Yield dict representations of the entities stored under a page.

    Records whose ``is_accessible`` attribute equals False are filtered out
    by the query. In every yielded dict the ``page_id`` key is replaced by
    ``ad_account_id``. The number of yielded records is reported to a
    counter in batches of 1000, with the remainder reported once the query
    is exhausted.

    :param page_id: Hash key passed to each model's query
    :param fields: Forwarded to ``record.to_dict``
    :param page_entity_types: Entity types to include; None or an empty list
        means every model in ``page_entity_type_model_map``
    """
    if page_entity_types:
        # Unknown entity types intentionally surface as KeyError here.
        models = [page_entity_type_model_map[entity_type] for entity_type in page_entity_types]
    else:
        models = page_entity_type_model_map.values()

    report_every = 1000

    for model in models:
        metric_tags = {'ad_account_id': page_id, 'entity_type': model.entity_type}

        with Measure.counter(__name__ + '.entities_per_page_id', tags=metric_tags) as cntr:
            # `!= False` builds a query condition object; it must not become `is not False`.
            records = model.query(page_id, filter_condition=(model.is_accessible != False))  # noqa: E712

            seen = 0
            for seen, record in enumerate(records, start=1):
                as_dict = record.to_dict(fields=fields, skip_null=True)
                # this is unfortunate, but we need to change page_id to ad_account_id
                as_dict['ad_account_id'] = as_dict.pop('page_id')
                yield as_dict
                if seen % report_every == 0:
                    cntr += report_every

            # Whatever did not fill a whole batch is reported at the end.
            leftover = seen % report_every
            if leftover:
                cntr += leftover
예제 #4
0
def iter_entities_per_ad_account_id(
    ad_account_id: str, fields: List[str] = None, entity_types: List[str] = None
) -> Generator[Dict[str, Any], None, None]:
    """
    Yield dict representations of the entities stored under an ad account.

    Records whose ``is_accessible`` attribute equals False are filtered out
    by the query. The number of yielded records is reported to a counter in
    batches of 1000, with the remainder reported once the query is exhausted.

    :param ad_account_id: Hash key passed to each model's query
    :param fields: Forwarded to ``record.to_dict``
    :param entity_types: Entity types to include; an explicit None or an
        empty list means "use the default list", i.e. every model in
        ``entity_type_model_map``
    """
    # Callers occasionally have to pass *some* value through without
    # overriding anything, hence the falsy check rather than `is None`.
    if entity_types:
        # intentionally leaving this logic brittle:
        # this function is linked to types "statically"
        # and is not expected to hide misses in the map.
        models = [entity_type_model_map[entity_type] for entity_type in entity_types]
    else:
        # All types are returned
        models = entity_type_model_map.values()

    report_every = 1000

    for model in models:
        metric_tags = {'ad_account_id': ad_account_id, 'entity_type': model.entity_type}

        with Measure.counter(__name__ + '.entities_per_ad_account_id', tags=metric_tags) as cntr:
            # `!= False` builds a query condition object; it must not become `is not False`.
            records = model.query(ad_account_id, filter_condition=(model.is_accessible != False))  # noqa: E712

            seen = 0
            for seen, record in enumerate(records, start=1):
                yield record.to_dict(fields=fields, skip_null=True)
                if seen % report_every == 0:
                    cntr += report_every

            # Whatever did not fill a whole batch is reported at the end.
            leftover = seen % report_every
            if leftover:
                cntr += leftover
예제 #5
0
def _send_measurement_task_runtime(job_scope: JobScope, bucket: int):
    """
    Report the outcome measurements of a finished report task.

    The running time is always sent as a gauge. The datapoint count is sent
    as both a counter increment and a histogram value, but only when it is
    a positive number.

    :param job_scope: Source of the tag values, datapoint count and running time
    :param bucket: Value of the ``bucket`` tag
    """
    base_name = f'{__name__}.report_tasks_outcome'
    tags = {
        'ad_account_id': job_scope.ad_account_id,
        'sweep_id': job_scope.sweep_id,
        'report_type': job_scope.report_type,
        'report_variant': job_scope.report_variant,
        'bucket': bucket,
        'job_type': job_scope.job_type,
    }

    datapoints = job_scope.datapoint_count
    if datapoints and datapoints > 0:
        # Same metric name feeds both the counter and the histogram.
        data_points_metric = f'{base_name}.data_points'
        Measure.counter(data_points_metric, tags=tags).increment(datapoints)
        Measure.histogram(data_points_metric, tags=tags)(datapoints)

    running_time_gauge = Measure.gauge(f'{base_name}.running_time', tags=tags)
    running_time_gauge(job_scope.running_time)
예제 #6
0
 def __init__(
     self,
     sweep_id: str,
     sweep_status_tracker: SweepStatusTracker,
     oozed_total: int,
     stop_waiting_time: float,
     *,
     wait_interval: int = 1,
 ):
     """
     Store the sweep parameters and create the ``done`` counter.

     :param sweep_id: Sweep identifier; also used as the counter's tag
     :param sweep_status_tracker: Tracker object kept for later use
     :param oozed_total: Stored as-is (presumably the number of tasks
         that were oozed for this sweep -- confirm with the caller)
     :param stop_waiting_time: Stored as-is (presumably a timestamp after
         which waiting should stop -- confirm with the caller)
     :param wait_interval: Keyword-only; stored as-is, defaults to 1
     """
     # Values handed in by the caller
     self.sweep_id = sweep_id
     self.sweep_status_tracker = sweep_status_tracker
     self.oozed_total = oozed_total
     self.stop_waiting_time = stop_waiting_time
     self.wait_interval = wait_interval

     # Internal state
     done_metric_name = f'{__name__}.done'
     self.counter = Measure.counter(done_metric_name, tags={'sweep_id': sweep_id})
     self._last_total = 0
예제 #7
0
 def __init__(
     self,
     sweep_id: str,
     sweep_status_tracker: SweepStatusTracker,
     pulse_review_interval: int,
     stop_oozing_time: float,
     *,
     wait_interval: int = 1,
 ):
     """
     Store the sweep parameters and initialise the oozing state.

     The oozing rate starts at OOZER_START_RATE, and both review timestamps
     start one second before the current (rounded) time.

     :param sweep_id: Sweep identifier; also used as the counter's tag
     :param sweep_status_tracker: Tracker object kept for later use
     :param pulse_review_interval: Stored as-is (presumably seconds between
         pulse reviews -- confirm with the caller)
     :param stop_oozing_time: Stored as-is (presumably a timestamp after
         which oozing should stop -- confirm with the caller)
     :param wait_interval: Keyword-only; stored as-is, defaults to 1
     """
     # Values handed in by the caller
     self.sweep_id = sweep_id
     self.sweep_status_tracker = sweep_status_tracker
     self.pulse_review_interval = pulse_review_interval
     self.stop_oozing_time = stop_oozing_time
     self.wait_interval = wait_interval

     # Internal state
     self.oozed_count = 0
     self.oozing_rate = OOZER_START_RATE
     oozed_metric_name = f'{__name__}.oozed'
     self.counter = Measure.counter(oozed_metric_name, tags={'sweep_id': sweep_id})

     # Both review clocks share the same starting point.
     initial_review_time = round(time.time()) - 1
     self._rate_review_time = initial_review_time
     self._pulse_review_time = initial_review_time
     self._tasks_since_review = 0
예제 #8
0
 def send_measurement_error(error_type: str, ad_account_id: str):
     """
     Increment this module's ``errors`` counter by one.

     :param error_type: Value of the ``error_type`` tag
     :param ad_account_id: Value of the ``ad_account_id`` tag
     """
     error_tags = {'error_type': error_type, 'ad_account_id': ad_account_id}
     # Tags are passed positionally, exactly as the counter is called elsewhere in this block's history.
     Measure.counter(f'{__name__}.errors', error_tags).increment()
예제 #9
0
def build_sweep(sweep_id: str):
    """
    Build a sweep: preposition tokens, queue jobs and fan out per-entity work.

    AdAccount and Page reality claims are collected into a Celery group and
    processed by workers; every other claim is run through the pipeline
    inline. The group's progress is polled every 5 seconds and reported as a
    gauge. If the group is not done within 20 minutes the function reports
    an early exit and returns without joining.

    Any exception raised along the way is handed to ErrorInspector.inspect.

    :param sweep_id: Identifier of the sweep being built
    """
    from sweep_builder.init_tokens import init_tokens
    from sweep_builder.pipeline import iter_pipeline
    from sweep_builder.reality_inferrer.reality import iter_reality_base

    try:
        _measurement_name_base = __name__ + '.' + build_sweep.__name__ + '.'
        _measurement_tags = {'sweep_id': sweep_id}

        # In the jobs persister we purposefully avoid persisting
        # anything besides the Job ID. This means that things like tokens
        # and other data on *Claim is lost.
        # As long as we are doing that, we need to leave tokens somewhere
        # for workers to pick up.
        logger.info(f"#{sweep_id} Prepositioning platform tokens")
        init_tokens(sweep_id)

        logger.info(f"#{sweep_id} Starting sweep building")

        # task_group = TaskGroup()
        delayed_tasks = []

        # Running total of jobs queued inline (across all claims).
        cnt = 0
        # The counter is only bumped in batches of this size.
        _step = 1000

        with Measure.counter(_measurement_name_base + 'outer_loop',
                             tags=_measurement_tags) as cntr:

            for reality_claim in iter_reality_base():
                # what we get here are Scope and AdAccount objects.
                # Children of AdAccount reality claims are to be processed
                # in separate Celery tasks. But we still have jobs
                # associated with Scopes objects, so
                # need to rate and store the jobs before chipping off
                # a separate task for each of AdAccounts.
                if reality_claim.entity_type == Entity.AdAccount:

                    # child_task_id = task_group.generate_task_id()
                    # task_group.report_task_active(child_task_id)

                    delayed_tasks.append(
                        # we are using Celery chord to process AdAccounts in parallel
                        # for very very large (hundreds of thousands) numbers of AdAccounts,
                        # chord management will be super memory expensive,
                        # as chord timer/controller will be looking at entire list on
                        # each tick.
                        # In that case, probably better to switch to
                        # a callback per handler + mutex/counter somewhere
                        build_sweep_slice_per_ad_account_task.si(
                            sweep_id,
                            reality_claim,
                            # task_id=child_task_id
                        ))
                elif reality_claim.entity_type == Entity.Page:
                    delayed_tasks.append(
                        build_sweep_slice_per_page.si(sweep_id, reality_claim))
                else:
                    # Count from zero per claim so the metric matches the
                    # number of items the pipeline actually produced.
                    claim_cnt = 0
                    for _ in iter_pipeline(sweep_id, [reality_claim]):
                        claim_cnt += 1
                        if claim_cnt % _step == 0:
                            cntr += _step
                            logger.info(
                                f'#{sweep_id}-root: Queueing up #{cnt + claim_cnt}')

                    # because above counter communicates only increments of _step,
                    # we need to report remainder --- amount under _step
                    remainder = claim_cnt % _step
                    if remainder:
                        cntr += remainder

                    cnt += claim_cnt

        logger.info(f"#{sweep_id}-root: Queued up a total of {cnt} tasks")

        # # here we fan out actual work to celery workers
        # # and wait for all tasks to finish before returning
        group_result = group(delayed_tasks).delay()

        # In case the workers crash, go-away (scaling) or are otherwise
        # non-responsive, the following would wait indefinitely.
        # Since that's not desirable and the total sweep build time is minutes at
        # maximum, we add a reasonable timeout
        # Because we are not joining on the results, but actually periodically
        # looking for "you done yet?", we can exit if this threshold is busted, and
        # let the next run recover from the situation
        should_be_done_by = time.time() + (60 * 20)

        Measure.gauge(f'{_measurement_name_base}per_account_sweep.total',
                      tags=_measurement_tags)(len(group_result.results))

        # Monitor the progress. Although this obviously can be achieved with
        # group_result.join(), we need to "see" into the task group progress
        with Measure.gauge(f'{_measurement_name_base}per_account_sweep.done',
                           tags=_measurement_tags) as measure_done:
            while True:
                done_counter = 0
                for result in group_result.results:
                    logger.debug(f'{result}: {result.state}')
                    if result.ready():
                        done_counter += 1

                logger.debug(
                    f"TOTAL: {done_counter}/{len(group_result.results)}")
                logger.debug("=" * 20)

                logger.debug("Checking group result")

                measure_done(done_counter)
                if group_result.ready():
                    logger.debug(f"#{sweep_id}-root: Sweep build complete")
                    break

                # Important. If we don't sleep, the native join in celery context
                # switches all the time and we end up with 100% cpu, eventually somehow
                # deadlocking the process. 5 seconds is kind of an arbitrary number, but
                # does what we need and the impact of a (potential) delay is absolutely
                # minimal
                time.sleep(5)

                # The last line of defense. Workers did not finish in time we
                # expected, no point waiting, kill it.
                if time.time() > should_be_done_by:
                    Measure.gauge(
                        f'{_measurement_name_base}per_account_sweep.early_exits',
                        tags=_measurement_tags)(1)
                    logger.warning(
                        "Exiting incomplete sweep build, it's taking too long")
                    return

        logger.info("Waiting on results join")
        if group_result.supports_native_join:
            group_result.join_native()
        else:
            # Eager mode does not support native join.
            group_result.join()

        # # alternative to Celery's native group_result.join()
        # # our manual task tracking code + join()
        # task_group.join()
        logger.info("Join complete, sweep build ended")
    except Exception as ex:
        # Top-level boundary of the task: delegate to the shared inspector.
        ErrorInspector.inspect(ex, None, {'sweep_id': sweep_id})