Example #1
def extract(user):
    url = arg_str('url', default=None)
    type = arg_str('type', default='article')
    force_refresh = arg_bool('force_refresh', default=False)
    format = arg_str('format', default='json')

    if not url:
        raise RequestError("A url is required.")

    if force_refresh:
        extract_cache.debug = True

    cr = extract_cache.get(url, type)
    if not cr:
        extract_cache.invalidate(url, type)
        raise InternalServerError('Something went wrong. Try again.')

    resp = {
        'cache': cr,
        'data': cr.value
    }

    if format == 'html':
        return render_template(
            'extract_preview.html',
            data=resp)

    return jsonify(resp)
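
The arg_str and arg_bool helpers above come from the surrounding codebase, not from Flask itself. A minimal sketch of the contract they appear to satisfy, assuming Flask's request.args underneath (the truthy-string parsing is an assumption, not the project's actual implementation):

from flask import request

def arg_str(name, default=None):
    # Hypothetical sketch: read a query-string argument as a string.
    return request.args.get(name, default)

def arg_bool(name, default=False):
    # Hypothetical sketch: interpret common truthy spellings.
    raw = request.args.get(name)
    if raw is None:
        return default
    return raw.lower() in ('1', 'true', 't', 'yes', 'y')
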
Example #2
def bulkworker(job_id, **qkw):
    """
    Fetch a job and execute it.
    """
    start = time.time()
    try:
        k = qkw['job_key_fmt'].format(job_id)
        job = rds.get(k)
        if not job:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.')

        if qkw['serializer'] == 'json':
            job = json_to_obj(job)

        elif qkw['serializer'] == 'pickle':
            job = pickle_to_obj(job)

        data = job.pop('data', [])
        job = job.pop('kw', {})

        # delete them
        rds.delete(k)

        # chunk list
        chunked_data = util.chunk_list(data, qkw.get('chunk_size'))

    # partial function
        load_fx = partial(ingest.source, **job)

        # pooled execution
        pool = Pool(qkw.get('max_workers', MAX_WORKERS))

        for res in pool.imap_unordered(load_fx, chunked_data):
            # exhaust the iterator so every chunk gets ingested
            pass
        return True

    # catch timeouts first; otherwise the broad Exception handler below
    # would swallow them and this branch could never run.
    except JobTimeoutException:
        end = time.time()
        raise InternalServerError(
            'Bulk loading timed out after {} seconds'.format(end - start))

    except Exception:
        tb = format_exc()
        raise RequestError('An error occurred while running {}:\n{}'.format(
            job_id, tb))
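
util.chunk_list is another project helper; a plausible sketch of the behavior the call above assumes, splitting a list into consecutive chunks (the implementation is an assumption):

def chunk_list(lst, chunk_size):
    # Hypothetical sketch: return consecutive slices of at most
    # chunk_size items; assumes chunk_size is a positive int.
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]
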
Example #3
def refresh_content_comparisons(user, org):
    """
    Refresh content comparisons
    """
    comparisons_cache.invalidate(org.id)
    cr = comparisons_cache.get(org.id)
    if not cr.is_cached:
        return jsonify({'success': True})
    raise InternalServerError(
        'Something went wrong with the comparison cache invalidation process.')
Example #4
def refresh_one_content_comparisons(user, org, type):
    """
    Refresh one content comparison.
    """
    if type not in CONTENT_METRIC_COMPARISONS:
        raise RequestError(
            "'{}' is an invalid content metric comparison. Choose from {}"
            .format(type, ", ".join(CONTENT_METRIC_COMPARISONS)))
    comparison_types[type].invalidate(org.id)
    cr = comparison_types[type].get(org.id)
    if not cr.is_cached:
        return jsonify({'success': True})
    raise InternalServerError(
        'Something went wrong with the comparison cache invalidation process.')
Example #5
def get_all_content_comparisons(user, org):
    """
    Get all content comparisons.
    """
    refresh = arg_bool('refresh', default=False)
    cache_details = arg_bool('cache_details', default=False)
    if refresh:
        comparisons_cache.invalidate(org.id)
    cr = comparisons_cache.get(org.id)
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    if cache_details:
        return jsonify({'cache': cr, 'comparisons': cr.value})
    return jsonify(cr.value)
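
All of these comparison handlers rely on the same cache contract: get() returns a response object exposing is_cached and value, and invalidate() drops the stored entry so the next get() recomputes. A minimal sketch of that contract (this class is an illustration, not the project's actual cache):

class CacheResponse(object):
    def __init__(self, value, is_cached):
        self.value = value          # the computed or cached payload
        self.is_cached = is_cached  # True if served from the cache

class Cache(object):
    def __init__(self, work_fx):
        self._store = {}
        self._work_fx = work_fx     # function that computes the value

    def get(self, *args):
        if args in self._store:
            return CacheResponse(self._store[args], is_cached=True)
        value = self._work_fx(*args)
        self._store[args] = value
        return CacheResponse(value, is_cached=False)

    def invalidate(self, *args):
        self._store.pop(args, None)

Under this contract, the "refresh and cr.is_cached" checks above assert that invalidation actually forced a recompute; a cached result immediately after an invalidation means the invalidation silently failed.
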
Example #6
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class
    in order to enable pickling.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError(
                'An unexpected error occurred while attempting to run a Sous Chef.'
            )
        kw = pickle_to_obj(kw)

        # delete them.
        rds.delete(kw_key)

        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)

        # initialize it with kwargs
        sc = SousChef(**kw)

        # cook it.
        sc.cook()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        # if something is set on this object, add it.
        if len(sc.next_job.keys()):
            recipe.last_job = sc.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception as e:
        # keep track of the error.
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        db.session.add(recipe)
        db.session.commit()
        return MerlynneError(e)
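
pickle_to_obj (and json_to_obj in Example #2) are thin deserialization helpers for payloads staged in redis; a plausible sketch, assuming plain pickle and json over the stored byte strings (the names match the calls above, the bodies are assumptions):

import json
import pickle

def pickle_to_obj(raw):
    # Hypothetical sketch: deserialize bytes fetched from redis.
    return pickle.loads(raw)

def json_to_obj(raw):
    # Hypothetical sketch: deserialize a JSON payload from redis.
    return json.loads(raw)
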
Example #7
def get_comparison(*args, **kwargs):
    """
    Get a single comparison.
    """
    level = kwargs.pop('level')
    type = kwargs.pop('type')
    level = parse_comparison_level(level)
    type = parse_comparison_type(type, level)
    refresh = arg_bool('refresh', default=False)
    fx = comparison_types[level][type]
    if refresh:
        fx.invalidate(*args, **kwargs)
    cr = fx.get(*args, **kwargs)
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    return cr
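
Note that comparison_types is indexed here first by level and then by type, unlike the flat comparison_types[type] lookups in the content-comparison handlers above. A sketch of the nested registry shape this implies, reusing the Cache sketch from earlier (the level and type names are illustrative assumptions):

# Hypothetical sketch: each level maps type names to cache objects.
comparison_types = {
    'content': {
        'pageviews': Cache(lambda org_id: {'metric': 'pageviews'}),
        'shares': Cache(lambda org_id: {'metric': 'shares'}),
    },
}
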
Example #8
def refresh_comparison(*args, **kwargs):
    """
    Refresh a single comparison.
    """

    # parse kwargs
    level = kwargs.pop('level')
    type = kwargs.pop('type')
    level = parse_comparison_level(level)
    type = parse_comparison_type(type, level)

    fx = comparison_types[level][type]
    fx.invalidate(*args, **kwargs)
    cr = fx.get(*args, **kwargs)
    if not cr.value or cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    return cr
Example #9
def get_one_content_comparisons(user, org, type):
    """
    Get one content comparison.
    """
    # allow the urls to be pretty slugs :)
    type = type.replace('-', "_")
    if type not in CONTENT_METRIC_COMPARISONS:
        raise RequestError(
            "'{}' is an invalid content metric comparison. Choose from {}"
            .format(type, ", ".join(CONTENT_METRIC_COMPARISONS)))
    refresh = arg_bool('refresh', default=False)
    cache_details = arg_bool('cache_details', default=False)
    if refresh:
        comparison_types[type].invalidate(org.id)
    cr = comparison_types[type].get(org.id)
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the comparison cache invalidation process.')
    if cache_details:
        return jsonify({'cache': cr, 'comparison': cr.value.get(type)})
    return jsonify(cr.value.get(type))
Example #10
    def load_all(self, kwargs_key):
        """
        Do the work.
        """
        start = time.time()
        try:
            # create a session specific to this task
            session = gen_session()

            # get the inputs from redis
            kwargs = self.redis.get(kwargs_key)
            if not kwargs:
                raise InternalServerError(
                    'An unexpected error occurred while processing bulk upload.'
                )

            kwargs = pickle_to_obj(kwargs)
            data = kwargs.get('data')
            kw = kwargs.get('kw')

            # delete them
            self.redis.delete(kwargs_key)

            outputs = []
            errors = []

            fx = partial(self._load_one, **kw)

            if self.concurrent:
                pool = Pool(min([len(data), self.max_workers]))
                for res in pool.imap_unordered(fx, data):
                    if isinstance(res, Exception):
                        errors.append(res)
                    else:
                        outputs.append(res)
            else:
                for item in data:
                    res = fx(item)
                    if isinstance(res, Exception):
                        errors.append(res)
                    else:
                        outputs.append(res)

            # return errors
            if len(errors):
                self._handle_errors(errors)

            # add objects and execute
            if self.returns == 'model':
                for o in outputs:
                    if o is not None:
                        try:
                            session.add(o)
                            session.commit()
                        except Exception as e:
                            self._handle_errors(e)

            # union all queries
            elif self.returns == 'query':
                for query in outputs:
                    if query is not None:
                        try:
                            session.execute(query)
                        except Exception as e:
                            self._handle_errors(e)

            try:
                session.commit()

            except Exception as e:
                session.rollback()
                session.remove()
                self._handle_errors(e)

            # return true if everything worked.
            session.close()
            return True

        except JobTimeoutException:
            end = time.time()
            return InternalServerError(
                'Bulk loading timed out after {} seconds'.format(end - start))
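
The isinstance(res, Exception) checks in load_all only work if _load_one catches its own failures and returns the exception object instead of raising it, since an exception raised inside a pool worker would otherwise propagate out of imap_unordered and abort the loop. A minimal sketch of that pattern (the inner _do_load helper is hypothetical):

def _load_one(self, item, **kw):
    # Hypothetical sketch: return exceptions rather than raising them,
    # so the pooled loop in load_all can collect per-item errors.
    try:
        return self._do_load(item, **kw)
    except Exception as e:
        return e
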
Example #11
def run(sous_chef_path, recipe_id, kw_key, **kw):
    """
    Do the work. This exists outside the class
    in order to enable pickling for the task queue.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        if kw_key:
            # load in kwargs
            kw = rds.get(kw_key)
            if not kw:
                raise InternalServerError(
                    'An unexpected error occurred while attempting to run a Sous Chef.'
                )
            kw = pickle_to_obj(kw)
            # delete them.
            rds.delete(kw_key)

        # import sous chef
        SousChef = sc_exec.from_import_path(sous_chef_path)

        # initialize it with kwargs
        kw['org'] = db.session\
            .query(Org).get(recipe.org.id)\
            .to_dict(incl_domains=True)
        kw['recipe'] = recipe.to_dict()
        sous_chef = SousChef(**kw)

        # indicate that the job is running
        if not kw.get('passthrough', False):
            recipe.status = 'running'
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sous_chef.cook()

        # passthrough the data.
        if kw.get('passthrough', False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sous_chef.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if len(sous_chef.next_job.keys()):
            recipe.last_job = sous_chef.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception:

        # always delete the kwargs.
        if kw_key:
            rds.delete(kw_key)

        if not kw.get('passthrough', False):
            db.session.rollback()
            recipe.status = "error"
            recipe.traceback = format_exc()
            recipe.last_run = dates.now()
            db.session.add(recipe)
            db.session.commit()

            # notification
            tb = format_exc()
            error_notification(recipe, tb)
            return MerlynneError(tb)

        raise MerlynneError(format_exc())
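
The JobTimeoutException handling and the note that run "exists outside the class in order to enable pickling for the task queue" suggest these workers execute under a redis-backed queue such as python-rq. A hedged sketch of how run might be enqueued, with the kwargs staged in redis first (the queue name, key format, and helper are assumptions):

import pickle
from rq import Queue

q = Queue('recipes', connection=rds)

def queue_recipe_run(sous_chef_path, recipe, kw):
    # Hypothetical sketch: stash the kwargs in redis under a unique key,
    # then enqueue the module-level run function by reference so the
    # queue can pickle the call.
    kw_key = 'recipe:{}:kw'.format(recipe.id)
    rds.set(kw_key, pickle.dumps(kw))
    return q.enqueue(run, sous_chef_path, recipe.id, kw_key)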