Example #1
    def to_dict(self, **kw):
        incl_reports = kw.get("incl_reports", True)
        d = {
            'id': self.id,
            'org_id': self.org_id,
            'sous_chef': self.sous_chef.slug,
            'name': self.name,
            'slug': self.slug,
            'description': self.description,
            'created': self.created,
            'updated': self.updated,
            'last_run': self.last_run,
            'schedule_by': self.schedule_by,
            'crontab': self.crontab,
            'time_of_day': self.time_of_day,
            'minutes': self.minutes,
            'status': self.status,
            'traceback': self.traceback,
            'last_job': self.last_job,
            'options': pickle_to_obj(self.options)
        }

        if 'metrics' in self.sous_chef.creates:
            d['metrics'] = self.metric_names

        if incl_reports and 'report' in self.sous_chef.creates:
            d['reports'] = self.report_names

        return d
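Every example below round-trips Python objects through Redis with a pickle_to_obj helper (and, implicitly, a serializing counterpart). The examples never show these helpers; a minimal sketch, assuming they are thin wrappers around the standard pickle module:

import pickle

def obj_to_pickle(obj):
    # serialize a Python object to bytes before writing it to redis
    return pickle.dumps(obj)

def pickle_to_obj(s):
    # deserialize bytes fetched from redis back into a Python object
    return pickle.loads(s)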
Example #2
def bulkworker(job_id, **qkw):
    """
    Fetch a job and execute it.
    """
    start = time.time()
    try:
        k = qkw['job_key_fmt'].format(job_id)
        job = rds.get(k)
        if not job:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.'
            )

        if qkw['serializer'] == 'json':
            job = json_to_obj(job)

        elif qkw['serializer'] == 'pickle':
            job = pickle_to_obj(job)

        data = job.pop('data', [])
        job = job.pop('kw', {})

        # delete them
        rds.delete(k)

        # chunk list
        chunked_data = util.chunk_list(data, qkw.get('chunk_size'))

        # partial function
        load_fx = partial(ingest.source, **job)

        # pooled execution; exhaust the iterator so every chunk is loaded
        pool = Pool(qkw.get('max_workers', MAX_WORKERS))
        for res in pool.imap_unordered(load_fx, chunked_data):
            pass
        return True

    except JobTimeoutException:
        end = time.time()
        raise InternalServerError(
            'Bulk loading timed out after {} seconds'
            .format(end - start))

    except Exception:
        # this catch-all must come after JobTimeoutException,
        # or the timeout handler above would be unreachable.
        tb = format_exc()
        raise RequestError(
            'An error occurred while running {}:\n{}'.format(job_id, tb))
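bulkworker splits the payload with util.chunk_list before fanning it out to the pool. The helper is not shown; a plausible sketch (the fallback size here is an assumption, not the library's value):

def chunk_list(lst, chunk_size=None):
    # split a list into consecutive chunks of at most chunk_size items;
    # fall back to an assumed default when no size is configured
    chunk_size = chunk_size or 100
    return [lst[i:i + chunk_size] for i in range(0, len(lst), chunk_size)]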
Example #3
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class
    in order to enable pickling.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError(
                'An unexpected error occurred while attempting to run a Sous Chef.'
            )
        kw = pickle_to_obj(kw)

        # delete them.
        rds.delete(kw_key)

        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)

        # initialize it with kwargs
        sc = SousChef(**kw)

        # cook it.
        sc.cook()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        # if something is set on this object, add it.
        if sc.next_job:
            recipe.last_job = sc.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception as e:
        # keep track of the error.
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        db.session.add(recipe)
        db.session.commit()
        # return the error object (rather than raising) so the caller
        # can detect the failure from the job's result.
        return MerlynneError(e)
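The docstring notes that run_sous_chef lives at module level "to enable pickling": task queues serialize the function reference plus its arguments, so the bulky kwargs are stashed in redis instead. The enqueue side is not shown; a hypothetical sketch assuming the rq queue library and the obj_to_pickle helper above:

from uuid import uuid4
from rq import Queue

def queue_sous_chef(rds, sous_chef_path, recipe_id, **kw):
    # stash the kwargs in redis under a one-off key so only small,
    # picklable arguments travel through the queue itself
    kw_key = 'souschef:kwargs:{}'.format(uuid4().hex)
    rds.set(kw_key, obj_to_pickle(kw))
    q = Queue(connection=rds)
    return q.enqueue(run_sous_chef, sous_chef_path, recipe_id, kw_key)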
Example #4
    def to_dict(self):
        d = {
            'id': self.id,
            'org_id': self.org_id,
            'sous_chef': self.sous_chef.slug,
            'name': self.name,
            'slug': self.slug,
            'description': self.description,
            'created': self.created,
            'updated': self.updated,
            'last_run': self.last_run,
            'schedule_by': self.schedule_by,
            'crontab': self.crontab,
            'time_of_day': self.time_of_day,
            'minutes': self.minutes,
            'status': self.status,
            'traceback': self.traceback,
            'last_job': self.last_job,
            'options': pickle_to_obj(self.options)
        }
        if self.sous_chef.creates == 'metrics':
            d['metrics'] = self.metrics
        return d
Example #5
    def load_all(self, kwargs_key):
        """
        Do the work.
        """
        start = time.time()
        try:
            # create a session specific to this task
            session = gen_session()

            # get the inputs from redis
            kwargs = self.redis.get(kwargs_key)
            if not kwargs:
                raise InternalServerError(
                    'An unexpected error occurred while processing bulk upload.'
                )

            kwargs = pickle_to_obj(kwargs)
            data = kwargs.get('data')
            kw = kwargs.get('kw')

            # delete them
            self.redis.delete(kwargs_key)

            outputs = []
            errors = []

            fx = partial(self._load_one, **kw)

            if self.concurrent:
                pool = Pool(min([len(data), self.max_workers]))
                for res in pool.imap_unordered(fx, data):
                    if isinstance(res, Exception):
                        errors.append(res)
                    else:
                        outputs.append(res)
            else:
                for item in data:
                    res = fx(item)
                    if isinstance(res, Exception):
                        errors.append(res)
                    else:
                        outputs.append(res)

            # hand off any errors collected from the workers
            if errors:
                self._handle_errors(errors)

            # add objects and execute
            if self.returns == 'model':
                for o in outputs:
                    if o is not None:
                        try:
                            session.add(o)
                            session.commit()
                        except Exception as e:
                            self._handle_errors(e)

            # union all queries
            elif self.returns == 'query':
                for query in outputs:
                    if query is not None:
                        try:
                            session.execute(query)
                        except Exception as e:
                            self._handle_errors(e)

            try:
                session.commit()

            except Exception as e:
                session.rollback()
                session.remove()
                self._handle_errors(e)

            # return true if everything worked.
            session.close()
            return True

        except JobTimeoutException:
            end = time.time()
            # return the error object rather than raising, so the
            # caller can detect the timeout from the job's result.
            return InternalServerError(
                'Bulk loading timed out after {} seconds'
                .format(end - start))
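load_all treats a worker result that is an Exception as a failure to collect, which only works if _load_one returns exceptions instead of raising them. That method is not shown; a hypothetical sketch of the contract (self._load_item stands in for the real per-record logic):

def _load_one(self, item, **kw):
    try:
        return self._load_item(item, **kw)
    except Exception as e:
        # hand the failure back as a value so one bad record cannot
        # kill the pool; load_all collects these into `errors`
        return e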
Example #6
def run(sous_chef_path, recipe_id, kw_key, **kw):
    """
    Do the work. This exists outside the class
    in order to enable pickling for the task queue.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        if kw_key:
            # load in kwargs
            kw = rds.get(kw_key)
            if not kw:
                raise InternalServerError(
                    'An unexpected error occurred while attempting to run a Sous Chef.'
                )
            kw = pickle_to_obj(kw)
            # delete them.
            rds.delete(kw_key)

        # import sous chef
        SousChef = sc_exec.from_import_path(sous_chef_path)

        # initialize it with kwargs
        kw['org'] = db.session\
            .query(Org).get(recipe.org.id)\
            .to_dict(incl_domains=True)
        kw['recipe'] = recipe.to_dict()
        sous_chef = SousChef(**kw)

        # indicate that the job is running
        if not kw.get('passthrough', False):
            recipe.status = 'running'
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sous_chef.cook()

        # passthrough the data.
        if kw.get('passthrough', False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sous_chef.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if sous_chef.next_job:
            recipe.last_job = sous_chef.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception:

        # always delete the kwargs.
        if kw_key:
            rds.delete(kw_key)

        # kw may not have deserialized above; only honor passthrough
        # if it is still a dict.
        if not (isinstance(kw, dict) and kw.get('passthrough', False)):
            db.session.rollback()
            recipe.status = "error"
            recipe.traceback = format_exc()
            recipe.last_run = dates.now()
            db.session.add(recipe)
            db.session.commit()

            # notification
            tb = format_exc()
            error_notification(recipe, tb)
            return MerlynneError(tb)

        raise MerlynneError(format_exc())
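run supports two modes keyed off passthrough: queued execution that records status on the recipe, and synchronous execution that simply hands back the cooked data. Hypothetical call sites (the variables are stand-ins):

# queued mode: kwargs were pickled into redis under kw_key beforehand,
# and the recipe's status is updated as a side effect
run(sous_chef_path, recipe.id, kw_key)

# passthrough mode: cook synchronously and return the data without
# touching the recipe's status
data = run(sous_chef_path, recipe.id, None, passthrough=True)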
Example #7
    def deserialize(self, s):
        """
        The function for deserializing the string
        returned from redis.
        """
        return pickle_to_obj(s)
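The matching serialize hook is not shown; presumably it mirrors this one with the pickle helper's counterpart (obj_to_pickle is an assumption):

    def serialize(self, obj):
        """
        The presumed counterpart, serializing an object
        before it is written to redis.
        """
        return obj_to_pickle(obj)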
Example #8
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class
    in order to enable pickling.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError("An unexpected error occurred while attempting to run a Sous Chef.")
        kw = pickle_to_obj(kw)

        # delete them.
        rds.delete(kw_key)

        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)

        # initialize it with kwargs
        sc = SousChef(**kw)

        # indicate that the job is running
        if not kw.get("passthrough", False):
            recipe.status = "running"
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sc.cook()

        # passthrough the data.
        if kw.get("passthrough", False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sc.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if sc.next_job:
            recipe.last_job = sc.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception as e:
        # always delete the kwargs.
        rds.delete(kw_key)
        # kw may not have deserialized above; only honor passthrough
        # if it is still a dict.
        if isinstance(kw, dict) and kw.get("passthrough", False):
            raise MerlynneError(e)
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        recipe.last_run = dates.now()
        db.session.add(recipe)
        db.session.commit()
        return MerlynneError(e)
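Both import_sous_chef here and sc_exec.from_import_path in Example #6 resolve a dotted path like 'package.module.ClassName' into a class at runtime. Neither is shown; a plausible sketch using the standard library:

from importlib import import_module

def import_sous_chef(sous_chef_path):
    # split 'package.module.ClassName' into module path and class name,
    # import the module, and pull the class off of it
    module_path, class_name = sous_chef_path.rsplit('.', 1)
    return getattr(import_module(module_path), class_name)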