def extract(user):
    """
    Extract a url, optionally forcing a fresh extraction.
    """
    url = arg_str('url', default=None)
    type = arg_str('type', default='article')
    force_refresh = arg_bool('force_refresh', default=False)
    format = arg_str('format', default='json')

    if not url:
        raise RequestError("A url is required.")

    # setting debug on the cache bypasses it, forcing a fresh extraction.
    if force_refresh:
        extract_cache.debug = True

    cr = extract_cache.get(url, type)
    if not cr:
        extract_cache.invalidate(url, type)
        raise InternalServerError('Something went wrong. Try again.')

    resp = {'cache': cr, 'data': cr.value}

    if format == 'html':
        return render_template('extract_preview.html', data=resp)
    return jsonify(resp)
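# The request-argument helpers used above (arg_str, arg_bool) are defined
# elsewhere in the project. A minimal sketch of what they might look like,
# assuming a Flask request context; the real helpers may do more validation.
from flask import request


def arg_str(name, default=None):
    """Read a string query argument, falling back to a default."""
    value = request.args.get(name)
    return value if value not in (None, '') else default


def arg_bool(name, default=False):
    """Read a boolean query argument ('1'/'true'/'yes' => True)."""
    value = request.args.get(name)
    if value is None:
        return default
    return value.lower() in ('1', 'true', 'yes', 'y')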
def bulkworker(job_id, **qkw):
    """
    Fetch a bulk-load job from redis and execute it.
    """
    start = time.time()
    try:
        k = qkw['job_key_fmt'].format(job_id)
        job = rds.get(k)
        if not job:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.')

        if qkw['serializer'] == 'json':
            job = json_to_obj(job)
        elif qkw['serializer'] == 'pickle':
            job = pickle_to_obj(job)

        data = job.pop('data', [])
        job = job.pop('kw', {})

        # delete the stored job now that it has been loaded.
        rds.delete(k)

        # chunk list
        chunked_data = util.chunk_list(data, qkw.get('chunk_size'))

        # partial function
        load_fx = partial(ingest.source, **job)

        # pooled execution
        pool = Pool(qkw.get('max_workers', MAX_WORKERS))
        for res in pool.imap_unordered(load_fx, chunked_data):
            pass
        return True

    # catch the timeout first so it is not swallowed by the generic handler.
    except JobTimeoutException:
        end = time.time()
        raise InternalServerError(
            'Bulk loading timed out after {} seconds'.format(end - start))

    except Exception:
        tb = format_exc()
        raise RequestError('An error occurred while running {}:\n{}'.format(
            job_id, tb))
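# bulkworker depends on util.chunk_list to split the uploaded records into
# batches for the worker pool. A minimal sketch of such a helper, assuming it
# simply returns fixed-size slices; the real implementation may differ.
def chunk_list(items, chunk_size):
    """Split a list into consecutive chunks of at most chunk_size items."""
    chunk_size = max(1, int(chunk_size or 1))
    return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]


# example:
# chunk_list([1, 2, 3, 4, 5], 2) => [[1, 2], [3, 4], [5]]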
def refresh_content_comparisons(user, org):
    """
    Refresh content comparisons.
    """
    comparisons_cache.invalidate(org.id)
    cr = comparisons_cache.get(org.id)
    if not cr.is_cached:
        return jsonify({'success': True})
    raise InternalServerError(
        'Something went wrong with the comparison cache invalidation process.')
def refresh_one_content_comparisons(user, org, type):
    """
    Refresh one content comparison.
    """
    if type not in CONTENT_METRIC_COMPARISONS:
        raise RequestError(
            "'{}' is an invalid content metric comparison. Choose from {}"
            .format(type, ", ".join(CONTENT_METRIC_COMPARISONS)))
    comparison_types[type].invalidate(org.id)
    cr = comparison_types[type].get(org.id)
    if not cr.is_cached:
        return jsonify({'success': True})
    raise InternalServerError(
        'Something went wrong with the comparison cache invalidation process.')
def get_all_content_comparisons(user, org):
    """
    Get all content comparisons, optionally refreshing the cache.
    """
    refresh = arg_bool('refresh', default=False)
    cache_details = arg_bool('cache_details', default=False)
    if refresh:
        comparisons_cache.invalidate(org.id)
    cr = comparisons_cache.get(org.id)
    # a refresh should never return a previously-cached result.
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    if cache_details:
        return jsonify({'cache': cr, 'comparisons': cr.value})
    return jsonify(cr.value)
def run_sous_chef(sous_chef_path, recipe_id, kw_key):
    """
    Do the work. This exists outside the class
    in order to enable pickling.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        # load in kwargs
        kw = rds.get(kw_key)
        if not kw:
            raise InternalServerError(
                'An unexpected error occurred while attempting to run a Sous Chef.')
        kw = pickle_to_obj(kw)

        # delete them.
        rds.delete(kw_key)

        # import sous chef
        SousChef = import_sous_chef(sous_chef_path)

        # initialize it with kwargs
        sc = SousChef(**kw)

        # cook it.
        sc.cook()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None

        # if something is set on this object, add it.
        if len(sc.next_job.keys()):
            recipe.last_job = sc.next_job

        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception as e:
        # keep track of the error.
        db.session.rollback()
        recipe.status = "error"
        recipe.traceback = format_exc()
        db.session.add(recipe)
        db.session.commit()
        return MerlynneError(e)
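# run_sous_chef (and run, below) expect their keyword arguments to have been
# pickled into redis under kw_key before the task was queued. A minimal sketch
# of that producer side, assuming a redis-py client and the standard pickle
# module; the helper name and key format here are assumptions, not the real
# project helpers.
import pickle
import uuid


def stash_kwargs(rds, **kw):
    """Serialize task kwargs into redis and return the key to pass along."""
    kw_key = 'souschef:kwargs:{}'.format(uuid.uuid4().hex)
    rds.set(kw_key, pickle.dumps(kw))
    return kw_key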
def get_comparison(*args, **kwargs):
    """
    Get a single comparison.
    """
    level = kwargs.pop('level')
    type = kwargs.pop('type')
    level = parse_comparison_level(level)
    type = parse_comparison_type(type, level)
    refresh = arg_bool('refresh', default=False)

    fx = comparison_types[level][type]
    if refresh:
        fx.invalidate(*args, **kwargs)
    cr = fx.get(*args, **kwargs)
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    return cr
def refresh_comparison(*args, **kwargs):
    """
    Refresh a single comparison.
    """
    # parse kwargs
    level = kwargs.pop('level')
    type = kwargs.pop('type')
    level = parse_comparison_level(level)
    type = parse_comparison_type(type, level)

    fx = comparison_types[level][type]
    fx.invalidate(*args, **kwargs)
    cr = fx.get(*args, **kwargs)
    if not cr.value or cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the cache invalidation process.')
    return cr
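# The comparison endpoints above treat each entry of comparison_types as a
# cache object whose get() returns a response exposing .value (the computed
# data) and .is_cached (whether it was served from the cache rather than
# recomputed). A rough, in-memory sketch of that contract; the real cache is
# assumed to be backed by redis and keyed per organization.
class CacheResponse(object):
    def __init__(self, value, is_cached):
        self.value = value
        self.is_cached = is_cached


class SketchCache(object):
    """Toy cache: get() recomputes on a miss and reports cache hits."""

    def __init__(self, fx):
        self.fx = fx
        self._store = {}

    def invalidate(self, *args):
        self._store.pop(args, None)

    def get(self, *args):
        is_cached = args in self._store
        if not is_cached:
            self._store[args] = self.fx(*args)
        return CacheResponse(self._store[args], is_cached)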
def get_one_content_comparisons(user, org, type):
    """
    Get one content comparison.
    """
    # allow the urls to be pretty slugs :)
    type = type.replace('-', '_')
    if type not in CONTENT_METRIC_COMPARISONS:
        raise RequestError(
            "'{}' is an invalid content metric comparison. Choose from {}"
            .format(type, ", ".join(CONTENT_METRIC_COMPARISONS)))

    refresh = arg_bool('refresh', default=False)
    cache_details = arg_bool('cache_details', default=False)
    if refresh:
        comparison_types[type].invalidate(org.id)
    cr = comparison_types[type].get(org.id)
    if refresh and cr.is_cached:
        raise InternalServerError(
            'Something went wrong with the comparison cache invalidation process.')
    if cache_details:
        return jsonify({'cache': cr, 'comparison': cr.value.get(type)})
    return jsonify(cr.value.get(type))
def load_all(self, kwargs_key):
    """
    Do the work.
    """
    start = time.time()
    try:
        # create a session specific to this task
        session = gen_session()

        # get the inputs from redis
        kwargs = self.redis.get(kwargs_key)
        if not kwargs:
            raise InternalServerError(
                'An unexpected error occurred while processing bulk upload.')
        kwargs = pickle_to_obj(kwargs)
        data = kwargs.get('data')
        kw = kwargs.get('kw')

        # delete them
        self.redis.delete(kwargs_key)

        outputs = []
        errors = []
        fx = partial(self._load_one, **kw)

        if self.concurrent:
            pool = Pool(min([len(data), self.max_workers]))
            for res in pool.imap_unordered(fx, data):
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)
        else:
            for item in data:
                res = fx(item)
                if isinstance(res, Exception):
                    errors.append(res)
                else:
                    outputs.append(res)

        # report errors
        if len(errors):
            self._handle_errors(errors)

        # add objects and execute
        if self.returns == 'model':
            for o in outputs:
                if o is not None:
                    try:
                        session.add(o)
                        session.commit()
                    except Exception as e:
                        self._handle_errors(e)

        # union all queries
        elif self.returns == 'query':
            for query in outputs:
                if query is not None:
                    try:
                        session.execute(query)
                    except Exception as e:
                        self._handle_errors(e)
            try:
                session.commit()
            except Exception as e:
                session.rollback()
                session.remove()
                self._handle_errors(e)

        # return true if everything worked.
        session.close()
        return True

    except JobTimeoutException:
        end = time.time()
        return InternalServerError(
            'Bulk loading timed out after {} seconds'.format(end - start))
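# load_all assumes that the per-record loader (_load_one, defined on the
# subclass) returns exceptions rather than raising them, so a single bad
# record cannot kill imap_unordered; the loops above then sort results into
# `outputs` and `errors`. A self-contained sketch of that convention as a
# wrapper; the real loader may implement it differently.
def return_errors(fx):
    """Wrap a per-record loader so that failures are returned, not raised."""
    def wrapper(item, **kw):
        try:
            return fx(item, **kw)
        except Exception as e:
            return e
    return wrapper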
def run(sous_chef_path, recipe_id, kw_key, **kw):
    """
    Do the work. This exists outside the class
    in order to enable pickling for the task queue.
    """
    recipe = db.session.query(Recipe).get(recipe_id)
    try:
        if kw_key:
            # load in kwargs
            kw = rds.get(kw_key)
            if not kw:
                raise InternalServerError(
                    'An unexpected error occurred while attempting to run a Sous Chef.')
            kw = pickle_to_obj(kw)
            # delete them.
            rds.delete(kw_key)

        # import sous chef
        SousChef = sc_exec.from_import_path(sous_chef_path)

        # initialize it with kwargs
        kw['org'] = db.session\
            .query(Org).get(recipe.org.id)\
            .to_dict(incl_domains=True)
        kw['recipe'] = recipe.to_dict()
        sous_chef = SousChef(**kw)

        # indicate that the job is running
        if not kw.get('passthrough', False):
            recipe.status = 'running'
            db.session.add(recipe)
            db.session.commit()

        # cook it.
        data = sous_chef.cook()

        # passthrough the data.
        if kw.get('passthrough', False):
            return data

        # otherwise just exhaust the generator
        if isgenerator(data):
            data = list(data)

        # teardown this recipe
        sous_chef.teardown()

        # update status and next job from sous chef.
        recipe.status = "stable"
        recipe.traceback = None
        recipe.last_run = dates.now()
        if len(sous_chef.next_job.keys()):
            recipe.last_job = sous_chef.next_job
        db.session.add(recipe)
        db.session.commit()
        return True

    except Exception:
        # always delete the kwargs.
        if kw_key:
            rds.delete(kw_key)

        if not kw.get('passthrough', False):
            # record the error on the recipe.
            db.session.rollback()
            recipe.status = "error"
            recipe.traceback = format_exc()
            recipe.last_run = dates.now()
            db.session.add(recipe)
            db.session.commit()

            # notification
            tb = format_exc()
            error_notification(recipe, tb)
            return MerlynneError(tb)

        # passthrough runs re-raise instead of recording the error.
        raise MerlynneError(format_exc())