def create_app(config=None):
    """Build and configure the aleph Flask application.

    Loads defaults from ``settings``, applies the optional ``config``
    overrides, configures the module-level ``celery`` object (queues, broker
    and beat schedule) and initialises the migration, OAuth, mail, database
    and CORS extensions. Sentry is only wired up when ``settings.SENTRY_DSN``
    is set. Finally, every ``aleph.init`` extension is called with the app.

    :param config: optional mapping of settings applied on top of the
        values loaded from ``settings``.
    :returns: the configured Flask application.
    :raises RuntimeError: if ``settings.DATABASE_URI`` does not contain
        ``'postgres'``.
    """
    app = Flask('aleph')
    app.config.from_object(settings)
    # ``None`` replaces the former mutable ``{}`` default argument.
    if config is not None:
        app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI})

    queues = (
        Queue(WORKER_QUEUE, routing_key=WORKER_ROUTING_KEY),
        Queue(USER_QUEUE, routing_key=USER_ROUTING_KEY),
    )
    celery.conf.update(
        # One-element tuple: without the trailing comma this is a plain string.
        imports=('aleph.queues',),
        broker_url=settings.BROKER_URI,
        # broker_connection_retry=False,
        broker_connection_max_retries=3,
        broker_pool_limit=None,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=True,
        task_queues=queues,
        task_default_queue=WORKER_QUEUE,
        task_default_routing_key=WORKER_ROUTING_KEY,
        worker_max_tasks_per_child=500,
        worker_disable_rate_limits=True,
        result_persistent=False,
        beat_schedule={
            'alert-every-night': {
                'task': 'aleph.logic.alerts.check_alerts',
                'schedule': crontab(hour=1, minute=30)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)
    return app
def get_analyzers():
    """Return the module-level list of active analyzer instances.

    The list is populated the first time it is found empty: every
    ``aleph.analyzers`` extension is instantiated in descending ``PRIORITY``
    order and kept only if its ``active`` attribute is truthy.
    """
    if ANALYZERS:
        return ANALYZERS

    candidates = list(get_extensions('aleph.analyzers'))
    candidates.sort(key=lambda ext: ext.PRIORITY, reverse=True)
    for analyzer_cls in candidates:
        instance = analyzer_cls()
        if instance.active:
            ANALYZERS.append(instance)
    return ANALYZERS
def analyze_document(document):
    """Apply each active analyzer extension to ``document`` and save it.

    Analyzer classes registered under ``aleph.analyzers`` are instantiated
    in descending ``PRIORITY`` order. A failing analyzer is logged and does
    not stop the remaining ones. The document is then added to the database
    session and committed.
    """
    log.info("Analyze document [%s]: %s", document.id, document.name)

    ranked = list(get_extensions('aleph.analyzers'))
    ranked.sort(key=lambda ext: ext.PRIORITY, reverse=True)
    for analyzer_cls in ranked:
        instance = analyzer_cls()
        if instance.active:
            try:
                instance.analyze(document)
            except Exception:
                # Best effort: one broken analyzer must not block the others.
                log.exception("Analyzer %s failed.", analyzer_cls)

    session = db.session
    session.add(document)
    session.commit()
def create_app(config=None):
    """Build and configure the aleph Flask application.

    Loads defaults from ``settings``, applies the optional ``config``
    overrides, configures the module-level ``celery`` object (a single
    priority queue, broker and beat schedule) and initialises the migration,
    OAuth, mail, database, Babel and CORS extensions. Sentry and the
    opencensus/Stackdriver tracing are only set up when the corresponding
    settings are defined. Every ``aleph.init`` extension is called with the
    app.

    :param config: optional mapping of settings applied on top of the
        values loaded from ``settings``.
    :returns: the configured Flask application.
    :raises RuntimeError: if ``settings.DATABASE_URI`` does not contain
        ``'postgres'``.
    """
    app = Flask('aleph')
    app.config.from_object(settings)
    # ``None`` replaces the former mutable ``{}`` default argument.
    if config is not None:
        app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({
        'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI,
        'BABEL_DOMAIN': 'aleph'
    })

    queue = Queue(settings.QUEUE_NAME,
                  routing_key=settings.QUEUE_ROUTING_KEY,
                  queue_arguments={'x-max-priority': 9})
    celery.conf.update(
        # One-element tuple: without the trailing comma this is a plain string.
        imports=('aleph.queues',),
        broker_url=settings.BROKER_URI,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=False,
        task_queues=(queue,),
        task_default_queue=settings.QUEUE_NAME,
        task_default_routing_key=settings.QUEUE_ROUTING_KEY,
        worker_max_tasks_per_child=1000,
        result_persistent=False,
        beat_schedule={
            'hourly': {
                'task': 'aleph.logic.scheduled.hourly',
                'schedule': crontab(hour='*', minute=0)
            },
            'daily': {
                'task': 'aleph.logic.scheduled.daily',
                'schedule': crontab(hour=5, minute=0)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    babel.init_app(app)
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)

    # Set up opencensus tracing and its integrations. Export collected traces
    # to Stackdriver Trace on a background thread.
    if settings.STACKDRIVER_TRACE_PROJECT_ID:
        exporter = stackdriver_exporter.StackdriverExporter(
            project_id=settings.STACKDRIVER_TRACE_PROJECT_ID,
            transport=BackgroundThreadTransport
        )
        sampler = probability.ProbabilitySampler(
            rate=settings.TRACE_SAMPLING_RATE
        )
        blacklist_paths = ['/healthz', ]
        FlaskMiddleware(
            app, exporter=exporter, sampler=sampler,
            blacklist_paths=blacklist_paths
        )
        integrations = ['postgresql', 'sqlalchemy', 'httplib']
        config_integration.trace_integrations(integrations)
        # Set up logging
        setup_stackdriver_logging()
    return app
def create_app(config=None):
    """Build and configure the aleph Flask application.

    Loads defaults from ``settings``, applies the optional ``config``
    overrides, configures the module-level ``celery`` object (a single
    priority queue, broker and beat schedule) and initialises the migration,
    OAuth, mail, database, Babel, cache and CORS extensions. Sentry is only
    wired up when ``settings.SENTRY_DSN`` is set. Finally, every
    ``aleph.init`` extension is called with the app.

    :param config: optional mapping of settings applied on top of the
        values loaded from ``settings``.
    :returns: the configured Flask application.
    :raises RuntimeError: if ``settings.DATABASE_URI`` does not contain
        ``'postgres'``.
    """
    app = Flask('aleph')
    app.config.from_object(settings)
    # ``None`` replaces the former mutable ``{}`` default argument.
    if config is not None:
        app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({
        'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI,
        'BABEL_DOMAIN': 'aleph'
    })

    queue = Queue(settings.QUEUE_NAME,
                  routing_key=settings.QUEUE_ROUTING_KEY,
                  queue_arguments={'x-max-priority': 9})
    celery.conf.update(
        # One-element tuple: without the trailing comma this is a plain string.
        imports=('aleph.queues',),
        broker_url=settings.BROKER_URI,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=True,
        task_queues=(queue, ),
        task_default_queue=settings.QUEUE_NAME,
        task_default_routing_key=settings.QUEUE_ROUTING_KEY,
        worker_max_tasks_per_child=500,
        result_persistent=False,
        beat_schedule={
            'hourly': {
                'task': 'aleph.logic.scheduled.hourly',
                'schedule': crontab(hour='*', minute=0)
            },
            'daily': {
                'task': 'aleph.logic.scheduled.daily',
                'schedule': crontab(hour=5, minute=0)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    babel.init_app(app)
    cache.init_app(app, config={'CACHE_TYPE': 'simple'})
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)
    return app