Esempio n. 1
0
def create_app(config={}):
    app = Flask('aleph')
    app.config.from_object(settings)
    app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI})

    queues = (
        Queue(WORKER_QUEUE, routing_key=WORKER_ROUTING_KEY),
        Queue(USER_QUEUE, routing_key=USER_ROUTING_KEY),
    )
    celery.conf.update(
        imports=('aleph.queues'),
        broker_url=settings.BROKER_URI,
        # broker_connection_retry=False,
        broker_connection_max_retries=3,
        broker_pool_limit=None,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=True,
        task_queues=queues,
        task_default_queue=WORKER_QUEUE,
        task_default_routing_key=WORKER_ROUTING_KEY,
        worker_max_tasks_per_child=500,
        worker_disable_rate_limits=True,
        result_persistent=False,
        beat_schedule={
            'alert-every-night': {
                'task': 'aleph.logic.alerts.check_alerts',
                'schedule': crontab(hour=1, minute=30)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)
    return app
def get_analyzers():
    if not len(ANALYZERS):
        analyzers = get_extensions('aleph.analyzers')
        analyzers = sorted(analyzers, key=lambda a: a.PRIORITY, reverse=True)
        for cls in analyzers:
            analyzer = cls()
            if not analyzer.active:
                continue
            ANALYZERS.append(analyzer)
    return ANALYZERS
Esempio n. 3
0
def analyze_document(document):
    """Run analyzers (such as NER) on a given document."""
    log.info("Analyze document [%s]: %s", document.id, document.name)
    analyzers = get_extensions('aleph.analyzers')
    analyzers = sorted(analyzers, key=lambda a: a.PRIORITY, reverse=True)

    for cls in analyzers:
        analyzer = cls()
        if not analyzer.active:
            continue
        try:
            analyzer.analyze(document)
        except Exception:
            log.exception("Analyzer %s failed.", cls)

    db.session.add(document)
    db.session.commit()
Esempio n. 4
0
def create_app(config={}):
    app = Flask('aleph')
    app.config.from_object(settings)
    app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({
        'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI,
        'BABEL_DOMAIN': 'aleph'
    })

    queue = Queue(settings.QUEUE_NAME,
                  routing_key=settings.QUEUE_ROUTING_KEY,
                  queue_arguments={'x-max-priority': 9})
    celery.conf.update(
        imports=('aleph.queues'),
        broker_url=settings.BROKER_URI,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=False,
        task_queues=(queue,),
        task_default_queue=settings.QUEUE_NAME,
        task_default_routing_key=settings.QUEUE_ROUTING_KEY,
        worker_max_tasks_per_child=1000,
        result_persistent=False,
        beat_schedule={
            'hourly': {
                'task': 'aleph.logic.scheduled.hourly',
                'schedule': crontab(hour='*', minute=0)
            },
            'daily': {
                'task': 'aleph.logic.scheduled.daily',
                'schedule': crontab(hour=5, minute=0)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    babel.init_app(app)
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)
    # Set up opencensus tracing and its integrations. Export collected traces
    # to Stackdriver Trace on a background thread.
    if settings.STACKDRIVER_TRACE_PROJECT_ID:
        exporter = stackdriver_exporter.StackdriverExporter(
            project_id=settings.STACKDRIVER_TRACE_PROJECT_ID,
            transport=BackgroundThreadTransport
        )
        sampler = probability.ProbabilitySampler(
            rate=settings.TRACE_SAMPLING_RATE
        )
        blacklist_paths = ['/healthz', ]
        FlaskMiddleware(
            app, exporter=exporter, sampler=sampler,
            blacklist_paths=blacklist_paths
        )
        integrations = ['postgresql', 'sqlalchemy', 'httplib']
        config_integration.trace_integrations(integrations)
        # Set up logging
        setup_stackdriver_logging()
    return app
Esempio n. 5
0
def create_app(config={}):
    app = Flask('aleph')
    app.config.from_object(settings)
    app.config.update(config)

    if 'postgres' not in settings.DATABASE_URI:
        raise RuntimeError("aleph database must be PostgreSQL!")

    app.config.update({
        'SQLALCHEMY_DATABASE_URI': settings.DATABASE_URI,
        'BABEL_DOMAIN': 'aleph'
    })

    queue = Queue(settings.QUEUE_NAME,
                  routing_key=settings.QUEUE_ROUTING_KEY,
                  queue_arguments={'x-max-priority': 9})
    celery.conf.update(
        imports=('aleph.queues'),
        broker_url=settings.BROKER_URI,
        task_always_eager=settings.EAGER,
        task_eager_propagates=True,
        task_ignore_result=True,
        task_acks_late=True,
        task_queues=(queue, ),
        task_default_queue=settings.QUEUE_NAME,
        task_default_routing_key=settings.QUEUE_ROUTING_KEY,
        worker_max_tasks_per_child=500,
        result_persistent=False,
        beat_schedule={
            'hourly': {
                'task': 'aleph.logic.scheduled.hourly',
                'schedule': crontab(hour='*', minute=0)
            },
            'daily': {
                'task': 'aleph.logic.scheduled.daily',
                'schedule': crontab(hour=5, minute=0)
            }
        },
    )

    migrate.init_app(app, db, directory=settings.ALEMBIC_DIR)
    configure_oauth(app)
    mail.init_app(app)
    db.init_app(app)
    babel.init_app(app)
    cache.init_app(app, config={'CACHE_TYPE': 'simple'})
    CORS(app, origins=settings.CORS_ORIGINS)

    # Enable raven to submit issues to sentry if a DSN is defined. This will
    # report errors from Flask and Celery operation modes to Sentry.
    if settings.SENTRY_DSN:
        sentry.init_app(app,
                        dsn=settings.SENTRY_DSN,
                        logging=True,
                        level=logging.ERROR)
        register_logger_signal(sentry.client)
        register_signal(sentry.client, ignore_expected=True)

    # This executes all registered init-time plugins so that other
    # applications can register their behaviour.
    for plugin in get_extensions('aleph.init'):
        plugin(app=app)
    return app