Example #1
0
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from servicelayer import settings as sls
from flask_babel import lazy_gettext


# Runtime switches. DEBUG is resolved first because it supplies the fallback
# for CACHE.

# ALEPH_DEBUG: show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', default=False)
# ALEPH_CACHE: propose HTTP caching to the user agents. Falls back to the
# inverse of DEBUG.
CACHE = env.to_bool('ALEPH_CACHE', default=not DEBUG)
# ALEPH_MAINTENANCE: put the system into read-only mode and display a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', default=False)
# Unit test context. Hard-coded off here; not read from the environment.
TESTING = False


###############################################################################
# General instance information
#
# Each setting names the environment variable that overrides it, followed by
# the fallback value.

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
# The favicon falls back to the same image as the logo.
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/logo.png')
Example #2
0
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
import uuid
from servicelayer import env
from flask_babel import lazy_gettext

# Runtime switches. DEBUG is resolved first because it supplies the fallback
# for CACHE.

# ALEPH_DEBUG: show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', default=False)
# ALEPH_CACHE: propose HTTP caching to the user agents. Falls back to the
# inverse of DEBUG.
CACHE = env.to_bool('ALEPH_CACHE', default=not DEBUG)
# ALEPH_MAINTENANCE: put the system into read-only mode and display a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', default=False)
# Unit test context. Hard-coded off here; not read from the environment.
TESTING = False

###############################################################################
# General instance information
#
# Each setting names the environment variable that overrides it, followed by
# the fallback value.

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/favicon.png')

# System-wide banner shown in the user interface. No fallback is supplied.
APP_BANNER = env.get('ALEPH_APP_BANNER')
Example #3
0
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from servicelayer import settings as sls
from flask_babel import lazy_gettext

# Runtime switches. DEBUG is resolved first because it supplies the fallback
# for CACHE.

# ALEPH_DEBUG: show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', default=False)
# ALEPH_CACHE: propose HTTP caching to the user agents. Falls back to the
# inverse of DEBUG.
CACHE = env.to_bool('ALEPH_CACHE', default=not DEBUG)
# ALEPH_MAINTENANCE: put the system into read-only mode and display a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', default=False)
# Unit test context. Hard-coded off here; not read from the environment.
TESTING = False

###############################################################################
# General instance information
#
# Each setting names the environment variable that overrides it, followed by
# the fallback value.

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
# The favicon falls back to the same image as the logo.
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/logo.png')

# System-wide banner shown in the user interface. No fallback is supplied.
APP_BANNER = env.get('ALEPH_APP_BANNER')
Example #4
0
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Hard-coded off; not read from the environment.
TESTING = False

# Document conversion service endpoint. INGESTORS_CONVERT_DOCUMENT_URL takes
# precedence; UNOSERVICE_URL is consulted next, then the built-in URL.
CONVERT_URL = env.get(
    "INGESTORS_CONVERT_DOCUMENT_URL",
    env.get("UNOSERVICE_URL", "http://convert-document:3000/convert"),
)
CONVERT_TIMEOUT = env.to_int("INGESTORS_CONVERT_TIMEOUT", 2 * 60 * 60)  # 2 hrs
CONVERT_RETRIES = env.to_int("INGESTORS_CONVERT_RETRIES", 256)

# Google Cloud API switch (expensive!), off unless explicitly enabled.
OCR_VISION_API = env.to_bool("INGESTORS_OCR_VISION_API", default=False)

# Location of the Geonames data file.
GEONAMES_PATH = env.get(
    "INGESTORS_GEONAMES_PATH", "/ingestors/data/geonames.txt"
)

# Location of the FastText lid model file.
LID_MODEL_PATH = env.get(
    "INGESTORS_LID_MODEL_PATH", "/ingestors/data/lid.176.ftz"
)

# Entity extraction is on by default; set the variable to disable it.
ANALYZE_ENTITIES = env.to_bool("INGESTORS_ANALYZE_ENTITIES", default=True)

# NER model selection. Both lists are converted to sets; "eng" is the only
# model listed by default and "ara" the only one disabled by default.
NER_MODELS = set(env.to_list("INGESTORS_NER_MODELS", ["eng"]))
NER_DISABLE = set(env.to_list("INGESTORS_NER_DISABLE", ["ara"]))
NER_DEFAULT_MODEL = "xx"
Example #5
0
 def is_available(cls):
     """Report whether the Google Cloud Vision backend can be used.

     Returns False when the ``google.cloud.vision`` client cannot be
     imported. Otherwise returns the boolean reading of the
     ``OCR_VISION_API`` environment variable, with False as the fallback.
     """
     try:
         # Imported purely to probe that the client library is installed.
         from google.cloud.vision import ImageAnnotatorClient  # noqa
     except ImportError:
         available = False
     else:
         available = env.to_bool('OCR_VISION_API', False)
     return available
Example #6
0
import os
import pkg_resources
import multiprocessing
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version string of the installed "memorious" distribution.
VERSION = pkg_resources.get_distribution('memorious').version
APP_NAME = env.get('MEMORIOUS_APP_NAME', 'memorious')

# Enable debug logging etc.
DEBUG = env.to_bool('MEMORIOUS_DEBUG', False)
TESTING = False

# Base operating path: MEMORIOUS_BASE_PATH, falling back to a "data"
# directory under the current working directory.
BASE_PATH = env.get('MEMORIOUS_BASE_PATH', os.path.join(os.getcwd(), 'data'))

# Point the servicelayer archive at BASE_PATH/archive, but only when
# servicelayer has no archive path of its own.
if not sls.ARCHIVE_PATH:
    sls.ARCHIVE_PATH = os.path.join(BASE_PATH, 'archive')

# Directory which contains crawler pipeline YAML specs. No fallback is given.
CONFIG_PATH = env.get('MEMORIOUS_CONFIG_PATH')

# Try and run scrapers in a way that only acquires new data (on by default).
INCREMENTAL = env.to_bool('MEMORIOUS_INCREMENTAL', True)

# How many days until an incremental crawl expires.
EXPIRE = env.to_int('MEMORIOUS_EXPIRE', 1)
Example #7
0
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
import uuid
from servicelayer import env
from flask_babel import lazy_gettext

# Runtime switches. DEBUG is resolved first because it supplies the fallback
# for CACHE.

# ALEPH_DEBUG: show error messages to the user.
DEBUG = env.to_bool('ALEPH_DEBUG', default=False)
# ALEPH_CACHE: propose HTTP caching to the user agents. Falls back to the
# inverse of DEBUG.
CACHE = env.to_bool('ALEPH_CACHE', default=not DEBUG)
# ALEPH_MAINTENANCE: put the system into read-only mode and display a warning.
MAINTENANCE = env.to_bool('ALEPH_MAINTENANCE', default=False)
# Unit test context. Hard-coded off here; not read from the environment.
TESTING = False

###############################################################################
# General instance information
#
# Each setting names the environment variable that overrides it, followed by
# the fallback value.

APP_TITLE = env.get('ALEPH_APP_TITLE', lazy_gettext('Aleph'))
APP_DESCRIPTION = env.get('ALEPH_APP_DESCRIPTION', '')
APP_NAME = env.get('ALEPH_APP_NAME', 'aleph')
APP_UI_URL = env.get('ALEPH_UI_URL', 'http://localhost:8080/')
APP_LOGO = env.get('ALEPH_LOGO', '/static/logo.png')
APP_FAVICON = env.get('ALEPH_FAVICON', '/static/favicon.png')

# System-wide banner shown in the user interface. No fallback is supplied.
APP_BANNER = env.get('ALEPH_APP_BANNER')
Example #8
0
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Hard-coded off; not read from the environment.
TESTING = False

# Document conversion service endpoint. INGESTORS_CONVERT_DOCUMENT_URL takes
# precedence; UNOSERVICE_URL is consulted next, then the built-in URL.
CONVERT_URL = env.get(
    'INGESTORS_CONVERT_DOCUMENT_URL',
    env.get('UNOSERVICE_URL', 'http://convert-document:3000/convert'),
)
CONVERT_TIMEOUT = env.to_int('INGESTORS_CONVERT_TIMEOUT', 2 * 60 * 60)  # 2 hrs

# Google Cloud API switch (expensive!), off unless explicitly enabled.
OCR_VISION_API = env.to_bool('INGESTORS_OCR_VISION_API', default=False)

# Location of the Geonames data file.
GEONAMES_PATH = env.get(
    'INGESTORS_GEONAMES_PATH', '/ingestors/data/geonames.txt'
)

# Location of the FastText lid model file.
LID_MODEL_PATH = env.get(
    'INGESTORS_LID_MODEL_PATH', '/ingestors/data/lid.176.ftz'
)

# Entity extraction is on by default; set the variable to disable it.
ANALYZE_ENTITIES = env.to_bool('INGESTORS_ANALYZE_ENTITIES', default=True)

# Available NER models, held as a set; "eng" is the only default entry.
NER_MODELS = set(env.to_list('INGESTORS_NER_MODELS', ['eng']))
NER_DEFAULT_MODEL = 'xx'

# Use the environment variable set in aleph.env; the ftmstore setting is
# kept as the fallback.
sts.DATABASE_URI = env.get('ALEPH_DATABASE_URI', sts.DATABASE_URI)
Example #9
0
# Configuration defaults.
#
# You should never edit this file directly for deployment or in the developer
# setup. Wherever possible use environment variables to override the
# defaults.
import os
from servicelayer import env
from urllib.parse import urlparse
from flask_babel import lazy_gettext
from datetime import timedelta

# Absolute path of the directory holding this module (the aleph module
# directory).
APP_DIR = os.path.abspath(os.path.dirname(__file__))

# Runtime switches. DEBUG is resolved before CACHE because it supplies the
# fallback for CACHE.

# ALEPH_DEBUG: show error messages to the user.
DEBUG = env.to_bool("ALEPH_DEBUG", default=False)
# ALEPH_PROFILE: profile requests.
PROFILE = env.to_bool("ALEPH_PROFILE", default=False)
# ALEPH_CACHE: propose HTTP caching to the user agents. Falls back to the
# inverse of DEBUG.
CACHE = env.to_bool("ALEPH_CACHE", default=not DEBUG)
# ALEPH_MAINTENANCE: put the system into read-only mode and display a warning.
MAINTENANCE = env.to_bool("ALEPH_MAINTENANCE", default=False)
# Unit test context. Hard-coded off here; not read from the environment.
TESTING = False

###############################################################################
# General instance information
#
# Each setting names the environment variable that overrides it, followed by
# the fallback value.

APP_TITLE = env.get("ALEPH_APP_TITLE", lazy_gettext("Aleph"))
APP_NAME = env.get("ALEPH_APP_NAME", "aleph")
APP_UI_URL = env.get("ALEPH_UI_URL", "http://localhost:8080/")
Example #10
0
import os
import pkg_resources
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version string of the installed "memorious" distribution.
VERSION = pkg_resources.get_distribution("memorious").version
APP_NAME = env.get("MEMORIOUS_APP_NAME", "memorious")

# Enable debug logging etc.
DEBUG = env.to_bool("MEMORIOUS_DEBUG", False)
TESTING = False

# Base operating path: MEMORIOUS_BASE_PATH, falling back to a "data"
# directory under the current working directory.
BASE_PATH = env.get("MEMORIOUS_BASE_PATH", os.path.join(os.getcwd(), "data"))

# Point the servicelayer archive at BASE_PATH/archive, but only when
# servicelayer has no archive path of its own.
if not sls.ARCHIVE_PATH:
    sls.ARCHIVE_PATH = os.path.join(BASE_PATH, "archive")

# Directory which contains crawler pipeline YAML specs. No fallback is given.
CONFIG_PATH = env.get("MEMORIOUS_CONFIG_PATH")

# Try and run scrapers in a way that only acquires new data (on by default).
INCREMENTAL = env.to_bool("MEMORIOUS_INCREMENTAL", True)

# Keep the crawler running even when it encounters an error (off by default).
CONTINUE_ON_ERROR = env.to_bool("MEMORIOUS_CONTINUE_ON_ERROR", False)

# How many days until an incremental crawl expires
Example #11
0
# Redis cache
# URL format: redis://localhost:6379/0
REDIS_URL = env.get("REDIS_URL")
# Base duration from which REDIS_LONG and the REDIS_EXPIRE fallback are
# derived.
# NOTE(review): presumably seconds; 84700 is close to, but not equal to, the
# 86400 seconds in a day — confirm the value is intentional.
REDIS_SHORT = 84700
# 200 times the short duration.
REDIS_LONG = REDIS_SHORT * 200
# Overridable through REDIS_EXPIRE; falls back to 7 times the short duration.
REDIS_EXPIRE = env.to_int("REDIS_EXPIRE", REDIS_SHORT * 7)
REDIS_PREFIX = "sla"

# Persistent database tags
TAGS_DATABASE_URI = env.get("TAGS_DATABASE_URI", "sqlite://")

# Worker
WORKER_RETRY = env.to_int("WORKER_RETRY", 3)
# Falls back to the CPU count reported by multiprocessing.cpu_count().
WORKER_THREADS = env.to_int("WORKER_THREADS", multiprocessing.cpu_count())
WORKER_REPORTING = env.to_bool("WORKER_REPORTING", True)

# Amazon client credentials
AWS_KEY_ID = env.get("AWS_ACCESS_KEY_ID")
AWS_SECRET_KEY = env.get("AWS_SECRET_ACCESS_KEY")
AWS_REGION = env.get("AWS_REGION", "eu-west-1")
# S3 compatible Minio host if using Minio for storage
ARCHIVE_ENDPOINT_URL = env.get("ARCHIVE_ENDPOINT_URL")

# Storage type (either 's3', 'gs', or 'file', i.e. local file system):
ARCHIVE_TYPE = env.get("ARCHIVE_TYPE", "file")
ARCHIVE_BUCKET = env.get("ARCHIVE_BUCKET")
ARCHIVE_PATH = env.get("ARCHIVE_PATH")
# Falls back to the archive bucket when PUBLICATION_BUCKET is not given.
PUBLICATION_BUCKET = env.get("PUBLICATION_BUCKET", ARCHIVE_BUCKET)

# Logging
Example #12
0
import os
import pkg_resources
import multiprocessing
from servicelayer import env
from servicelayer import settings as sls

###############################################################################
# Core configuration

# Version string of the installed "memorious" distribution.
VERSION = pkg_resources.get_distribution("memorious").version
APP_NAME = env.get("MEMORIOUS_APP_NAME", "memorious")

# Enable debug logging etc.
DEBUG = env.to_bool("MEMORIOUS_DEBUG", False)
TESTING = False

# Base operating path: MEMORIOUS_BASE_PATH, falling back to a "data"
# directory under the current working directory.
BASE_PATH = env.get("MEMORIOUS_BASE_PATH", os.path.join(os.getcwd(), "data"))

# Point the servicelayer archive at BASE_PATH/archive, but only when
# servicelayer has no archive path of its own.
if not sls.ARCHIVE_PATH:
    sls.ARCHIVE_PATH = os.path.join(BASE_PATH, "archive")

# Directory which contains crawler pipeline YAML specs. No fallback is given.
CONFIG_PATH = env.get("MEMORIOUS_CONFIG_PATH")

# Try and run scrapers in a way that only acquires new data (on by default).
INCREMENTAL = env.to_bool("MEMORIOUS_INCREMENTAL", True)

# How many days until an incremental crawl expires.
EXPIRE = env.to_int("MEMORIOUS_EXPIRE", 1)
Example #13
0
from servicelayer import env
from servicelayer import settings as sls
from ftmstore import settings as sts

# Hard-coded off; not read from the environment.
TESTING = False

# debugpy integration
#
# When set to True, a debugpy server will be enabled in cli.py process()
DEBUGPY_PROCESS = env.to_bool("INGESTORS_DEBUGPY_PROCESS", default=False)
# Address the debugpy server binds to. The fallback is the wildcard address.
# NOTE(review): confirm that listening on all interfaces is intended wherever
# the debug server is switched on.
DEBUGPY_ADDRESS = env.get("INGESTORS_DEBUGPY_ADDRESS", "0.0.0.0")
# Port the debugpy server listens on for a connection
DEBUGPY_PORT = env.to_int("INGESTORS_DEBUGPY_PORT", 5678)
# When set to True, the application blocks after setting up the debug server
# and waits for a client connection before continuing with processing
DEBUGPY_WAIT_FOR_CLIENT = env.to_bool(
    "INGESTORS_DEBUGPY_WAIT_FOR_CLIENT", default=False
)

# Document conversion service endpoint. INGESTORS_CONVERT_DOCUMENT_URL takes
# precedence; UNOSERVICE_URL is consulted next, then the built-in URL.
CONVERT_URL = env.get(
    "INGESTORS_CONVERT_DOCUMENT_URL",
    env.get("UNOSERVICE_URL", "http://convert-document:3000/convert"),
)
CONVERT_TIMEOUT = env.to_int("INGESTORS_CONVERT_TIMEOUT", 2 * 60 * 60)  # 2 hrs

# Google Cloud API switch (expensive!), off unless explicitly enabled.
OCR_VISION_API = env.to_bool("INGESTORS_OCR_VISION_API", default=False)

# Google Cloud Translation API switch, off unless explicitly enabled.
TRANSLATION_API = env.to_bool("INGESTORS_TRANSLATION_API", default=False)

# White list of language IDs for languages that should be translated.
# An empty white list is considered a wildcard, allowing all languages to be
# translated.
# NOTE(review): the fallback handed to to_list is None rather than an empty
# list — confirm to_list accepts that when the variable is unset.
TRANSLATION_LANGUAGE_WHITE_LIST = env.to_list(
    "INGESTORS_TRANSLATION_LANGUAGE_WHITE_LIST", None
)
Example #14
0
from servicelayer import env

# Read from UNOSERVICE_URL; no fallback is supplied here.
UNOSERVICE_URL = env.get('UNOSERVICE_URL')
# Read from OCR_VISION_API as a boolean; off unless explicitly enabled.
OCR_VISION_API = env.to_bool('OCR_VISION_API', default=False)