def listen(self, timeout=None):
    '''Install our handler on the configured queue and drain the queue.

    Everything the queue delivers is passed to ``self.handler``.

    :param timeout: forwarded unchanged to ``queue.drain`` as ``timeout``.
    :raises InsufficientConfiguration: if ``self.queue`` is not set (falsy).
    '''
    queue = self.queue
    if not queue:
        raise InsufficientConfiguration('No queue configured.')

    # The handler must be registered before draining starts.
    queue.set_handler(self.handler)
    queue.drain(timeout=timeout)
def _read_config_file(config_file):
    '''Load settings from a dotenv-style config file, if one is available.

    :param config_file: path of the file to load. When None, the path is
        taken from the DATALAKE_CONFIG environment variable, falling back
        to DEFAULT_CONFIG.
    :raises InsufficientConfiguration: if the resolved path does not exist
        and is not DEFAULT_CONFIG.
    '''
    path = config_file
    if path is None:
        path = os.environ.get('DATALAKE_CONFIG', DEFAULT_CONFIG)

    if os.path.exists(path):
        load_dotenv(path)
        return

    # A missing default config is tolerated; only a path that differs from
    # the default is treated as an error when it is absent.
    if path == DEFAULT_CONFIG:
        return

    raise InsufficientConfiguration(
        'Config file {} not exist.'.format(path))
def _prepare_connection(self, connection):
    '''Store the connection to use on ``self._connection``.

    :param connection: an existing connection. When truthy it is used as
        is; otherwise a new connection is opened with
        ``boto.dynamodb2.connect_to_region`` for the region named by the
        AWS_REGION environment variable.
    :raises InsufficientConfiguration: if no connection was given and
        AWS_REGION is unset or empty.
    '''
    # An explicitly supplied connection always wins over the environment.
    if connection:
        self._connection = connection
        return

    aws_region = os.environ.get('AWS_REGION')
    if aws_region:
        self._connection = boto.dynamodb2.connect_to_region(aws_region)
        return

    raise InsufficientConfiguration(
        'Please provide a connection or configure a region')
def _get_sentry_handler():
    '''Build the logging handler configuration for Sentry, if requested.

    :returns: None when DATALAKE_SENTRY_DSN is unset or empty; otherwise a
        dict describing a ``raven.handlers.logging.SentryHandler`` at level
        ERROR with the configured DSN.
    :raises InsufficientConfiguration: if a DSN is configured but
        ``sentry_available()`` reports that raven is missing.
    '''
    sentry_dsn = os.environ.get('DATALAKE_SENTRY_DSN')
    if not sentry_dsn:
        return None

    if sentry_available():
        return {
            'level': 'ERROR',
            'class': 'raven.handlers.logging.SentryHandler',
            'dsn': sentry_dsn
        }

    # A DSN was requested but cannot be honored: fail loudly rather than
    # silently dropping error reports.
    raise InsufficientConfiguration(
        'DATALAKE_SENTRY_DSN is configured but raven is not installed. '
        '`pip install datalake[sentry]` to turn this feature on.')
def _listen(self, timeout=None, workers=1):
    '''Push the files found in the queue directory, then run.

    Logs a banner with the datalake version, validates ``workers``, sets
    up the worker pool when more than one worker is requested, pushes
    every entry currently listed in ``self.queue_dir`` and finally calls
    ``self._run(timeout)``.

    :param timeout: forwarded unchanged to ``self._run``.
    :param workers: number of upload workers; must be positive.
    :raises InsufficientConfiguration: if ``workers`` is zero or negative.
    '''
    from . import __version__

    log.info('------------------------------')
    log.info('datalake ' + __version__)

    self._workers = []
    if workers <= 0:
        raise InsufficientConfiguration(
            'number of upload workers cannot be zero or negative')

    if workers > 1:
        # With multiple workers the main thread only watches the queue
        # directory and feeds the files into a Queue that the worker
        # threads service. Note that "queue" therefore means two different
        # things in this module.
        self._queue = Queue()
        self._workers = [
            self._create_worker(index) for index in range(workers)
        ]

    # Push whatever is already sitting in the queue directory before
    # handing control to the run loop.
    for entry in os.listdir(self.queue_dir):
        self._push(os.path.join(self.queue_dir, entry))

    self._run(timeout)
def _validate_queue_dir(self):
    '''Ensure a queue directory is configured and make it absolute.

    Replaces ``self.queue_dir`` with its absolute form.

    :raises InsufficientConfiguration: if ``self.queue_dir`` is None.
    '''
    configured_dir = self.queue_dir
    if configured_dir is None:
        raise InsufficientConfiguration('Please set DATALAKE_QUEUE_DIR')
    self.queue_dir = os.path.abspath(configured_dir)
def wrapped(*args, **kwargs):
    # Delegate to the wrapped callable `f` only when the optional queuable
    # dependencies are present (`has_queue`); otherwise tell the user how
    # to install them.
    if has_queue:
        return f(*args, **kwargs)
    raise InsufficientConfiguration(
        'This feature requires the queuable deps. '
        '`pip install datalake[queuable]` to turn this feature on.')
def http_url(self):
    '''Return the datalake HTTP URL with trailing slashes removed.

    Uses ``self._http_url`` when it is set, otherwise falls back to the
    DATALAKE_HTTP_URL environment variable. The resolved value is stored
    back on ``self._http_url``.

    :returns: the configured URL without any trailing ``/``.
    :raises InsufficientConfiguration: if neither source provides a
        non-empty URL.
    '''
    self._http_url = self._http_url or environ.get('DATALAKE_HTTP_URL')
    # An empty DATALAKE_HTTP_URL is no more usable than a missing one. The
    # `or` above already treats an empty self._http_url as unset, so the
    # environment value is held to the same rule instead of returning ''.
    if not self._http_url:
        raise InsufficientConfiguration('Please specify DATALAKE_HTTP_URL')
    return self._http_url.rstrip('/')
def from_config(cls):
    '''Construct an instance from the environment configuration.

    :returns: ``cls(table_name)``, where the table name is read from the
        DATALAKE_DYNAMODB_TABLE environment variable.
    :raises InsufficientConfiguration: if DATALAKE_DYNAMODB_TABLE is not
        set.
    '''
    configured_table = os.environ.get('DATALAKE_DYNAMODB_TABLE')
    if configured_table is not None:
        return cls(configured_table)
    raise InsufficientConfiguration('Please specify a dynamodb table')
def from_config(cls):
    '''Construct an instance from the environment configuration.

    :returns: ``cls(queue_name)``, where the queue name is read from the
        DATALAKE_INGESTION_QUEUE environment variable.
    :raises InsufficientConfiguration: if DATALAKE_INGESTION_QUEUE is not
        set.
    '''
    configured_queue = os.environ.get('DATALAKE_INGESTION_QUEUE')
    if configured_queue is not None:
        return cls(configured_queue)
    raise InsufficientConfiguration('Please configure a queue')