def index_connect(local_config: LocalConfig = None,
                  application_name: str = None,
                  validate_connection: bool = True) -> Index:
    """
    Create a Data Cube Index that can connect to a PostgreSQL server

    It contains all the required connection parameters, but doesn't actually
    check that the server is available.

    :param application_name: A short, alphanumeric name to identify this application.
    :param local_config: Config object to use. (optional)
    :param validate_connection: Validate database connection and schema immediately
    :raises datacube.index.Exceptions.IndexSetupError:
    """
    if local_config is None:
        local_config = LocalConfig.find()

    driver_name = local_config.get('index_driver', 'default')
    index_driver = index_driver_by_name(driver_name)
    if not index_driver:
        raise RuntimeError(
            "No index driver found for %r. %s available: %s" % (
                driver_name, len(index_drivers()), ', '.join(index_drivers())))

    return index_driver.connect_to_index(local_config,
                                         application_name=application_name,
                                         validate_connection=validate_connection)
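# Usage sketch for index_connect() — assumes a configured ~/.datacube.conf
# pointing at a reachable PostgreSQL server; the application name below is
# illustrative, and the import path follows datacube-core's public API:
from datacube.index import index_connect

index = index_connect(application_name='example-script')
try:
    # Any index API is now available, e.g. counting indexed datasets:
    print('datasets indexed:', index.datasets.count())
finally:
    index.close()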
def check(local_config: LocalConfig):
    """
    Verify & view current configuration
    """
    def echo_field(name, value):
        echo('{:<15}'.format(name + ':') + style(str(value), bold=True))

    echo_field('Version', datacube.__version__)
    echo_field('Config files', ','.join(local_config.files_loaded))
    echo_field('Host',
               '{}:{}'.format(local_config['db_hostname'] or 'localhost',
                              local_config.get('db_port', None) or '5432'))
    echo_field('Database', local_config['db_database'])
    echo_field('User', local_config['db_username'])
    echo_field('Environment', local_config['env'])
    echo_field('Index Driver', local_config['index_driver'])

    echo()
    echo('Valid connection:\t', nl=False)
    try:
        index = index_connect(local_config=local_config)
        echo(style('YES', bold=True))
        for role, user, description in index.users.list_users():
            if user == local_config['db_username']:
                echo('You have %s privileges.' % style(role.upper(), bold=True))
    except OperationalError as e:
        handle_exception('Error Connecting to Database: %s', e)
    except IndexSetupError as e:
        handle_exception('Database not initialised: %s', e)
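# In datacube's CLI this function backs `datacube system check`. A sketch of
# driving it directly — this assumes check() is callable as a plain function;
# in the real CLI it is wrapped as a click command that injects the config:
from datacube.config import LocalConfig

config = LocalConfig.find()   # loads ~/.datacube.conf and related files
check(config)                 # prints Version, Host, User, ... plus whether a
                              # valid database connection could be made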
def test_search_returning(index: Index,
                          local_config: LocalConfig,
                          pseudo_ls8_type: DatasetType,
                          pseudo_ls8_dataset: Dataset,
                          ls5_dataset_w_children) -> None:
    assert index.datasets.count() == 4, "Expected four test datasets"

    # Expect one product with our one dataset.
    results = list(index.datasets.search_returning(
        ('id', 'sat_path', 'sat_row'),
        platform='LANDSAT_8',
        instrument='OLI_TIRS',
    ))
    assert len(results) == 1
    id_, path_range, sat_range = results[0]
    assert id_ == pseudo_ls8_dataset.id
    # TODO: output nicer types?
    assert path_range == NumericRange(Decimal('116'), Decimal('116'), '[]')
    assert sat_range == NumericRange(Decimal('74'), Decimal('84'), '[]')

    results = list(index.datasets.search_returning(
        ('id', 'metadata_doc',),
        platform='LANDSAT_8',
        instrument='OLI_TIRS',
    ))
    assert len(results) == 1
    id_, document = results[0]
    assert id_ == pseudo_ls8_dataset.id
    assert document == pseudo_ls8_dataset.metadata_doc

    my_username = local_config.get('db_username', DEFAULT_DB_USER)

    # Mixture of document and native fields
    results = list(index.datasets.search_returning(
        ('id', 'creation_time', 'format', 'label'),
        platform='LANDSAT_8',
        indexed_by=my_username,
    ))
    assert len(results) == 1

    id_, creation_time, format_, label = results[0]

    assert id_ == pseudo_ls8_dataset.id
    assert format_ == 'PSEUDOMD'

    # It's always UTC in the document
    expected_time = creation_time.astimezone(tz.tzutc()).replace(tzinfo=None)
    assert expected_time.isoformat() == pseudo_ls8_dataset.metadata_doc['creation_dt']
    assert label == pseudo_ls8_dataset.metadata_doc['ga_label']
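# Outside of tests, search_returning() is useful because it yields lightweight
# tuples of just the requested fields rather than full Dataset objects. A
# sketch of typical application use — the product name and field choices are
# illustrative, not taken from the test fixtures above:
dc_index = index_connect(application_name='search-example')
for dataset_id, time_range in dc_index.datasets.search_returning(
        ('id', 'time'), product='ls8_nbar_albers'):
    print(dataset_id, time_range)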
class DatacubeReplicator(object):
    def __init__(self, config):
        self.remote_host = config['remote_host']
        self.remote_user = config['remote_user']
        self.db_password = config['db_password']
        self.remote_dir = config['remote_dir']
        self.local_dir = config['local_dir']
        self.replication_defns = config['replicated_data']

        self.client = None
        self.sftp = None
        self.tunnel = None
        self.remote_dc_config = None
        self.remote_dc = None
        self.local_index = index_connect()

    def run(self):
        self.connect()
        self.read_remote_config()
        self.connect_to_db()
        self.replicate_all()
        self.disconnect()

    def connect(self):
        client = SSHClient()
        client.load_system_host_keys()
        client.set_missing_host_key_policy(WarningPolicy())
        client.connect(hostname=self.remote_host, username=self.remote_user)

        LOG.debug(client)
        self.client = client
        self.sftp = client.open_sftp()

    def disconnect(self):
        self.client.close()
        self.tunnel.stop()

    def read_remote_config(self):
        remote_config = ConfigParser()
        remote_config.read_string(_DEFAULT_CONF)
        with self.sftp.open('.datacube.conf') as fin:
            remote_config.read_file(fin)
        self.remote_dc_config = LocalConfig(remote_config)

    def connect_to_db(self):
        self.tunnel = SSHTunnelForwarder(
            self.remote_host,
            ssh_username=self.remote_user,
            remote_bind_address=(self.remote_dc_config.get('db_hostname', '127.0.0.1'),
                                 int(self.remote_dc_config.get('db_port', 5432))))
        self.tunnel.start()

        # pylint: disable=protected-access
        self.remote_dc_config._config['datacube']['db_hostname'] = '127.0.0.1'
        self.remote_dc_config._config['datacube']['db_port'] = str(self.tunnel.local_bind_port)
        self.remote_dc_config._config['datacube']['db_username'] = self.remote_user
        self.remote_dc_config._config['datacube']['db_password'] = self.db_password

        # This requires the password from somewhere.
        # Parsing it out of .pgpass sounds error-prone and fragile.
        # Let's put it in the configuration for now.
        LOG.debug('Remote configuration loaded %s', self.remote_dc_config)

        self.remote_dc = Datacube(config=self.remote_dc_config)

    def replicate_all(self):
        for defn in tqdm(self.replication_defns, 'Replicating products'):
            self.replicate(defn)

    def replicate_all_products(self):
        products = self.remote_dc.index.products.get_all()
        for product in products:
            self.local_index.products.add(product)

    def replicate(self, defn):
        datasets = list(self.remote_dc.find_datasets(**defn))

        if not datasets:
            LOG.info('No remote datasets found matching %s', defn)
            return

        # TODO: use generator not list
        product = datasets[0].type
        LOG.info('Ensuring remote product is in local index. %s', product)
        self.local_index.products.add(product)

        for dataset in tqdm(datasets, 'Datasets'):
            # dataset = remote_dc.index.datasets.get(dataset.id, include_sources=True)
            # We would need to pull the parent products down too
            # TODO: Include parent source datasets + product definitions
            dataset.sources = {}

            LOG.debug('Replicating dataset %s', dataset)
            remote_path = uri_to_path(dataset.local_uri)
            local_path = self.remote_to_local(uri_to_path(dataset.local_uri))

            # Ensure local path exists
            Path(local_path).parent.mkdir(parents=True, exist_ok=True)

            # Download file
            self.sftp.get(remote_path, local_path)

            # Add to local index
            dataset.local_uri = 'file://' + local_path
            self.local_index.datasets.add(dataset)

            LOG.debug('Downloaded to %s', local_path)

    def remote_to_local(self, remote):
        return remote.replace(self.remote_dir, self.local_dir)
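# Usage sketch for DatacubeReplicator — the keys mirror those read in
# __init__; the host, user, paths, and product query below are illustrative
# placeholders, and each entry of 'replicated_data' is passed through as
# keyword arguments to Datacube.find_datasets():
config = {
    'remote_host': 'datacube.example.com',
    'remote_user': 'replication',
    'db_password': 'example-password',   # remote DB password (see comment in connect_to_db)
    'remote_dir': '/g/data/datacube/',
    'local_dir': '/home/me/datacube/',
    'replicated_data': [
        {'product': 'ls8_nbar_albers', 'limit': 10},
    ],
}
DatacubeReplicator(config).run()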