def test_manifest(self): """Load the manifest and convert it to a string to check the round-trip""" from ambry.warehouse.manifest import Manifest from ambry.util import get_logger from ambry.util import print_yaml m = Manifest(self.m, get_logger('TL')) self.assertEqual(self.m_contents.strip(), str(m).strip()) l = self.get_library() l.put_bundle(self.bundle) for k, ident in l.list().items(): print ident w = self.get_warehouse(l, 'sqlite') print 'Installing to ', w.database.path w.title = "This is the Warehouse!" w.about = "A Warehouse full of wonder" w.install_manifest(m) extracts = w.extract(force=True) print print_yaml(extracts)
def get_dataset(self):
    """Return the single dataset record stored in this database.

    :return: the Dataset ORM record
    :raises NotFoundError: when the database has no dataset record or
        the dataset table cannot be read.
    """
    from sqlalchemy.exc import OperationalError
    from sqlalchemy.orm.exc import NoResultFound
    from ..dbexceptions import NotFoundError
    from ambry.orm import Dataset

    try:
        # Query.one() raises NoResultFound for zero rows (it never
        # returns None), so the original `if not ds` branch was dead
        # code and the no-row case fell through to the generic handler.
        return self.session.query(Dataset).one()
    except (OperationalError, NoResultFound):
        # Table missing/unreadable, or simply no dataset row yet.
        raise NotFoundError(
            "No dataset record found in '{}'".format(self.dsn))
    except Exception as e:
        from ..util import get_logger

        # self.logger can get caught in a recursion loop, so build a
        # fresh module-level logger instead.
        logger = get_logger(__name__)
        # str(e) instead of the deprecated, Python2-only e.message.
        logger.error(
            "Failed to get dataset: {}; {}".format(e, self.dsn))
        raise
def test_manifest(self): """Load the manifest and convert it to a string to check the round-trip""" from ambry.warehouse.manifest import Manifest from ambry.util import get_logger from ambry.util import print_yaml m = Manifest(self.m,get_logger('TL') ) self.assertEqual(self.m_contents.strip(), str(m).strip()) l = self.get_library() l.put_bundle(self.bundle) for k, ident in l.list().items(): print ident w = self.get_warehouse(l, 'sqlite') print 'Installing to ', w.database.path w.title = "This is the Warehouse!" w.about = "A Warehouse full of wonder" w.install_manifest(m) extracts = w.extract(force=True) print print_yaml(extracts)
def get_dataset(self):
    """Return the single dataset record stored in this database.

    :return: the Dataset ORM record
    :raises NotFoundError: when the database has no dataset record or
        the dataset table cannot be read.
    """
    from sqlalchemy.exc import OperationalError
    from sqlalchemy.orm.exc import NoResultFound
    from ..dbexceptions import NotFoundError
    from ambry.orm import Dataset

    try:
        # Query.one() raises NoResultFound for zero rows (it never
        # returns None), so the original `if not ds` branch was dead
        # code and the no-row case fell through to the generic handler.
        return self.session.query(Dataset).one()
    except (OperationalError, NoResultFound):
        # Table missing/unreadable, or simply no dataset row yet.
        raise NotFoundError(
            "No dataset record found in '{}'".format(self.dsn))
    except Exception as e:
        from ..util import get_logger

        # self.logger can get caught in a recursion loop, so build a
        # fresh module-level logger instead.
        logger = get_logger(__name__)
        # str(e) instead of the deprecated, Python2-only e.message.
        logger.error(
            "Failed to get dataset: {}; {}".format(e, self.dsn))
        raise
def test_manifest_parts(self):
    """Exercise the manifest magics: head, extracts, partitions, views
    and materialized views (Manifest/get_logger come from module scope)."""
    from old.ipython.manifest import ManifestMagicsImpl

    manifest = Manifest('', get_logger('TL'))
    magics = ManifestMagicsImpl(manifest)

    head = """
TITLE: A Test Manifest, For Testing
UID: b4303f85-7d07-471d-9bcb-6980ea1bbf18
DATABASE: spatialite:///tmp/census-race-ethnicity.db
DIR: /tmp/warehouse
"""
    magics.manifest('', head)

    # Duplicate extract lines are deliberate; the manifest must
    # tolerate repeats.
    for line in ('foobar AS csv TO /bin/bar/bingo',
                 'foobar AS csv TO /bin/bar/bingo',
                 'foobar AS csv TO /bin/bar/bingo2',
                 'foobar AS csv TO /bin/bar/bingo'):
        magics.extract(line)

    magics.partitions('', 'one\ntwo\nthree\nfour')

    # Each view/mview group is registered twice on purpose.
    for _ in range(2):
        magics.view('foo_view_1', '1234\n5678\n')
        magics.view('foo_view_2', '1234\n5678\n')
        magics.mview('foo_mview_1', '1234\n5678\n')
        magics.mview('foo_mview_2', '1234\n5678\n')
def test_has(self):
    """A partition installed via a manifest is reported by has()."""
    self.waho = self._default_warehouse()

    manifest = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(manifest)

    self.assertTrue(
        self.waho.has('source-dataset-subset-variation-tthree-0.0.1'))
def test_partitions_list(self):
    """Installed partitions and manifests are discoverable on the warehouse."""
    self.waho = self._default_warehouse()

    manifest = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(manifest)

    names = [str(entry) for entry in self.waho.list()]
    self.assertIn(
        'source-dataset-subset-variation-tthree-0.0.1~piEGPXmDC8001001',
        names)

    # The manifest we just installed must be recorded on the warehouse.
    manifest_paths = [mfile.path for mfile in self.waho.manifests]
    self.assertIn(manifest.path, manifest_paths)
def test_load_insert(self):
    """Copy a partition's rows into an already-installed warehouse table."""
    self.waho = self._default_warehouse()

    manifest = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(manifest)

    partition = self.bundle.partitions.find(
        name='source-dataset-subset-variation-tthree')
    self.waho.load_local(partition, 'tthree', 'piEGPXmDC8001001_tthree')
def test_manifest(self):
    """Load the manifest and convert it to a string to check the round-trip."""
    # The with-block always binds m_contents (or raises), so the
    # original `m_contents = None` pre-initialization was pointless.
    with open(self.mf) as f:
        m_contents = f.read()

    mf = Manifest(self.mf, get_logger('TL'))

    # Newlines are removed so only the content has to round-trip, not
    # the exact line wrapping.
    orig_mf = m_contents.replace('\n', '').strip()
    conv_mf = str(mf).replace('\n', '').strip()

    self.assertEqual(orig_mf, conv_mf)
def get_warehouse(self, l, name, delete=True):
    """Create a test warehouse backed by the named configuration.

    :param l: library to attach to the warehouse
    :param name: warehouse configuration name (e.g. 'sqlite')
    :param delete: when True, drop and recreate the warehouse database
    :return: the initialized warehouse
    """
    from ambry.warehouse import new_warehouse

    w = new_warehouse(self.rc.warehouse(name), l)

    # The original assigned get_logger('unit_test') to w.logger and then
    # immediately overwrote it with TestLogger -- dead code, removed
    # (along with the now-unused get_logger import).
    lr = self.bundle.init_log_rate(10000)
    w.logger = TestLogger(lr)

    if delete:
        w.database.enable_delete = True
        w.database.delete()
        w.create()

    return w
def get_warehouse(self, l, name, delete=True):
    """Create a test warehouse backed by the named configuration.

    :param l: library to attach to the warehouse
    :param name: warehouse configuration name (e.g. 'sqlite')
    :param delete: when True, drop and recreate the warehouse database
    :return: the initialized warehouse
    """
    from ambry.warehouse import new_warehouse

    w = new_warehouse(self.rc.warehouse(name), l)

    # The original assigned get_logger('unit_test') to w.logger and then
    # immediately overwrote it with TestLogger -- dead code, removed
    # (along with the now-unused get_logger import).
    lr = self.bundle.init_log_rate(10000)
    w.logger = TestLogger(lr)

    if delete:
        w.database.enable_delete = True
        w.database.delete()
        w.create()

    return w
def __init__(self, library, dsn=None, logger=None):
    """Attach a SQL warehouse backend to a library.

    :param library: ambry Library instance
    :param dsn: database DSN; defaults to the library database's DSN
    :param logger: optional logger; an ERROR-level module logger is
        created when omitted
    :raises Exception: for a DSN scheme that is neither sqlite nor postgres
    """
    from ambry.library import Library

    assert isinstance(library, Library)

    self._library = library

    if not logger:
        import logging
        self._logger = get_logger(__name__, level=logging.ERROR, propagate=False)
    else:
        self._logger = logger

    if not dsn:
        # Use library database.
        dsn = library.database.dsn

    # Initialize appropriate backend.
    if dsn.startswith('sqlite:'):
        from ambry.mprlib.backends.sqlite import SQLiteBackend
        self._logger.debug('Initializing sqlite warehouse.')
        self._backend = SQLiteBackend(library, dsn)
    elif dsn.startswith('postgres'):
        try:
            from ambry.mprlib.backends.postgresql import PostgreSQLBackend
            self._logger.debug('Initializing postgres warehouse.')
            self._backend = PostgreSQLBackend(library, dsn)
        except ImportError as e:
            # Postgres support is optional; fall back to a sqlite
            # warehouse stored under the library's build directory.
            from ambry.mprlib.backends.sqlite import SQLiteBackend
            # The original also imported set_url_part, which was unused.
            from ambry.util import select_from_url

            dsn = "sqlite:///{}/{}".format(
                self._library.filesystem.build('warehouses'),
                select_from_url(dsn, 'path').strip('/') + ".db")

            # Message fixed: "({})for" was missing a space.
            self._logger.error(
                "Failed to import required modules ({}) for Postgres warehouse. "
                "Using Sqlite dsn={}".format(e, dsn))

            self._backend = SQLiteBackend(library, dsn)
    else:
        raise Exception('Do not know how to handle {} dsn.'.format(dsn))

    self._warehouse_dsn = dsn
def logger(self):
    """Build an INFO-level IPython logger whose adapter clears the
    notebook cell output before each record is emitted."""
    import logging
    from ambry.util import get_logger

    base = get_logger(
        'ipython', clear=True, template="%(levelname)s %(message)s")
    base.setLevel(logging.INFO)

    class ClearOutputAdapter(logging.LoggerAdapter):

        def process(self, msg, kwargs):
            # Wipe the cell so only the latest record is visible.
            from IPython.display import clear_output
            clear_output()
            return msg, kwargs

    return ClearOutputAdapter(base, None)
def test_dbobj_create_from_manifest(self):
    """Test creating tables, views, mviews, indexes and executing custom SQL."""
    from sqlalchemy.exc import OperationalError

    test_table = 'tthree'
    test_view = 'test_view'
    test_mview = 'test_mview'
    augmented_table_name = 'piEGPXmDC8001001_tthree'

    self.waho = self._default_warehouse()
    mf = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(mf)

    # Materialize as a list: the original generator expression was
    # consumed by the first assertIn, so every later membership check
    # ran against a partially or fully exhausted iterator.
    all_tbvw = [t.name for t in self.waho.tables]

    # tables
    self.assertIn(test_table, all_tbvw)
    self.assertEqual(test_table, self.waho.orm_table_by_name(test_table).name)
    self.assertTrue(self.waho.has_table(Config.__tablename__))

    # views
    self.assertIn(test_view, all_tbvw)
    self.assertIn(test_mview, all_tbvw)
    self.assertEqual('view', self.waho.orm_table_by_name(test_view).type)
    self.assertEqual('mview', self.waho.orm_table_by_name(test_mview).type)

    # augmented_table_name test
    self.assertEqual(
        augmented_table_name,
        self.waho.orm_table_by_name(augmented_table_name).name)

    # indexes: creating a duplicate index must fail
    self.assertRaises(
        OperationalError,
        self.waho.run_sql,
        'Create index test_index on files (f_id)')

    # custom SQL from the manifest was executed
    self.assertTrue(self.waho.has_table('sql_test'))
def logger(self):
    """Return an IPython-aware logger at INFO level; messages go through
    an adapter that clears the current cell output first."""
    import logging
    from ambry.util import get_logger

    ipy_logger = get_logger(
        'ipython',
        clear=True,
        template="%(levelname)s %(message)s")
    ipy_logger.setLevel(logging.INFO)

    class ClearOutputAdapter(logging.LoggerAdapter):

        def process(self, msg, kwargs):
            # Replace, rather than append to, the visible output.
            from IPython.display import clear_output
            clear_output()
            return msg, kwargs

    return ClearOutputAdapter(ipy_logger, None)
def __init__(self, library, dsn=None, logger=None):
    """Attach a SQL warehouse backend to a library.

    :param library: ambry Library instance
    :param dsn: database DSN; defaults to the library database's DSN
    :param logger: optional logger; an ERROR-level module logger is
        created when omitted
    :raises Exception: for a DSN scheme that is neither sqlite nor postgres
    """
    from ambry.library import Library

    assert isinstance(library, Library)

    self._library = library

    if not logger:
        import logging
        self._logger = get_logger(__name__, level=logging.ERROR, propagate=False)
    else:
        self._logger = logger

    if not dsn:
        # Use library database.
        dsn = library.database.dsn

    # Initialize appropriate backend.
    if dsn.startswith('sqlite:'):
        from ambry.mprlib.backends.sqlite import SQLiteBackend
        self._logger.debug('Initializing sqlite warehouse.')
        self._backend = SQLiteBackend(library, dsn)
    elif dsn.startswith('postgres'):
        try:
            from ambry.mprlib.backends.postgresql import PostgreSQLBackend
            self._logger.debug('Initializing postgres warehouse.')
            self._backend = PostgreSQLBackend(library, dsn)
        except ImportError as e:
            # Postgres support is optional; fall back to a sqlite
            # warehouse stored under the library's build directory.
            from ambry.mprlib.backends.sqlite import SQLiteBackend
            # The original also imported set_url_part, which was unused.
            from ambry.util import select_from_url

            dsn = "sqlite:///{}/{}".format(
                self._library.filesystem.build('warehouses'),
                select_from_url(dsn, 'path').strip('/') + ".db")

            # Message fixed: "({})for" was missing a space.
            self._logger.error(
                "Failed to import required modules ({}) for Postgres warehouse. "
                "Using Sqlite dsn={}".format(e, dsn))

            self._backend = SQLiteBackend(library, dsn)
    else:
        raise Exception('Do not know how to handle {} dsn.'.format(dsn))

    self._warehouse_dsn = dsn
def test_manifest_parts(self): from ambry.warehouse.manifest import Manifest from ambry.util import get_logger from old.ipython.manifest import ManifestMagicsImpl m = Manifest('', get_logger('TL')) mmi = ManifestMagicsImpl(m) m_head = """ TITLE: A Test Manifest, For Testing UID: b4303f85-7d07-471d-9bcb-6980ea1bbf18 DATABASE: spatialite:///tmp/census-race-ethnicity.db DIR: /tmp/warehouse """ mmi.manifest('',m_head) mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.extract('foobar AS csv TO /bin/bar/bingo2') mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.partitions('','one\ntwo\nthree\nfour') mmi.view('foo_view_1','1234\n5678\n') mmi.view('foo_view_2', '1234\n5678\n') mmi.mview('foo_mview_1', '1234\n5678\n') mmi.mview('foo_mview_2', '1234\n5678\n') mmi.view('foo_view_1', '1234\n5678\n') mmi.view('foo_view_2', '1234\n5678\n') mmi.mview('foo_mview_1', '1234\n5678\n') mmi.mview('foo_mview_2', '1234\n5678\n') #print yaml.dump(m.sections, default_flow_style=False) print str(m)
def test_manifest_parts(self): from ambry.warehouse.manifest import Manifest from ambry.util import get_logger from old.ipython.manifest import ManifestMagicsImpl m = Manifest('', get_logger('TL')) mmi = ManifestMagicsImpl(m) m_head = """ TITLE: A Test Manifest, For Testing UID: b4303f85-7d07-471d-9bcb-6980ea1bbf18 DATABASE: spatialite:///tmp/census-race-ethnicity.db DIR: /tmp/warehouse """ mmi.manifest('', m_head) mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.extract('foobar AS csv TO /bin/bar/bingo2') mmi.extract('foobar AS csv TO /bin/bar/bingo') mmi.partitions('', 'one\ntwo\nthree\nfour') mmi.view('foo_view_1', '1234\n5678\n') mmi.view('foo_view_2', '1234\n5678\n') mmi.mview('foo_mview_1', '1234\n5678\n') mmi.mview('foo_mview_2', '1234\n5678\n') mmi.view('foo_view_1', '1234\n5678\n') mmi.view('foo_view_2', '1234\n5678\n') mmi.mview('foo_mview_1', '1234\n5678\n') mmi.mview('foo_mview_2', '1234\n5678\n') #print yaml.dump(m.sections, default_flow_style=False) print str(m)
def test_remove(self):
    """Remove a bundle and a partition; repeating a removal may raise
    and is tolerated."""
    from sqlalchemy.exc import OperationalError, ProgrammingError

    self.waho = self._default_warehouse()
    manifest = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(manifest)

    # Remove the bundle; a second removal may fail because the record
    # is already gone.
    self.waho.remove('d000')
    try:
        self.waho.remove('d000')
    except AttributeError:
        pass

    # Same for the partition; either database error is acceptable.
    self.waho.remove('piEGPXmDC8001001')
    try:
        self.waho.remove('piEGPXmDC8001001')
    except (ProgrammingError, OperationalError):
        pass
def test_extract_table(self):
    """Extract tables, views and mviews to files in several formats."""
    from ambry.dbexceptions import NotFoundError

    test_table = 'tthree'
    test_view = 'test_view'
    test_mview = 'test_mview'

    self.waho = self._default_warehouse()
    manifest = Manifest(self.mf, get_logger('TL'))
    self.waho.install_manifest(manifest)

    # Any installed table will do for the vid-based extract.
    first_table = next(self.waho.tables)
    self.waho.extract_table(first_table.vid, 'csv')

    # Views and mviews; format names are case-insensitive.
    self.waho.extract_table(test_view, 'csv')
    self.waho.extract_table(test_mview, 'CsV')

    self.waho.extract_table(test_table, 'csv')
    self.waho.extract_table(test_table, 'json')

    try:
        import osgeo  # noqa
    except ImportError:
        pass
    else:
        # Geo formats require the GDAL/OGR bindings.
        self.waho.extract_table(test_table, 'shapefile')
        self.waho.extract_table(test_table, 'geojson')
        self.waho.extract_table(test_table, 'kml')

    self.assertRaises(NotFoundError, self.waho.extract_table, 'blabla')
from collections import defaultdict import struct from sqlalchemy.sql.expression import text from ambry.orm.dataset import Dataset from ambry.library.search_backends.base import BaseDatasetIndex, BasePartitionIndex,\ BaseIdentifierIndex, BaseSearchBackend, IdentifierSearchResult,\ DatasetSearchResult, PartitionSearchResult, SearchTermParser from ambry.util import get_logger import logging logger = get_logger(__name__, propagate=False) #logger.setLevel(logging.DEBUG) class SQLiteSearchBackend(BaseSearchBackend): def _get_dataset_index(self): """ Returns dataset index. """ # returns initialized dataset index return DatasetSQLiteIndex(backend=self) def _get_partition_index(self): """ Returns partition index. """ return PartitionSQLiteIndex(backend=self) def _get_identifier_index(self): """ Returns identifier index. """
import ambry l = ambry.get_library() w = Warehouse(l) for row in Warehouse(l).query('SELECT * FROM <partition id or vid> ... '): print row w.close() """ import logging from sqlalchemy import create_engine from ambry.identity import ObjectNumber, NotObjectNumberError, TableNumber from ambry.util import get_logger logger = get_logger(__name__, level=logging.ERROR) class Warehouse(object): """ Provides SQL access to datasets in the library, allowing users to issue SQL queries, either as SQL or via SQLAlchemy, to return datasets. """ def __init__(self, library, dsn=None, logger = None): from ambry.library import Library assert isinstance(library, Library) self._library = library if not logger:
from whoosh.index import create_in, open_dir from whoosh.fields import Schema, TEXT, KEYWORD, ID, NGRAMWORDS, NGRAM from whoosh import scoring from whoosh.qparser import QueryParser from whoosh.query import Term from fs.opener import fsopendir from ambry.library.search_backends.base import BaseDatasetIndex, BasePartitionIndex,\ BaseIdentifierIndex, BaseSearchBackend, IdentifierSearchResult,\ DatasetSearchResult, PartitionSearchResult from ambry.util import get_logger logger = get_logger(__name__, level=logging.INFO, propagate=False) class WhooshSearchBackend(BaseSearchBackend): def __init__(self, library): # each whoosh index requires root directory. self.root_dir = fsopendir(library._fs.search()).getsyspath('/') super(self.__class__, self).__init__(library) def _get_dataset_index(self): """ Returns dataset index. """ # returns initialized dataset index return DatasetWhooshIndex(backend=self) def _get_partition_index(self): """ Returns partition index. """
Copyright (c) 2015 Civic Knowledge. This file is licensed under the terms of the Revised BSD License, included in this distribution as LICENSE.txt """ from sqlalchemy import Column as SAColumn, Integer from sqlalchemy import Text, String, ForeignKey from sqlalchemy import event import os.path import ssl from . import Base, MutationDict, JSONEncodedObj import logging from ambry.util import get_logger logger = get_logger(__name__) #logger.setLevel(logging.DEBUG) from fs.opener import Opener, opener class HTTPSOpener(Opener): names = ['https'] desc = """HTTPS file opener. HTTPS only supports reading files, and not much else. example: * https://www.example.org/index.html""" @classmethod def get_fs(cls, registry, fs_name, fs_name_params, fs_path, writeable, create_dir): from fs.httpfs import HTTPFS
import ambry l = ambry.get_library() w = Warehouse(l) for row in Warehouse(l).query('SELECT * FROM <partition id or vid> ... '): print row w.close() """ import logging from sqlalchemy import create_engine from ambry.identity import ObjectNumber, NotObjectNumberError, TableNumber from ambry.util import get_logger logger = get_logger(__name__, level=logging.ERROR) class Warehouse(object): """ Provides SQL access to datasets in the library, allowing users to issue SQL queries, either as SQL or via SQLAlchemy, to return datasets. """ def __init__(self, library, dsn=None, logger=None): from ambry.library import Library assert isinstance(library, Library) self._library = library if not logger: import logging self._logger = get_logger(__name__,
included in this distribution as LICENSE.txt """ from . import DatabaseInterface # @UnresolvedImport # from .inserter import ValueInserter # import os # import logging from ambry.util import get_logger # , memoize # from ..database.inserter import SegmentedInserter, SegmentInserterFactory from contextlib import contextmanager import atexit import weakref # import pdb global_logger = get_logger(__name__) # global_logger.setLevel(logging.DEBUG) from sqlalchemy.dialects import registry registry.register( "spatialite", "ambry.database.dialects.spatialite", "SpatialiteDialect") registry.register( "postgis", "ambry.database.dialects.postgis", "PostgisDialect") connections = dict()
''' Created on Sep 7, 2013

@author: eric
'''

""" Copyright (c) 2013 Clarinova. This file is licensed under the terms of the
Revised BSD License, included in this distribution as LICENSE.txt """

from ambry.util import get_logger

import logging

logger = get_logger(__name__)

# logger.setLevel(logging.DEBUG)


class InserterInterface(object):
    """Abstract interface for row inserters; concrete inserters must
    override every method."""

    # BUG FIX throughout: `raise NotImplemented()` attempted to *call*
    # the NotImplemented singleton, which raises
    # "TypeError: 'NotImplementedType' object is not callable" instead
    # of signalling an abstract method. NotImplementedError is the
    # correct exception.

    def __enter__(self):
        raise NotImplementedError()

    def __exit__(self, type_, value, traceback):
        raise NotImplementedError()

    def insert(self, row, **kwargs):
        raise NotImplementedError()

    def close(self):
        raise NotImplementedError()


class UpdaterInterface(object):
    """Abstract interface for row updaters; concrete updaters must
    override every method."""

    def __enter__(self):
        raise NotImplementedError()

    def __exit__(self, type_, value, traceback):
        raise NotImplementedError()
"""Create an OGR shapefile from a schema.""" import ogr import osr import gdal import os import os.path from ambry.orm import Column from ambry.dbexceptions import ConfigurationError from ambry.dbexceptions import ProcessError from ambry.util import get_logger import logging # @UnusedImport import logging.handlers # @UnusedImport global_logger = get_logger(__name__) global_logger.setLevel(logging.INFO) class FeatureError(ProcessError): pass ogr_type_map = { None: ogr.OFTString, Column.DATATYPE_TEXT: ogr.OFTString, Column.DATATYPE_VARCHAR: ogr.OFTString, Column.DATATYPE_INTEGER: ogr.OFTInteger, Column.DATATYPE_INTEGER64: ogr.OFTInteger, Column.DATATYPE_NUMERIC: ogr.OFTReal, Column.DATATYPE_REAL: ogr.OFTReal, Column.DATATYPE_FLOAT: ogr.OFTReal,
then lirbary is initalized For postgres libraries, a prototype database is constructed by appending -proto to the end of the name of the test database. The proto databse is created and populated, and then flagged for use as a template. When a test library is created, it is constructed with the proto library as its template. """ import logging import os import unittest from ambry.util import ensure_dir_exists, memoize, get_logger from ambry.library import Library logger = get_logger(__name__, level=logging.INFO, propagate=False) DEFAULT_ROOT = '/tmp/ambry-test' # Default root for the library roots ( The library root is one level down ) class ProtoLibrary(object): """Manage test libraries. Creates a proto library, with pre-built bundles, that can be copied quickly into a test library, providing bundles to test against""" def __init__(self, config_path=None): """ :param config_path: :return: """
import six import apsw from ambry_sources.med import sqlite as sqlite_med from ambry.util import get_logger from ambry.bundle.asql_parser import parse_view, parse_index from .base import DatabaseBackend from ..exceptions import MissingTableError, MissingViewError logger = get_logger(__name__) # debug logging # import logging debug_logger = get_logger(__name__, level=logging.ERROR, propagate=False) class SQLiteBackend(DatabaseBackend): """ Backend to install/query MPR files for SQLite database. """ def sql_processors(self): return [_preprocess_sqlite_view, _preprocess_sqlite_index] def install_module(self, connection): sqlite_med.install_mpr_module(connection) def install(self,
from test.test_base import TestBase # Must be first ambry import to get logger set to internal logger. from ambry.bundle import LibraryDbBundle from ambry.dbexceptions import ConfigurationError from ambry.identity import Identity from ambry.library import new_library from ambry.library.database import LibraryDb, ROOT_CONFIG_NAME_V from ambry.library.query import Resolver from ambry.orm import Dataset, Partition, Table, Column, ColumnStat, Code, Config, File from ambry.run import get_runconfig, RunConfig from ambry import util from test.bundles.testbundle.bundle import Bundle global_logger = util.get_logger(__name__) global_logger.setLevel(logging.FATAL) ckcache.filesystem.global_logger = global_logger class Test(TestBase): def setUp(self): super(Test, self).setUp() import test.bundles.testbundle.bundle self.bundle_dir = os.path.dirname(test.bundles.testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir, 'library-test-config.yaml'), os.path.join(self.bundle_dir, 'bundle.yaml'),