def test_get_connection(self, mocked_base_objstore):
        mocked_base_objstore.get_connection.return_value = 'conn'

        objectstore = ObjectStore(config='foobar')
        connection = objectstore.get_connection()
        assert connection == objectstore.connection
        assert connection == objectstore.get_connection()
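
For reference, the mocked_base_objstore argument above implies that the base connection factory is patched out; a minimal sketch of a fixture that could provide it, assuming pytest and unittest.mock (the patch target is hypothetical):

import pytest
from unittest import mock

@pytest.fixture
def mocked_base_objstore():
    # Hypothetical patch target; the real suite would patch wherever
    # ObjectStore obtains its underlying connection.
    with mock.patch("mypackage.objectstore.base") as mocked:
        yield mocked
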
def download_data(file: str, bucket_name: str, dest: Path):
    """Parallel batching of data."""

    destination = dest / Path(file)
    if destination.exists():
        logging.info(f"file already exists")
        return
    logging.info(f"preparing data from {file}")
    store = ObjectStore()  # NOTE: Object store client is not thread safe.
    # We need to instantiate it in each unit of parallelization.
    store.connect()
    gevent.sleep(0.001)  # yield to the gevent hub so other greenlets can run
    download_file(file, bucket_name, store.client, destination)
def get_filename_generator(bucket_name: str):
    """Get a filename generator for a bucket."""
    store = ObjectStore(HOST, ACCESS_KEY, SECRET_KEY)
    store.connect()
    server_files = store.client.list_objects_v2(bucket_name,
                                                prefix="",
                                                recursive=True,
                                                start_after="")

    def gen_filenames(files):
        for file in files:
            yield file.object_name

    return gen_filenames(server_files)
def stream_files(files, bucket_name: str, dest: Path, pool_size: int = 16):
    """Stream all files."""

    store = ObjectStore(HOST, ACCESS_KEY, SECRET_KEY)
    store.connect()

    logging.info(
        f"starting a stream to download the data with {pool_size} workers")

    pool = Pool(pool_size + 1)

    jobs = [pool.spawn(download_data, file, bucket_name, dest) for file in files]

    gevent.joinall(jobs)
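
A minimal sketch of wiring the helpers above together, assuming the module's imports (pathlib.Path and the HOST/ACCESS_KEY/SECRET_KEY constants) are in scope; the bucket name and destination are illustrative:

if __name__ == "__main__":
    bucket = "raw-data"  # hypothetical bucket name
    files = get_filename_generator(bucket)
    stream_files(files, bucket, Path("/tmp/raw-data"), pool_size=8)
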
Example #6
class Exporter:
    def __init__(self):
        super().__init__()
        self.local_db = LocalDatabase()
        self.object_store = ObjectStore()

    def export_orphans(self):
        batch_names = [settings.NPR_NULL_VALUE]
        self._run_export(batch_names)

    def export_range(self, start_date, end_date):
        today = datetime.date.today()
        if end_date >= today:
            # ensure we never export today as its data is still incomplete
            end_date = today - datetime.timedelta(days=1)

        batch_names = self.get_batch_names_for_export(start_date, end_date)

        logger.info(f"Found {len(batch_names)} batch names for export")
        self._run_export(batch_names)

    def _run_export(self, batch_names):
        for batch_name in batch_names:
            filename = f"{batch_name}{settings.BACKUP_FILE_POSTFIX}"
            path = self.local_db.export_batch_to_csv(filename, batch_name)
            self.object_store.upload(path, filename)
            logger.info(f"Exported batch {batch_name}")

    def get_batch_names_for_export(self, start_date, end_date):
        num_days = (end_date - start_date).days + 1
        batch_names = [(start_date + datetime.timedelta(days=days)).strftime("%Y%m%d")
                       for days in range(num_days)]

        existing_batch_names = self.local_db.get_existing_batch_names()
        batches = set(batch_names).intersection(set(existing_batch_names))
        return sorted(batches)
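
A hedged usage sketch for the exporter above (dates are illustrative):

import datetime

exporter = Exporter()
exporter.export_orphans()
exporter.export_range(datetime.date(2023, 1, 1), datetime.date(2023, 1, 31))
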
Example #8
class Importer:
    def __init__(self):
        super().__init__()
        self.local_db = LocalDatabase()
        self.npr_db = NPRDatabase()
        self.object_store = ObjectStore()

    def import_orphans(self):
        batch_names = [settings.NPR_NULL_VALUE]
        self._run_import(batch_names)

    def import_range(self, start_date, end_date, override_existing=False):
        today = datetime.date.today()
        if end_date >= today:
            # ensure we never import today as its data is still incomplete
            end_date = today - datetime.timedelta(days=1)

        logger.info("Determining batch names for import...")
        batch_names = self.get_batch_names_for_download(
            start_date, end_date, override_existing
        )
        logger.info(f"{len(batch_names)} batches found to import")
        self._run_import(batch_names)

    def import_last_x_days(self, num_days):
        today = datetime.date.today()
        start_date = today - datetime.timedelta(days=num_days)
        return self.import_range(start_date, today)

    def _run_import(self, batch_names):
        if not batch_names:
            logger.info("Nothing to import")
            return

        for batch_name in batch_names:
            self.backup_batch(batch_name)

    def get_batch_names_for_download(self, start_date, end_date, override_existing):
        # We want batches that are requested and not yet backed up
        # (these are the set of candidates to back up).

        if override_existing:
            backed_up = []
        else:
            batch_names_in_local_db = self.local_db.get_existing_batch_names(
                require_table=False
            )
            batch_names_in_obj_store = self.object_store.get_existing_batch_names()
            backed_up = batch_names_in_obj_store + batch_names_in_local_db

        batch_names = self.npr_db.get_existing_batch_names()
        batch_names = filter_batch_names(start_date, end_date, batch_names)

        batch_names = list(set(batch_names) - set(backed_up))
        batch_names.sort()
        return batch_names

    def backup_batch(self, batch_name):
        """
        Retrieve records from NPR, store them in local database in batches.
        """
        logger.info(f"Backing up batch {batch_name}. Getting iterator...")
        npr_backup_iterator = self.npr_db.get_backup_iterator(
            batch_name, batch_size=settings.BATCH_SIZE
        )

        logger.info("Starting local import")
        start = time.perf_counter()
        self.local_db.backup_iterator(npr_backup_iterator)
        end = time.perf_counter()
        logger.info(f">> Done. Processing iterator took {end - start:0.2f}s")
Example #9
 def test_init(self):
     objectstore = ObjectStore(config='foobar')
     assert objectstore.config == 'foobar'
     assert objectstore.connection is None
Example #10
class MuranoDslExecutor(object):
    def __init__(self, class_loader, environment=None):
        self._class_loader = class_loader
        self._object_store = ObjectStore(class_loader)
        self._attribute_store = AttributeStore()
        self._root_context = class_loader.create_root_context()
        self._root_context.set_data(self, '?executor')
        self._root_context.set_data(self._class_loader, '?classLoader')
        self._root_context.set_data(environment, '?environment')
        self._root_context.set_data(self._object_store, '?objectStore')
        self._root_context.set_data(self._attribute_store, '?attributeStore')
        self._locks = {}
        dsl_yaql_functions.register(self._root_context)
        self._root_context = Context(self._root_context)

    @property
    def object_store(self):
        return self._object_store

    @property
    def attribute_store(self):
        return self._attribute_store

    def to_yaql_args(self, args):
        if not args:
            return tuple()
        elif isinstance(args, types.TupleType):
            return args
        elif isinstance(args, types.ListType):
            return tuple(args)
        elif isinstance(args, types.DictionaryType):
            return tuple(args.items())
        else:
            raise ValueError()

    def invoke_method(self, name, this, context, murano_class, *args):
        if context is None:
            context = self._root_context
        implementations = this.type.find_method(name)
        delegates = []
        for declaring_class, name in implementations:
            method = declaring_class.get_method(name)
            if not method:
                continue
            arguments_scheme = method.arguments_scheme
            try:
                params = self._evaluate_parameters(
                    arguments_scheme, context, this, *args)
                delegates.append(functools.partial(
                    self._invoke_method_implementation,
                    method, this, declaring_class, context, params))
            except TypeError:
                continue
        if len(delegates) < 1:
            raise exceptions.NoMethodFound(name)
        elif len(delegates) > 1:
            raise exceptions.AmbiguousMethodName(name)
        else:
            return delegates[0]()

    def _invoke_method_implementation(self, method, this, murano_class,
                                      context, params):
        body = method.body
        if not body:
            return None

        current_thread = eventlet.greenthread.getcurrent()
        if not hasattr(current_thread, '_murano_dsl_thread_marker'):
            thread_marker = current_thread._murano_dsl_thread_marker = \
                uuid.uuid4().hex
        else:
            thread_marker = current_thread._murano_dsl_thread_marker

        method_id = id(body)
        this_id = this.object_id

        event, marker = self._locks.get((method_id, this_id), (None, None))
        if event:
            if marker == thread_marker:
                return self._invoke_method_implementation_gt(
                    body, this, params, murano_class, context)
            event.wait()

        event = Event()
        self._locks[(method_id, this_id)] = (event, thread_marker)
        gt = eventlet.spawn(self._invoke_method_implementation_gt, body,
                            this, params, murano_class, context,
                            thread_marker)
        result = gt.wait()
        del self._locks[(method_id, this_id)]
        event.send()
        return result

    def _invoke_method_implementation_gt(self, body, this,
                                         params, murano_class, context,
                                         thread_marker=None):
        if thread_marker:
            current_thread = eventlet.greenthread.getcurrent()
            current_thread._murano_dsl_thread_marker = thread_marker
        if callable(body):
            if '_context' in inspect.getargspec(body).args:
                params['_context'] = self._create_context(
                    this, murano_class, context, **params)
            if inspect.ismethod(body) and not body.__self__:
                return body(this, **params)
            else:
                return body(**params)
        elif isinstance(body, expressions.DslExpression):
            return self.execute(body, murano_class, this, context, **params)
        else:
            raise ValueError()

    def _evaluate_parameters(self, arguments_scheme, context, this, *args):
        arg_names = list(arguments_scheme.keys())
        parameter_values = {}
        i = 0
        for arg in args:
            value = helpers.evaluate(arg, context)
            if isinstance(value, types.TupleType) and len(value) == 2 and \
                    isinstance(value[0], types.StringTypes):
                name = value[0]
                value = value[1]
                if name not in arguments_scheme:
                    raise TypeError()
            else:
                if i >= len(arg_names):
                    raise TypeError()
                name = arg_names[i]
                i += 1

            if callable(value):
                value = value()
            arg_spec = arguments_scheme[name]
            parameter_values[name] = arg_spec.validate(
                value, this, self._root_context, self._object_store)

        for name, arg_spec in arguments_scheme.iteritems():
            if name not in parameter_values:
                if not arg_spec.has_default:
                    raise TypeError()
                parameter_values[name] = arg_spec.validate(
                    helpers.evaluate(arg_spec.default, context),
                    this, self._root_context, self._object_store)

        return parameter_values

    def _create_context(self, this, murano_class, context, **kwargs):
        new_context = self._class_loader.create_local_context(
            parent_context=self._root_context,
            murano_class=murano_class)
        new_context.set_data(this)
        new_context.set_data(this, 'this')
        new_context.set_data(this, '?this')
        new_context.set_data(murano_class, '?type')
        new_context.set_data(context, '?callerContext')

        @EvalArg('obj', arg_type=MuranoObject)
        @EvalArg('property_name', arg_type=str)
        def obj_attribution(obj, property_name):
            return obj.get_property(property_name, murano_class)

        @EvalArg('prefix', str)
        @EvalArg('name', str)
        def validate(prefix, name):
            return murano_class.namespace_resolver.resolve_name(
                '%s:%s' % (prefix, name))

        new_context.register_function(obj_attribution, '#operator_.')
        new_context.register_function(validate, '#validate')
        for key, value in kwargs.iteritems():
            new_context.set_data(value, key)
        return new_context

    def execute(self, expression, murano_class, this, context, **kwargs):
        new_context = self._create_context(
            this, murano_class, context, **kwargs)
        return expression.execute(new_context, murano_class)

    def load(self, data):
        if not isinstance(data, types.DictionaryType):
            raise TypeError()
        self._attribute_store.load(data.get('Attributes') or [])
        return self._object_store.load(data.get('Objects') or {},
                                       None, self._root_context)
Example #11
import json
import logging
import os
import pickle
from datetime import datetime

import boto3
import numpy
from qiskit import IBMQ
from qiskit.providers.exceptions import QiskitBackendNotFoundError

BACKEND = os.getenv("BACKEND")
COMPLETED_BUCKET = os.getenv("COMPLETED_BUCKET")
PENDING_BUCKET = os.getenv("PENDING_BUCKET")

# Initialize ObjectStore
try:
    ob = ObjectStore()
except Exception as ex:
    logging.error("Error is -", ex)

# Initialize Backend
def init_backend():
    """Returns the backend to work with in the cron job"""
    token = os.getenv('BACKEND_TOKEN')
    IBMQ.save_account(token, overwrite=True)
    IBMQ.load_account()
    provider = IBMQ.get_provider(hub='ibm-q')
    IBMQ.get_provider(group='open')
    try:
        backend = provider.get_backend(BACKEND)
    except QiskitBackendNotFoundError as er:
        logging.error("Qiskit Backend not found, please check backend in .env: %s", er)
        return None
    return backend
Example #12
 def object_store(self):
     if self._object_store is None:
         self._object_store = ObjectStore(self.object_dir())
     return self._object_store
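
The accessor above lazily builds the store on first use. On Python 3.8+ the same caching can be expressed with functools.cached_property; a minimal sketch, with ObjectStore assumed from the example above and Repository a hypothetical owner class:

import functools

class Repository:
    def __init__(self, object_dir):
        self._object_dir = object_dir

    @functools.cached_property
    def object_store(self):
        # Built once on first access, then cached on the instance.
        return ObjectStore(self._object_dir)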