def test_get_connection(self, mocked_base_objstore):
    mocked_base_objstore.get_connection.return_value = 'conn'
    objectstore = ObjectStore(config='foobar')
    connection = objectstore.get_connection()
    assert connection == objectstore.connection
    assert connection == objectstore.get_connection()
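# A minimal sketch of the `mocked_base_objstore` fixture the test above assumes;
# the module path "objectstore" and the class name "BaseObjectStore" are
# hypothetical placeholders, not names confirmed by the original project.
import pytest
from unittest import mock


@pytest.fixture
def mocked_base_objstore():
    # Patch the (assumed) base class so get_connection() never hits a real store.
    with mock.patch("objectstore.BaseObjectStore") as mocked:
        yield mocked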
def download_data(file: str, bucket_name: str, dest: Path):
    """Parallel batching of data."""
    destination = dest / Path(file)
    if destination.exists():
        logging.info(f"{file} already exists")
        return
    logging.info(f"preparing data from {file}")
    store = ObjectStore()
    # NOTE: Object store client is not thread safe.
    # We need to instantiate it in each unit of parallelization.
    store.connect()
    gevent.sleep(0.001)
    download_file(file, bucket_name, store.client, destination)
def get_filename_generator(bucket_name: str):
    """Get a filename generator for a bucket."""
    store = ObjectStore(HOST, ACCESS_KEY, SECRET_KEY)
    store.connect()
    server_files = store.client.list_objects_v2(
        bucket_name, prefix="", recursive=True, start_after="")

    def gen_filenames(files):
        for file in files:
            yield file.object_name

    return gen_filenames(server_files)
def stream_files(files, bucket_name: str, dest: Path, pool_size: int = 16):
    """Stream all files."""
    store = ObjectStore(HOST, ACCESS_KEY, SECRET_KEY)
    store.connect()
    logging.info(
        f"starting a stream to download the data with {pool_size} workers")
    pool = Pool(pool_size + 1)
    # download_data expects (file, bucket_name, dest), so pass the bucket through.
    jobs = [pool.spawn(download_data, file, bucket_name, dest) for file in files]
    gevent.joinall(jobs)
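# Hypothetical driver that wires the three snippets above together; it assumes
# they live in one module and that HOST, ACCESS_KEY and SECRET_KEY are already
# configured. The function name and the bucket/destination values are illustrative.
from pathlib import Path


def download_bucket(bucket_name: str, dest: Path, pool_size: int = 16):
    """List every object in the bucket and stream it to dest in parallel."""
    files = get_filename_generator(bucket_name)
    stream_files(files, bucket_name, dest, pool_size=pool_size)


if __name__ == "__main__":
    download_bucket("my-bucket", Path("/tmp/data"), pool_size=8)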
class Exporter:
    def __init__(self):
        super().__init__()
        self.local_db = LocalDatabase()
        self.object_store = ObjectStore()

    def export_orphans(self):
        batch_names = [settings.NPR_NULL_VALUE]
        self._run_export(batch_names)

    def export_range(self, start_date, end_date):
        today = datetime.date.today()
        if end_date >= today:
            # ensure we never export today as its data is still incomplete
            end_date = today - datetime.timedelta(days=1)
        batch_names = self.get_batch_names_for_export(start_date, end_date)
        logger.info(f"Found {len(batch_names)} batch names for export")
        self._run_export(batch_names)

    def _run_export(self, batch_names):
        for batch_name in batch_names:
            filename = f"{batch_name}{settings.BACKUP_FILE_POSTFIX}"
            path = self.local_db.export_batch_to_csv(filename, batch_name)
            self.object_store.upload(path, filename)
            logger.info(f"Exported batch {batch_name}")

    def get_batch_names_for_export(self, start_date, end_date):
        num_days = (end_date - start_date).days + 1
        batch_names = [(start_date + timedelta(days=days)).strftime("%Y%m%d")
                       for days in range(num_days)]
        existing_batch_names = self.local_db.get_existing_batch_names()
        batches = set(batch_names).intersection(set(existing_batch_names))
        return sorted(list(batches))
class Importer:
    def __init__(self):
        super().__init__()
        self.local_db = LocalDatabase()
        self.npr_db = NPRDatabase()
        self.object_store = ObjectStore()

    def import_orphans(self):
        batch_names = [settings.NPR_NULL_VALUE]
        self._run_import(batch_names)

    def import_range(self, start_date, end_date, override_existing=False):
        today = datetime.date.today()
        if end_date >= today:
            # ensure we never import today as its data is still incomplete
            end_date = today - datetime.timedelta(days=1)
        logger.info("Determining batch names for import...")
        batch_names = self.get_batch_names_for_download(
            start_date, end_date, override_existing
        )
        logger.info(f"{len(batch_names)} batches found to import")
        self._run_import(batch_names)

    def import_last_x_days(self, num_days):
        today = datetime.date.today()
        start_date = today - datetime.timedelta(days=num_days)
        return self.import_range(start_date, today)

    def _run_import(self, batch_names):
        if not batch_names:
            logger.info("Nothing to import")
            return
        for batch_name in batch_names:
            self.backup_batch(batch_name)

    def get_batch_names_for_download(self, start_date, end_date, override_existing):
        # We want batches that are requested and not yet backed up
        # (these are the set of candidates to back up).
        if override_existing:
            backed_up = []
        else:
            batch_names_in_local_db = self.local_db.get_existing_batch_names(
                require_table=False
            )
            batch_names_in_obj_store = self.object_store.get_existing_batch_names()
            backed_up = batch_names_in_obj_store + batch_names_in_local_db
        batch_names = self.npr_db.get_existing_batch_names()
        batch_names = filter_batch_names(start_date, end_date, batch_names)
        batch_names = list(set(batch_names) - set(backed_up))
        batch_names.sort()
        return batch_names

    def backup_batch(self, batch_name):
        """
        Retrieve records from NPR, store them in local database in batches.
        """
        logger.info(f"Backing up batch {batch_name}. Getting iterator...")
        npr_backup_iterator = self.npr_db.get_backup_iterator(
            batch_name, batch_size=settings.BATCH_SIZE
        )
        logger.info("Starting local import")
        start = time.perf_counter()
        self.local_db.backup_iterator(npr_backup_iterator)
        end = time.perf_counter()
        logger.info(f">> Done. Processing iterator took {end - start:0.2f}s")
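# Illustrative glue only: a nightly job that combines the Importer and Exporter
# snippets above. The function name and the 7-day window are assumptions, not
# part of the original code.
import datetime


def nightly_backup(days: int = 7):
    # Pull the most recent batches from NPR into the local database...
    Importer().import_last_x_days(days)
    # ...then export the same window from the local database to the object store.
    end_date = datetime.date.today()
    start_date = end_date - datetime.timedelta(days=days)
    Exporter().export_range(start_date, end_date)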
def test_init(self):
    objectstore = ObjectStore(config='foobar')
    assert objectstore.config == 'foobar'
    assert objectstore.connection is None
class MuranoDslExecutor(object):
    def __init__(self, class_loader, environment=None):
        self._class_loader = class_loader
        self._object_store = ObjectStore(class_loader)
        self._attribute_store = AttributeStore()
        self._root_context = class_loader.create_root_context()
        self._root_context.set_data(self, '?executor')
        self._root_context.set_data(self._class_loader, '?classLoader')
        self._root_context.set_data(environment, '?environment')
        self._root_context.set_data(self._object_store, '?objectStore')
        self._root_context.set_data(self._attribute_store, '?attributeStore')
        self._locks = {}
        dsl_yaql_functions.register(self._root_context)
        self._root_context = Context(self._root_context)

    @property
    def object_store(self):
        return self._object_store

    @property
    def attribute_store(self):
        return self._attribute_store

    def to_yaql_args(self, args):
        if not args:
            return tuple()
        elif isinstance(args, types.TupleType):
            return args
        elif isinstance(args, types.ListType):
            return tuple(args)
        elif isinstance(args, types.DictionaryType):
            return tuple(args.items())
        else:
            raise ValueError()

    def invoke_method(self, name, this, context, murano_class, *args):
        if context is None:
            context = self._root_context
        implementations = this.type.find_method(name)
        delegates = []
        for declaring_class, name in implementations:
            method = declaring_class.get_method(name)
            if not method:
                continue
            arguments_scheme = method.arguments_scheme
            try:
                params = self._evaluate_parameters(
                    arguments_scheme, context, this, *args)
                delegates.append(functools.partial(
                    self._invoke_method_implementation,
                    method, this, declaring_class, context, params))
            except TypeError:
                continue
        if len(delegates) < 1:
            raise exceptions.NoMethodFound(name)
        elif len(delegates) > 1:
            raise exceptions.AmbiguousMethodName(name)
        else:
            return delegates[0]()

    def _invoke_method_implementation(self, method, this, murano_class,
                                      context, params):
        body = method.body
        if not body:
            return None

        current_thread = eventlet.greenthread.getcurrent()
        if not hasattr(current_thread, '_murano_dsl_thread_marker'):
            thread_marker = current_thread._murano_dsl_thread_marker = \
                uuid.uuid4().hex
        else:
            thread_marker = current_thread._murano_dsl_thread_marker

        method_id = id(body)
        this_id = this.object_id
        event, marker = self._locks.get((method_id, this_id), (None, None))
        if event:
            if marker == thread_marker:
                return self._invoke_method_implementation_gt(
                    body, this, params, murano_class, context)
            event.wait()

        event = Event()
        self._locks[(method_id, this_id)] = (event, thread_marker)
        gt = eventlet.spawn(self._invoke_method_implementation_gt,
                            body, this, params, murano_class,
                            context, thread_marker)
        result = gt.wait()
        del self._locks[(method_id, this_id)]
        event.send()
        return result

    def _invoke_method_implementation_gt(self, body, this, params,
                                         murano_class, context,
                                         thread_marker=None):
        if thread_marker:
            current_thread = eventlet.greenthread.getcurrent()
            current_thread._murano_dsl_thread_marker = thread_marker

        if callable(body):
            if '_context' in inspect.getargspec(body).args:
                params['_context'] = self._create_context(
                    this, murano_class, context, **params)
            if inspect.ismethod(body) and not body.__self__:
                return body(this, **params)
            else:
                return body(**params)
        elif isinstance(body, expressions.DslExpression):
            return self.execute(body, murano_class, this, context, **params)
        else:
            raise ValueError()

    def _evaluate_parameters(self, arguments_scheme, context, this, *args):
        arg_names = list(arguments_scheme.keys())
        parameter_values = {}
        i = 0
        for arg in args:
            value = helpers.evaluate(arg, context)
            if isinstance(value, types.TupleType) and len(value) == 2 and \
                    isinstance(value[0], types.StringTypes):
                name = value[0]
                value = value[1]
                if name not in arguments_scheme:
                    raise TypeError()
            else:
                if i >= len(arg_names):
                    raise TypeError()
                name = arg_names[i]
                i += 1

            if callable(value):
                value = value()
            arg_spec = arguments_scheme[name]
            parameter_values[name] = arg_spec.validate(
                value, this, self._root_context, self._object_store)

        for name, arg_spec in arguments_scheme.iteritems():
            if name not in parameter_values:
                if not arg_spec.has_default:
                    raise TypeError()
                parameter_values[name] = arg_spec.validate(
                    helpers.evaluate(arg_spec.default, context),
                    this, self._root_context, self._object_store)

        return parameter_values

    def _create_context(self, this, murano_class, context, **kwargs):
        new_context = self._class_loader.create_local_context(
            parent_context=self._root_context,
            murano_class=murano_class)
        new_context.set_data(this)
        new_context.set_data(this, 'this')
        new_context.set_data(this, '?this')
        new_context.set_data(murano_class, '?type')
        new_context.set_data(context, '?callerContext')

        @EvalArg('obj', arg_type=MuranoObject)
        @EvalArg('property_name', arg_type=str)
        def obj_attribution(obj, property_name):
            return obj.get_property(property_name, murano_class)

        @EvalArg('prefix', str)
        @EvalArg('name', str)
        def validate(prefix, name):
            return murano_class.namespace_resolver.resolve_name(
                '%s:%s' % (prefix, name))

        new_context.register_function(obj_attribution, '#operator_.')
        new_context.register_function(validate, '#validate')

        for key, value in kwargs.iteritems():
            new_context.set_data(value, key)
        return new_context

    def execute(self, expression, murano_class, this, context, **kwargs):
        new_context = self._create_context(
            this, murano_class, context, **kwargs)
        return expression.execute(new_context, murano_class)

    def load(self, data):
        if not isinstance(data, types.DictionaryType):
            raise TypeError()
        self._attribute_store.load(data.get('Attributes') or [])
        return self._object_store.load(data.get('Objects') or {},
                                       None, self._root_context)
import json
import logging
import os
import pickle
from datetime import datetime

import boto3
import numpy
from qiskit import IBMQ  # needed by init_backend() below
from qiskit.providers.exceptions import QiskitBackendNotFoundError

BACKEND = os.getenv("BACKEND")
COMPLETED_BUCKET = os.getenv("COMPLETED_BUCKET")
PENDING_BUCKET = os.getenv("PENDING_BUCKET")

# Initialize ObjectStore
try:
    ob = ObjectStore()
except Exception as ex:
    logging.error("Error is - %s", ex)


# Initialize Backend
def init_backend():
    """Returns the backend to work with in the cron job"""
    token = os.getenv('BACKEND_TOKEN')
    IBMQ.save_account(token, overwrite=True)
    IBMQ.load_account()
    provider = IBMQ.get_provider(hub='ibm-q')
    IBMQ.get_provider(group='open')
    try:
        backend = provider.get_backend(BACKEND)
    except QiskitBackendNotFoundError as er:
        logging.error("Qiskit Backend not found, please check backend in .env. (%s)", er)
        return None
    return backend
def object_store(self):
    if self._object_store is None:
        self._object_store = ObjectStore(self.object_dir())
    return self._object_store