def wrapper(*args, **kwds):
    pickledfunc = dill.dumps(func)
    try:
        HOST, USER, PASSWORD = kwds['remote'][0], kwds['remote'][1], kwds['remote'][2]
    except (KeyError, IndexError):
        # No usable 'remote' argument: fall back to running the function locally.
        return func(*args, **kwds)
    PORT = random.randrange(10000, 20000)
    t = Process(target=start_remote, args=(HOST, USER, PASSWORD, PORT))
    t.start()
    time.sleep(2)
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    sock.connect((HOST, PORT))
    # __globals__ works on both Python 2 and 3 (func_globals was removed in Python 3).
    modules_list = [itm for itm in func.__globals__.keys()
                    if itm.count("_") == 0 and itm != func.__name__ and itm != 'remoteFunction']
    h = hashlib.sha1()
    h.update(pickledfunc)
    hexdigestkey = h.hexdigest()
    del kwds['remote']
    payload = dill.dumps([pickledfunc, modules_list, args, kwds, hexdigestkey])
    sock.sendall(payload)
    try:
        response = dill.loads(sock.recv(4096))
    finally:
        # These calls were unreachable after the return in the original; run them before returning.
        sock.close()
        t.terminate()
    return response
def dump(self):
    """Returns a serialization of the current job instance"""
    obj = {}
    obj['created_at'] = utcformat(self.created_at or utcnow())
    obj['data'] = self.data
    if self.origin is not None:
        obj['origin'] = self.origin
    if self.description is not None:
        obj['description'] = self.description
    if self.enqueued_at is not None:
        obj['enqueued_at'] = utcformat(self.enqueued_at)
    if self.ended_at is not None:
        obj['ended_at'] = utcformat(self.ended_at)
    if self._result is not None:
        obj['result'] = dumps(self._result)
    if self.exc_info is not None:
        obj['exc_info'] = self.exc_info
    if self.timeout is not None:
        obj['timeout'] = self.timeout
    if self.result_ttl is not None:
        obj['result_ttl'] = self.result_ttl
    if self._status is not None:
        obj['status'] = self._status
    if self._dependency_id is not None:
        obj['dependency_id'] = self._dependency_id
    if self.meta:
        obj['meta'] = dumps(self.meta)
    return obj
def add_computation(project, entry, object, timeout=None):
    if isinstance(entry, Optimization):
        entry.status = Optimization.QUEUED
        entry.save()
        comp = ComputationQueue(
            project=project,
            type=ComputationQueue.OPTIM,
            computation_id=entry.id,
            object=dumps(object).decode('Latin-1'),
            timeout=timeout
        )
        comp.save()
    elif isinstance(entry, Continuation):
        entry.status = Continuation.QUEUED
        entry.save()
        comp = ComputationQueue(
            project=project,
            type=ComputationQueue.CONT,
            computation_id=entry.id,
            object=dumps(object).decode('Latin-1'),
            timeout=timeout
        )
        comp.save()
    update_queue()
def _convert(v):
    if dill is not None:
        try:
            dill.dumps(v)
            return v
        except Exception:
            return _safe_repr(v)
    else:
        from datetime import date, time, datetime, timedelta
        if PY2:
            BUILTIN = (str, unicode, int, long, float, date, time, datetime, timedelta)
        else:
            BUILTIN = (str, int, float, date, time, datetime, timedelta)
            # XXX: what about bytes and bytearray?
        if v is None:
            return v
        if type(v) in BUILTIN:
            return v
        if type(v) is tuple:
            return tuple(_convert_seq(v))
        if type(v) is list:
            return list(_convert_seq(v))
        if type(v) is set:
            return set(_convert_seq(v))
        if type(v) is dict:
            return _convert_dict(v)
        return _safe_repr(v)
def serialize(cust_obj):
    """A function to serialize custom objects passed to a model

    Args:
        cust_obj(callable): a custom layer or function to serialize
    Returns:
        a dict of the serialized components of the object
    """
    ser_func = dict()
    if isinstance(cust_obj, types.FunctionType):
        func_code = six.get_function_code(cust_obj)
        func_code_d = dill.dumps(func_code).decode('raw_unicode_escape')
        ser_func['func_code_d'] = func_code_d
        ser_func['name_d'] = pickle.dumps(
            cust_obj.__name__).decode('raw_unicode_escape')
        ser_func['args_d'] = pickle.dumps(
            six.get_function_defaults(cust_obj)).decode('raw_unicode_escape')
        clos = dill.dumps(
            six.get_function_closure(cust_obj)).decode('raw_unicode_escape')
        ser_func['clos_d'] = clos
        ser_func['type_obj'] = 'func'
    else:
        if hasattr(cust_obj, '__module__'):  # pragma: no cover
            cust_obj.__module__ = '__main__'
        ser_func['name_d'] = None
        ser_func['args_d'] = None
        ser_func['clos_d'] = None
        ser_func['type_obj'] = 'class'
        loaded = dill.dumps(cust_obj).decode('raw_unicode_escape')
        ser_func['func_code_d'] = loaded
    return ser_func
def _make_multi_process_batches(self, problem_iter):
    '''
    we do two things here:
    - break tasks into batches to be multiprocessed
    - multiprocess sometimes does not terminate properly, so we observe how many
      tasks go in and terminate once that number of outs is reached

    Parameters
    ----------
    problem_iter: problems to put into the multiprocess queue

    Returns
    -------
    yields batch_size sized problem chunks
    '''
    try:
        s = dill.dumps(self, byref=False)
    except Exception as exc:
        print(exc)
        print("dill dump failed in graphlearn.py (dill dies silently sometimes)")
    self.multiprocess_jobcount = 0
    self.multiprocess_all_prepared = False
    for e in grouper(problem_iter, self.batch_size):
        # can't just take batch_size here because the output of Nones will be suppressed
        problems = [1 for problem in e if problem is not None]
        self.multiprocess_jobcount += sum(problems)
        batch = dill.dumps(e)
        yield (s, batch)
    self.multiprocess_all_prepared = True
def handle(self):
    import sys
    data = self.request.recv(4096)
    cur_thread = threading.current_thread()
    response = "{}: {}".format(cur_thread.name, data)
    rec = dill.loads(data)
    modulesNames = rec[1]
    h = hashlib.sha1()
    h.update(rec[0])
    hexdigestkey = h.hexdigest()
    if hexdigestkey != rec[4]:
        sys.exit(1)
    remfunc = dill.loads(rec[0])
    for mod_name in modulesNames:
        remfunc.__globals__[mod_name] = import_module(mod_name)
    try:
        res = remfunc(*rec[2], **rec[3])
    except Exception:
        # Report the failure back to the caller and stop; without this return the
        # handler falls through and references an undefined `res`.
        msgerr = sys.exc_info()[0]
        res_pack = dill.dumps(msgerr)
        self.request.sendall(res_pack)
        self.server.shutdown()
        return
    res_pack = dill.dumps(res)
    self.request.sendall(res_pack)
    self.server.shutdown()
def func(self, value):
    if inspect.ismethod(value) or inspect.isfunction(value) or inspect.isbuiltin(value):
        self._func = dumps(value)
    else:
        # we expect a string
        self._func = dumps(import_attribute(value))
    if inspect.ismethod(value):
        self._instance = value.__self__
def test_pickling_examples():
    try:
        import dill
    except ImportError:
        pytest.skip('requires dill')
    dill.loads(dill.dumps(det))
    dill.loads(dill.dumps(motor))
    dill.loads(dill.dumps(flyer1))
def dumps(o):
    try:
        return base64.b64encode(dill.dumps(o))
    except Exception:  # pylint: disable=broad-except
        dill.dill._trace(True)  # pylint: disable=protected-access
        return base64.b64encode(dill.dumps(o))
    finally:
        dill.dill._trace(False)  # pylint: disable=protected-access
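# Hedged counterpart to dumps above; the original snippet does not show the decoder,
# so this is an assumed symmetric implementation.
def loads(s):
    return dill.loads(base64.b64decode(s))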
def check_coder(self, coder, *values):
    self._observe(coder)
    for v in values:
        self.assertEqual(v, coder.decode(coder.encode(v)))
    copy1 = dill.loads(dill.dumps(coder))
    copy2 = dill.loads(dill.dumps(coder))
    for v in values:
        self.assertEqual(v, copy1.decode(copy2.encode(v)))
def test_picklemap():
    encode = picklemap(typed=False, flat=True, serializer='dill')
    assert encode(*args, **kwds) == dumps((1, 2, 'a', 3, 'b', 4))
    encode = picklemap(typed=False, flat=False, serializer='dill')
    assert encode(*args, **kwds) == dumps((args, kwds))
    encode = picklemap(typed=True, flat=True, serializer='dill')
    assert encode(*args, **kwds) == dumps(
        (1, 2, 'a', 3, 'b', 4, type(1), type(2), type(3), type(4))
    )
    encode = picklemap(typed=True, flat=False, serializer='dill')
    assert encode(*args, **kwds) == dumps(
        (args, kwds, (type(1), type(2)), (type(3), type(4)))
    )
def test_basic():
    a = [0, 1, 2]
    pa = pickle.dumps(a)
    pmath = pickle.dumps(math)  # XXX: FAILS in pickle
    pmap = pickle.dumps(map)  # ...
    la = pickle.loads(pa)
    lmath = pickle.loads(pmath)
    lmap = pickle.loads(pmap)
    assert list(map(math.sin, a)) == list(lmap(lmath.sin, la))
def test_array_nested():
    try:
        import numpy as np
        x = np.array([1])
        y = (x,)
        dill.dumps(x)
        assert y == dill.loads(dill.dumps(y))
    except ImportError:
        pass
def new_func(*args, **kwargs):
    # if the "flush" kwarg is passed, recompute regardless of whether
    # the result is cached
    if "flush" in list(kwargs.keys()):
        kwargs.pop("flush", None)
        key = (dill.dumps(args),
               frozenset(list(kwargs.items())),
               frozenset(list(closure_dict.items())))
        compute(key)
    key = (dill.dumps(args),
           frozenset(list(kwargs.items())),
           frozenset(list(closure_dict.items())))
    if key not in cache:
        compute(key)
    return cache[key]
def test_memoized():
    @memoized(cache=sql_archive())
    def add(x, y):
        return x + y
    add(1, 2)
    add(1, 2)
    add(1, 3)
    #print("sql_cache = %s" % add.__cache__())
    _key4 = '((), ' + str({'y': 3, 'x': 1}) + ')'
    _key3 = '((), ' + str({'y': 2, 'x': 1}) + ')'
    key4_ = '((), ' + str({'x': 1, 'y': 3}) + ')'
    key3_ = '((), ' + str({'x': 1, 'y': 2}) + ')'
    # the key's dict repr depends on key order, so accept either form; both sides of
    # the `or` must compare against the cache (the original compared only the first)
    assert add.__cache__() == {_key4: 4, _key3: 3} or add.__cache__() == {key4_: 4, key3_: 3}

    @memoized(cache=dict_archive(cached=False))  # use archive backend 'directly'
    def add(x, y):
        return x + y
    add(1, 2)
    add(1, 2)
    add(1, 3)
    #print("dict_cache = %s" % add.__cache__())
    assert add.__cache__() == {_key4: 4, _key3: 3} or add.__cache__() == {key4_: 4, key3_: 3}

    @memoized(cache=dict())
    def add(x, y):
        return x + y
    add(1, 2)
    add(1, 2)
    add(1, 3)
    #print("dict_cache = %s" % add.__cache__())
    assert add.__cache__() == {_key4: 4, _key3: 3} or add.__cache__() == {key4_: 4, key3_: 3}

    @memoized(cache=add.__cache__())
    def add(x, y):
        return x + y
    add(1, 2)
    add(2, 2)
    #print("re_dict_cache = %s" % add.__cache__())
    _key2 = '((), ' + str({'y': 2, 'x': 2}) + ')'
    key2_ = '((), ' + str({'x': 2, 'y': 2}) + ')'
    assert add.__cache__() == {_key4: 4, _key3: 3, _key2: 4} or add.__cache__() == {key4_: 4, key3_: 3, key2_: 4}

    @memoized(keymap=dumps)
    def add(x, y):
        return x + y
    add(1, 2)
    add(1, 2)
    add(1, 3)
    #print("pickle_dict_cache = %s" % add.__cache__())
    _pkey4 = dill.dumps(eval(_key4))
    _pkey3 = dill.dumps(eval(_key3))
    pkey4_ = dill.dumps(eval(key4_))
    pkey3_ = dill.dumps(eval(key3_))
    assert add.__cache__() == {_pkey4: 4, _pkey3: 3} or add.__cache__() == {pkey4_: 4, pkey3_: 3}
def setUpClass(cls):
    # super(TestStore, cls).setUpClass()
    cls.env = dict(os.environ)
    cls.store_data = {}
    cls.store_data[StoreProperty.MODE] = Mode.MANY_TASKS
    cls.store_data[StoreProperty.TASK_COUNT] = 10
    cls.store_data[StoreProperty.TASK_NO(0)] = dill.dumps(lambda: 0)
    cls.store_data[StoreProperty.TASK_NO(1)] = dill.dumps(lambda: 1)
    cls.saved_store = cls.STORE_TYPE.save_store(cls.store_data)
    environ.update(cls.saved_store)
def test_data_property_sets_job_properties(self):
    """Job tuple gets derived lazily from data property."""
    job = Job()

    def foo(a, b, c, bar=''):
        pass

    job.data = dumps((dumps(foo), None, (1, 2, 3), {'bar': 'qux'}))
    self.assertEquals(job.func_name, 'tests.test_job.foo')
    self.assertEquals(job.instance, None)
    self.assertEquals(job.args, (1, 2, 3))
    self.assertEquals(job.kwargs, {'bar': 'qux'})
def test_getattr_gets_from_cache(self):
    workflow_id = uuid4()
    context = {'workflow_execution_id': workflow_id}
    app = AppBase('Something', self.device2.id, context)
    app._cache = self.cache
    app.foo = 42
    app.bar = 23
    self.cache.set(app._format_cache_key('foo'), dill.dumps('a'))
    self.cache.set(app._format_cache_key('bar'), dill.dumps('b'))
    self.assertEqual(app.foo, 'a')
    self.assertEqual(app.bar, 'b')
    with self.assertRaises(AttributeError):
        y = app.baz
def test_lambdas_pickle(self):
    NONLOCAL_CONST = 5
    lambda_func = lambda x, LOCAL_CONST=7: \
        x * LOCAL_CONST * NONLOCAL_CONST * self.CLASS_CONST * GLOBAL_CONST

    def nested_func(x, LOCAL_CONST=7):
        return x * LOCAL_CONST * NONLOCAL_CONST * self.CLASS_CONST * GLOBAL_CONST

    self.assertEqual(lambda_func(11),
                     pickle.loads(pickle.dumps(lambda_func))(11))
    self.assertEqual(nested_func(11),
                     pickle.loads(pickle.dumps(nested_func))(11))
def save(self, key, value, expire=0):
    if not hasattr(self, "storage"):
        self.bootstrap_storage()
    try:
        if expire:
            ret = self.storage.setex(key, pickle.dumps(value), expire)
        else:
            ret = self.storage.set(key, pickle.dumps(value))
        return ret
    except Exception:
        logging.critical("Unable to save %s: \n%s" % (key, traceback.format_exc()))
def is_serializable(func, raise_errors=True):
    if raise_errors is None:
        raise_errors = True
    try:
        serializer.dumps(func, PROTOCOL)
        return True
    except (AttributeError, serializer.PicklingError):
        if raise_errors:
            raise serializer.PicklingError(
                "Function {} is not serializable. "
                "Try installing dill or passing raise_errors to False "
                "for non-parallel execution when serialization fails."
                .format(str(func)))
        return False
def add_job_to_queue(self, job, route, ts=None):
    if ts:
        raise NotImplementedError("fire_at is currently not supported by foundation")
    queue_name = job.queue.name
    job.job_id = str(id(job))
    job_flat = job.json()
    job_flat['now'] = datetime.utcnow()
    if job.queue.queue_type == 'broadcast':
        for worker in self._get_workers(self.skunkdb):
            queue = '__WORKERQUEUE-' + worker['worker_id']
            self._get_queue(queue).push(self.conn, dill.dumps(job_flat))
    else:
        queue = queue_name + '-' + route
        self._get_queue(queue).push(self.conn, dill.dumps(job_flat))
def generate_trace_pkt(trace_entries, color, r_id):
    """Receives the REST/PUT to generate a PacketOut; data needs to be serialized.

    The goal is always to create a packet whose data is the TraceMsg, to
    differentiate different traces running in parallel. We stack layers depending
    on the user request. If the user submits just a VLAN ID, we use ethertype 88b5
    and add the TraceMsg after it. Same for IP, however the protocol will be 65535.
    If the user provides all the way to TCP/UDP, we add the TraceMsg after it.
    The first thing to do is discover what the user has provided.

    Args:
        trace_entries: TraceEntries provided by user or collected from PacketIn
        color: result from Coloring Napp for a specific DPID
        r_id: request ID
    Returns:
        in_port: in_port
        pkt: serialized Ethernet frame
    """
    ethernet = _create_ethernet_frame(trace_entries, color)
    msg = TraceMsg(r_id)
    if ethernet.ether_type == constants.IPV4:
        ip_pkt = _create_ip_packet(trace_entries)
        if ip_pkt.protocol == constants.TCP:
            # No dissector for TCP yet
            ip_pkt.data = dill.dumps(msg)
            # tp_pkt = _create_tcp_packet(trace_entries)
            # ip_pkt.data = tp_pkt.pack()
        elif ip_pkt.protocol == constants.UDP:
            # No dissector for UDP yet
            ip_pkt.data = dill.dumps(msg)
            # tp_pkt = _create_udp_packet(trace_entries)
            # ip_pkt.data = tp_pkt.pack()
        else:
            ip_pkt.data = dill.dumps(msg)
        ethernet.data = ip_pkt.pack()
    else:
        ethernet.data = dill.dumps(msg)
    pkt = ethernet.pack()
    return trace_entries.in_port, pkt
def test_serializability():
    state = da.random.RandomState(5)
    x = state.normal(10, 1, size=10, chunks=5)
    y = dill.loads(dill.dumps(x))
    assert (x.compute() == y.compute()).all()
def check_coder(self, coder, *values):
    self._observe(coder)
    for v in values:
        self.assertEqual(v, coder.decode(coder.encode(v)))
        self.assertEqual(coder.estimate_size(v), len(coder.encode(v)))
        self.assertEqual(coder.estimate_size(v),
                         coder.get_impl().estimate_size(v))
        self.assertEqual(coder.get_impl().get_estimated_size_and_observables(v),
                         (coder.get_impl().estimate_size(v), []))
    copy1 = dill.loads(dill.dumps(coder))
    copy2 = dill.loads(dill.dumps(coder))
    for v in values:
        self.assertEqual(v, copy1.decode(copy2.encode(v)))
        if coder.is_deterministic():
            self.assertEqual(copy1.encode(v), copy2.encode(v))
def calc_hash(argument):
    argument_string = pickle.dumps(argument)
    signature = argument_string
    hasher = hashlib.sha256()
    hasher.update(signature)
    hash_string = hasher.hexdigest()
    return hash_string
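# Quick usage sketch for calc_hash (illustrative values, not part of the original
# snippet): equal arguments pickle to identical bytes here, so the digest can serve
# as a cache key.
assert calc_hash((1, 2, 3)) == calc_hash((1, 2, 3))
print(calc_hash((1, 2, 3)))  # 64-character hex digest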
def send_to_scheduler(self, header, payload):
    log(self.address, 'Send to scheduler', header)
    if 'address' not in header:
        header['address'] = self.address
    header['timestamp'] = datetime.utcnow()
    header['loads'] = dill.loads
    self.socket.send_multipart([pickle.dumps(header), dill.dumps(payload)])
def _process_work(msg):
    # Check for sentinel
    if msg.reply_to == "":
        channel.basic_cancel(consumer)
    body = pickle.loads(msg.body)
    mapid = body['mapid']
    if mapid not in _cache:
        _fetch_function(msg.reply_to, mapid)
    function = _cache[mapid]
    if function is None:
        channel.basic_ack(msg.delivery_tag)
        return
    # Acknowledge delivery of message
    #print("processing...", body['index'], body['value'])
    try:
        result = function(body['value'])
    except Exception:
        result = None
    #print("done")
    channel.basic_ack(msg.delivery_tag)
    reply = dict(index=body['index'], result=result, mapid=mapid)
    replymsg = amqp.Message(pickle.dumps(reply))
    channel.basic_publish(replymsg, exchange=exchange, routing_key=msg.reply_to)
def as_dictionary(self, fullcopy=True):
    """Returns component as a dictionary.

    For more information on method and conventions, see
    :meth:`hyperspy.misc.export_dictionary.export_to_dictionary`

    Parameters
    ----------
    fullcopy : Bool (optional, False)
        Copies of objects are stored, not references. If any found,
        functions will be pickled and signals converted to dictionaries

    Returns
    -------
    dic : dictionary
        A dictionary, containing at least the following fields:

        * parameters : list
          a list of dictionaries of the parameters, one per parameter
        * _whitelist : dictionary
          a dictionary with keys used as references of saved attributes,
          for more information, see
          :meth:`hyperspy.misc.export_dictionary.export_to_dictionary`
        * any field from _whitelist.keys()
    """
    dic = {
        'parameters': [
            p.as_dictionary(fullcopy) for p in self.parameters]}
    export_to_dictionary(self, self._whitelist, dic, fullcopy)
    from hyperspy.model import components
    if self._id_name not in components.__dict__.keys():
        import dill
        dic['_class_dump'] = dill.dumps(self.__class__)
    return dic
def test_worker_initialize(self) -> None:
    linter = PyLinter(reporter=Reporter())
    worker_initialize(linter=dill.dumps(linter))
    assert isinstance(pylint.lint.parallel._worker_linter, type(linter))
def run_experiment(problem_size, shard_size, pipeline, num_priorities, lru, eager, truncate, max_cores, start_cores, trial, launch_granularity, timeout, log_granularity, autoscale_policy, standalone, warmup, verify, matrix_exists, read_limit, write_limit, compute_threads_per_worker): # set up logging invoke_executor = fs.ThreadPoolExecutor(1) logger = logging.getLogger() region = wc.default()["account"]["aws_region"] print("REGION", region) for key in logging.Logger.manager.loggerDict: logging.getLogger(key).setLevel(logging.CRITICAL) logger.setLevel(logging.DEBUG) arg_bytes = pickle.dumps( (problem_size, shard_size, pipeline, num_priorities, lru, eager, truncate, max_cores, start_cores, trial, launch_granularity, timeout, log_granularity, autoscale_policy, read_limit, write_limit)) arg_hash = hashlib.md5(arg_bytes).hexdigest() log_file = "{0}.log".format(arg_hash) fh = logging.FileHandler(log_file) formatter = logging.Formatter( '%(asctime)s - %(name)s - %(levelname)s - %(message)s') fh.setFormatter(formatter) ch = logging.StreamHandler() ch.setLevel(logging.INFO) ch.setFormatter(formatter) logger.addHandler(fh) logger.addHandler(ch) logger.info("Logging to {0}".format(log_file)) if standalone: extra_env = { "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"].strip(), "AWS_SECRET_ACCESS_KEY": os.environ["AWS_SECRET_ACCESS_KEY"].strip(), "OMP_NUM_THREADS": "1", "AWS_DEFAULT_REGION": region } config = wc.default() config['runtime']['s3_bucket'] = 'numpywrenpublic' key = "pywren.runtime/pywren_runtime-3.6-numpywren.tar.gz" config['runtime']['s3_key'] = key pwex = pywren.standalone_executor(config=config) else: extra_env = {"AWS_DEFAULT_REGION": region} config = wc.default() config['runtime']['s3_bucket'] = 'numpywrenpublic' key = "pywren.runtime/pywren_runtime-3.6-numpywren.tar.gz" config['runtime']['s3_key'] = key print(config) pwex = pywren.default_executor(config=config) if (not matrix_exists): X = np.random.randn(problem_size, 1) shard_sizes = [shard_size, 1] X_sharded = BigMatrix("qr_test_{0}_{1}".format(problem_size, shard_size), shape=X.shape, shard_sizes=shard_sizes, write_header=True, autosqueeze=False, bucket="numpywrennsdi2") shard_matrix(X_sharded, X) print("Generating PSD matrix...") t = time.time() print(X_sharded.shape) XXT_sharded = binops.gemm(pwex, X_sharded, X_sharded.T, overwrite=False) e = time.time() print("GEMM took {0}".format(e - t)) else: X_sharded = BigMatrix("qr_test_{0}_{1}".format(problem_size, shard_size), autosqueeze=False, bucket="numpywrennsdi2") key_name = binops.generate_key_name_binop(X_sharded, X_sharded.T, "gemm") XXT_sharded = BigMatrix(key_name, hash_keys=False, bucket="numpywrensdi2") XXT_sharded.lambdav = problem_size * 10 t = time.time() program, meta = qr(XXT_sharded) pipeline_width = args.pipeline if (lru): cache_size = 5 else: cache_size = 0 pywren_config = pwex.config e = time.time() print("Program compile took {0} seconds".format(e - t)) print("program.hash", program.hash) REDIS_CLIENT = program.control_plane.client done_counts = [] ready_counts = [] post_op_counts = [] not_ready_counts = [] running_counts = [] sqs_invis_counts = [] sqs_vis_counts = [] up_workers_counts = [] busy_workers_counts = [] read_objects = [] write_objects = [] all_read_timeouts = [] all_write_timeouts = [] all_redis_timeouts = [] times = [time.time()] flops = [0] reads = [0] writes = [0] print("LRU", lru) print("eager", eager) exp = {} exp["redis_done_counts"] = done_counts exp["redis_ready_counts"] = ready_counts exp["redis_post_op_counts"] = post_op_counts 
exp["redis_not_ready_counts"] = not_ready_counts exp["redis_running_counts"] = running_counts exp["sqs_invis_counts"] = sqs_invis_counts exp["sqs_vis_counts"] = sqs_vis_counts exp["busy_workers"] = busy_workers_counts exp["up_workers"] = up_workers_counts exp["times"] = times exp["lru"] = lru exp["priority"] = num_priorities exp["eager"] = eager exp["truncate"] = truncate exp["max_cores"] = max_cores exp["problem_size"] = problem_size exp["shard_size"] = shard_size exp["pipeline"] = pipeline exp["flops"] = flops exp["reads"] = reads exp["writes"] = writes exp["read_objects"] = read_objects exp["write_objects"] = write_objects exp["read_timeouts"] = all_read_timeouts exp["write_timeouts"] = all_write_timeouts exp["redis_timeouts"] = all_redis_timeouts exp["trial"] = trial exp["launch_granularity"] = launch_granularity exp["log_granularity"] = log_granularity exp["autoscale_policy"] = autoscale_policy exp["standalone"] = standalone exp["program"] = program exp["time_steps"] = 1 exp["failed"] = False program.start() t = time.time() logger.info("Starting with {0} cores".format(start_cores)) all_futures = pwex.map( lambda x: job_runner.lambdapack_run(program, pipeline_width=pipeline_width, cache_size=cache_size, timeout=timeout), range(start_cores), extra_env=extra_env) start_time = time.time() last_run_time = start_time print(program.program_status()) print("QUEUE URLS", len(program.queue_urls)) total_lambda_epochs = start_cores try: while (program.program_status() == lp.PS.RUNNING): time.sleep(log_granularity) curr_time = int(time.time() - start_time) p = program.get_progress() if (p is None): print("no progress...") continue else: p = int(p) times.append(int(time.time())) max_pc = p waiting = 0 running = 0 for i, queue_url in enumerate(program.queue_urls): client = boto3.client('sqs') attrs = client.get_queue_attributes( QueueUrl=queue_url, AttributeNames=[ 'ApproximateNumberOfMessages', 'ApproximateNumberOfMessagesNotVisible' ])['Attributes'] waiting += int(attrs["ApproximateNumberOfMessages"]) running += int(attrs["ApproximateNumberOfMessagesNotVisible"]) sqs_invis_counts.append(running) sqs_vis_counts.append(waiting) busy_workers = REDIS_CLIENT.get("{0}_busy".format(program.hash)) if (busy_workers == None): busy_workers = 0 else: busy_workers = int(busy_workers) up_workers = program.get_up() if (up_workers == None): up_workers = 0 else: up_workers = int(up_workers) up_workers_counts.append(up_workers) busy_workers_counts.append(busy_workers) logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format( up_workers, busy_workers, curr_time)) if ((curr_time % INFO_FREQ) == 0): logger.info("Waiting: {0}, Currently Processing: {1}".format( waiting, running)) logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format( up_workers, busy_workers, curr_time)) current_gflops = program.get_flops() if (current_gflops is None): current_gflops = 0 else: current_gflops = int(current_gflops) / 1e9 flops.append(current_gflops) current_gbytes_read = program.get_read() if (current_gbytes_read is None): current_gbytes_read = 0 else: current_gbytes_read = int(current_gbytes_read) / 1e9 reads.append(current_gbytes_read) current_gbytes_write = program.get_write() if (current_gbytes_write is None): current_gbytes_write = 0 else: current_gbytes_write = int(current_gbytes_write) / 1e9 writes.append(current_gbytes_write) gflops_rate = flops[-1] / (times[-1] - times[0]) greads_rate = reads[-1] / (times[-1] - times[0]) gwrites_rate = writes[-1] / (times[-1] - times[0]) b = XXT_sharded.shard_sizes[0] 
current_objects_read = (current_gbytes_read * 1e9) / (b * b * 8) current_objects_write = (current_gbytes_write * 1e9) / (b * b * 8) read_objects.append(current_objects_read) write_objects.append(current_objects_write) read_rate = read_objects[-1] / (times[-1] - times[0]) write_rate = write_objects[-1] / (times[-1] - times[0]) avg_workers = np.mean(up_workers_counts) smooth_len = 10 if (len(flops) > smooth_len + 5): gflops_rate_5_min_window = (flops[-1] - flops[-smooth_len]) / ( times[-1] - times[-smooth_len]) gread_rate_5_min_window = (reads[-1] - reads[-smooth_len]) / ( times[-1] - times[-smooth_len]) gwrite_rate_5_min_window = ( writes[-1] - writes[-smooth_len]) / (times[-1] - times[-smooth_len]) read_rate_5_min_window = (read_objects[-1] - read_objects[-smooth_len]) / ( times[-1] - times[-smooth_len]) write_rate_5_min_window = (write_objects[-1] - write_objects[-smooth_len]) / ( times[-1] - times[-smooth_len]) workers_5_min_window = np.mean(up_workers_counts[-smooth_len:]) else: gflops_rate_5_min_window = "N/A" gread_rate_5_min_window = "N/A" gwrite_rate_5_min_window = "N/A" workers_5_min_window = "N/A" read_rate_5_min_window = "N/A" write_rate_5_min_window = "N/A" read_timeouts = int(parse_int( REDIS_CLIENT.get("s3.timeouts.read"))) write_timeouts = int( parse_int(REDIS_CLIENT.get("s3.timeouts.write"))) redis_timeouts = int(parse_int(REDIS_CLIENT.get("redis.timeouts"))) all_read_timeouts.append(read_timeouts) all_write_timeouts.append(write_timeouts) all_redis_timeouts.append(redis_timeouts) read_timeouts_fraction = read_timeouts / (current_objects_read + 1e-8) write_timeouts_fraction = write_timeouts / (current_objects_write + 1e-8) print("=======================================") print("Max PC is {0}".format(max_pc)) print("Waiting: {0}, Currently Processing: {1}".format( waiting, running)) print("{2}: Up Workers: {0}, Busy Workers: {1}".format( up_workers, busy_workers, curr_time)) print( "{0}: Total GFLOPS {1}, Total GBytes Read {2}, Total GBytes Write {3}" .format(curr_time, current_gflops, current_gbytes_read, current_gbytes_write)) print( "{0}: Average GFLOPS rate {1}, Average GBytes Read rate {2}, Average GBytes Write rate {3}, Average Worker Count {4}" .format(curr_time, gflops_rate, greads_rate, gwrites_rate, avg_workers)) print("{0}: Average read txns/s {1}, Average write txns/s {2}". format(curr_time, read_rate, write_rate)) print( "{0}: smoothed GFLOPS rate {1}, smoothed GBytes Read rate {2}, smoothed GBytes Write rate {3}, smoothed Worker Count {4}" .format(curr_time, gflops_rate_5_min_window, gread_rate_5_min_window, gwrite_rate_5_min_window, workers_5_min_window)) print("{0}: smoothed read txns/s {1}, smoothed write txns/s {2}". 
format(curr_time, read_rate_5_min_window, write_rate_5_min_window)) print( "{0}: Read timeouts: {1}, Write timeouts: {2}, Redis timeouts: {3} " .format(curr_time, read_timeouts, write_timeouts, redis_timeouts)) print( "{0}: Read timeouts fraction: {1}, Write timeouts fraction: {2}" .format(curr_time, read_timeouts_fraction, write_timeouts_fraction)) print("=======================================") time_since_launch = time.time() - last_run_time if (time_since_launch > (0.85 * timeout)): break exp["time_steps"] += 1 except KeyboardInterrupt: exp["failed"] = True program.stop() pass except Exception as e: traceback.print_exc() exp["failed"] = True program.stop() raise pass print("killing program...") print(program.program_status()) exp["all_futures"] = all_futures exp_bytes = dill.dumps(exp) client = boto3.client('s3') client.put_object(Key="lambdapack/{0}/runtime.pickle".format(program.hash), Body=exp_bytes, Bucket=program.bucket) print("=======================") print("=======================") print("Execution Summary:") print("Executed Program ID: {0}".format(program.hash)) print("Program Success: {0}".format((not exp["failed"]))) print("Problem Size: {0}".format(exp["problem_size"])) print("Shard Size: {0}".format(exp["shard_size"])) print("Total Execution time: {0}".format(times[-1] - times[0])) print("Average Flop Rate (GFlop/s): {0}".format(exp["flops"][-1] / (times[-1] - times[0]))) with open("/tmp/last_run", "w+") as f: f.write(program.hash)
def handle_client(client_socket): while True: try: print("here once again") message_length=int((client_socket.recv(HEADER_LENGTH).strip()).decode("utf-8")) objectx=client_socket.recv(message_length) while True: modify_list.acquire() if(len(online_servers)>1): server_socket1=online_servers.pop(0) server_socket2=online_servers.pop(0) modify_list.release() break else: pass modify_list.release() time.sleep(0.5) object_decode=pickle.loads(objectx) object_decode1=copy.deepcopy(object_decode) object_decode2=copy.deepcopy(object_decode) print(object_decode) indexx=len(object_decode.data) object_decode1.data=object_decode.data[0:int(indexx/2)] object_decode2.data=object_decode.data[int(indexx/2):indexx] print(object_decode1.data,object_decode1) print(object_decode2.data,object_decode2) objectx1=pickle.dumps(object_decode1) objectx2=pickle.dumps(object_decode2) objectx_length=f"{len(objectx):<{HEADER_LENGTH}}".encode("utf-8") objectx1_length=f"{len(objectx1):<{HEADER_LENGTH}}".encode("utf-8") objectx2_length=f"{len(objectx2):<{HEADER_LENGTH}}".encode("utf-8") server_socket1.send(objectx1_length+objectx1) server_socket2.send(objectx2_length+objectx2) print("Sent") objectx1_length=int(server_socket1.recv(HEADER_LENGTH).strip().decode("utf-8")) objectx1=server_socket1.recv(objectx1_length) objectx1=pickle.loads(objectx1) print("Here") print(objectx1) objectx2_length=int(server_socket2.recv(HEADER_LENGTH).strip().decode("utf-8")) print("Here1") print(objectx2_length) objectx2=server_socket2.recv(objectx2_length) print("Here2") objectx2=pickle.loads(objectx2) print("Received") #Combine Function object_decode.data=[] object_decode.data.append(objectx1.processed_data) object_decode.data.append(objectx2.processed_data) object_decode.processed_data=object_decode.function(object_decode.data) objectx=pickle.dumps(object_decode) objectx_length=f"{len(objectx):<{HEADER_LENGTH}}".encode("utf-8") client_socket.send(objectx_length+objectx) print("Sent to Client") modify_list.acquire() online_servers.append(server_socket1) online_servers.append(server_socket2) modify_list.release() print("Reavailable Server") except IOError as e: if e.errno!=errno.EAGAIN and e.errno!=errno.EWOULDBLOCK: print('READING ERROR , Client Must have Ended the Connection',str(e)) server_socket.close() client_socket.close() break except Exception as e: client_socket.close() server_socket.close() print("Error I am here",e) break
def run_dill_encoded(payload: bytes) -> bytes:
    fun, args, kwargs = dill.loads(payload)
    return dill.dumps(fun(*args, **kwargs))
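# A usage sketch for run_dill_encoded, assuming it is importable at module level as
# above; apply_async_dill is a hypothetical helper, not part of the original snippet.
import multiprocessing

import dill


def apply_async_dill(pool, fun, args=(), kwargs=None):
    # Ship the whole call as one dill payload so lambdas and closures survive pickling.
    payload = dill.dumps((fun, args, kwargs or {}))
    return pool.apply_async(run_dill_encoded, (payload,))


if __name__ == "__main__":
    with multiprocessing.Pool(2) as pool:
        result = apply_async_dill(pool, lambda x: x * x, (7,))
        print(dill.loads(result.get()))  # -> 49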
def save(self, key, value, expire=None):
    self.bootstrap_storage()
    try:
        return self.storage.save(key, pickle.dumps(value), expire=expire)
    except Exception:
        logging.exception("Unable to save %s", key)
def test_excel_model(self): start = time.time() _msg = '[info] test_excel_model: ' xl_mdl = ExcelModel() print('\n%sLoading excel-model.' % _msg) s = time.time() xl_mdl.loads(self.filename) xl_mdl.add_book(self.link_filename) msg = '%sLoaded excel-model in %.2fs.\n%sFinishing excel-model.' print(msg % (_msg, time.time() - s, _msg)) s = time.time() xl_mdl.finish() print('%sFinished excel-model in %.2fs.' % (_msg, time.time() - s)) n_test = 0 for i in range(3): print('%sCalculate excel-model.' % _msg) s = time.time() xl_mdl.calculate({"'[EXTRA.XLSX]EXTRA'!A1:B1": [[1, 1]]}) msg = '%sCalculated excel-model in %.2fs.\n%s' \ 'Comparing overwritten results.' print(msg % (_msg, time.time() - s, _msg)) s = time.time() books = _res2books(xl_mdl.write(xl_mdl.books)) n_test += self._compare(books, self.results) msg = '%sCompared overwritten results in %.2fs.\n' \ '%sComparing fresh written results.' print(msg % (_msg, time.time() - s, _msg)) s = time.time() n_test += self._compare(_res2books(xl_mdl.write()), self.results) msg = '%sCompared fresh written results in %.2fs.' print(msg % (_msg, time.time() - s)) if i == 0: print('%sSaving excel-model dill.' % _msg) s = time.time() xl_copy = dill.dumps(xl_mdl) msg = '%sSaved excel-model dill in %.2fs.\n' \ '%sLoading excel-model dill.' print(msg % (_msg, time.time() - s, _msg)) s = time.time() xl_mdl = dill.loads(xl_copy) del xl_copy msg = '%sLoaded excel-model dill in %.2fs.' print(msg % (_msg, time.time() - s)) elif i == 1: print('%sDeep-copying excel-model.' % _msg) s = time.time() xl_mdl = copy.deepcopy(xl_mdl) msg = '%sDeep-copied excel-model in %.2fs.' print(msg % (_msg, time.time() - s)) print('%sSaving excel-model xlsx.' % _msg) s = time.time() dirpath = osp.join(mydir, 'tmp') xl_mdl.write(dirpath=dirpath) msg = '%sSaved excel-model exls in %.2fs.\n%sComparing saved results.' print(msg % (_msg, time.time() - s, _msg)) s = time.time() n_test += self._compare( _file2books(*(osp.join(dirpath, fp) for fp in xl_mdl.books)), self.results) msg = '%sCompared saved results in %.2fs.\n%sRan %d tests in %.2fs' print(msg % (_msg, time.time() - s, _msg, n_test, time.time() - start))
def test_load_dump(self):
    with testing.tmp_dir(self.get_temp_dir()) as tmp_dir:
        builder = testing.DummyMnist(data_dir=tmp_dir)
        builder2 = dill.loads(dill.dumps(builder))
        self.assertEqual(builder.name, builder2.name)
        self.assertEqual(builder.version, builder2.version)
def write_dill_compressed(file_name, data):
    with gzip.GzipFile(file_name, 'w') as fout:
        fout.write(dill.dumps(data))
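# Counterpart sketch to write_dill_compressed above; read_dill_compressed is an
# assumed name, not part of the original snippet.
import gzip

import dill


def read_dill_compressed(file_name):
    # Decompress the gzip container and let dill rebuild the original object.
    with gzip.GzipFile(file_name, 'r') as fin:
        return dill.loads(fin.read())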
import pickle
import dill  # pip install dill

square = lambda x: x * x
# my_pickle = pickle.dumps(square)  # plain pickle raises PicklingError on lambdas
my_pickle = dill.dumps(square)
print(my_pickle)
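# Round-trip sketch (not in the original snippet): dill restores the lambda that
# plain pickle rejects.
restored = dill.loads(my_pickle)
print(restored(4))  # 16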
def test_r_error_pickling():
    import dill
    # loads failed because the old constructor of RError had no "default constructor"
    err = dill.loads(dill.dumps(RError("test")))
    assert err.value == "test"
def __getstate__(self):
    import dill
    return dill.dumps(self.func)
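# A minimal sketch of the matching __setstate__, assuming the object only needs the
# dill-serialized callable produced by __getstate__ above.
def __setstate__(self, state):
    import dill
    # Rebuild the wrapped callable from the bytes returned by __getstate__.
    self.func = dill.loads(state)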
def get_message_string(message):
    """ Return a string that can be sent over the network. """
    return pickle.dumps(message)
def _make_default_run_script( url: str, learners_file: str, save_interval: int, log_interval: int, goal: Optional[Callable[[adaptive.BaseLearner], bool]] = None, runner_kwargs: Optional[Dict[str, Any]] = None, run_script_fname: str = "run_learner.py", executor_type: str = "mpi4py", ) -> None: default_runner_kwargs = dict(shutdown_executor=True) runner_kwargs = dict(default_runner_kwargs, goal=goal, **(runner_kwargs or {})) serialized_runner_kwargs = dill.dumps(runner_kwargs) if executor_type == "mpi4py": import_line = "from mpi4py.futures import MPIPoolExecutor" executor_line = "MPIPoolExecutor()" elif executor_type == "ipyparallel": import_line = "from adaptive_scheduler.utils import connect_to_ipyparallel" executor_line = "connect_to_ipyparallel(profile=args.profile, n=args.n)" elif executor_type == "dask-mpi": try: import dask_mpi # noqa: F401 except ModuleNotFoundError as e: msg = "You need to have 'dask-mpi' installed to use `executor_type='dask-mpi'`." raise Exception(msg) from e import_line = "from distributed import Client" executor_line = "Client()" else: raise NotImplementedError("Use 'ipyparallel', 'dask-mpi' or 'mpi4py'.") if os.path.abspath(os.path.dirname(learners_file)) != os.path.abspath(""): raise RuntimeError(f"The {learners_file} needs to be in the same" " directory as where this is run from.") learners_module = os.path.splitext(os.path.basename(learners_file))[0] template = textwrap.dedent(f"""\ #!/usr/bin/env python3 # {run_script_fname}, automatically generated # by `adaptive_scheduler.server_support._make_default_run_script()`. import argparse from contextlib import suppress import adaptive import dill from adaptive_scheduler import client_support {import_line} # the file that defines the learners we created above from {learners_module} import learners, fnames if __name__ == "__main__": # ← use this, see warning @ https://bit.ly/2HAk0GG # parse arguments parser = argparse.ArgumentParser() parser.add_argument("--profile", action="store", dest="profile", type=str) parser.add_argument("--n", action="store", dest="n", type=int) parser.add_argument("--log-fname", action="store", dest="log_fname", type=str) parser.add_argument("--job-id", action="store", dest="job_id", type=str) parser.add_argument("--name", action="store", dest="name", type=str) args = parser.parse_args() # the address of the "database manager" url = "{url}" # ask the database for a learner that we can run which we log in `args.log_fname` learner, fname = client_support.get_learner( learners, fnames, url, args.log_fname, args.job_id, args.name ) # load the data with suppress(Exception): learner.load(fname) # connect to the executor executor = {executor_line} # this is serialized by dill.dumps runner_kwargs = dill.loads({serialized_runner_kwargs}) # run until `some_goal` is reached with an `MPIPoolExecutor` runner = adaptive.Runner(learner, executor=executor, **runner_kwargs) # periodically save the data (in case the job dies) runner.start_periodic_saving(dict(fname=fname), interval={save_interval}) # log progress info in the job output script, optional client_support.log_info(runner, interval={log_interval}) # block until runner goal reached runner.ioloop.run_until_complete(runner.task) # save once more after the runner is done learner.save(fname) # tell the database that this learner has reached its goal client_support.tell_done(url, fname) """) if executor_type == "dask-mpi": template = "from dask_mpi import initialize; initialize()\n" + template with open(run_script_fname, "w") as f: f.write(template)
def test_reduce(self):
    with pytest.raises(RuntimeError, match="not reducible"):
        dill.dumps(ThreadPool(processes=2))
def push(self, msg, key=''):
    key = self._key if not key else key
    return self._queue.rpush(key, pickle.dumps(msg))
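# Hypothetical counterpart to push, sketched under the assumption that self._queue is
# a Redis-style client: blpop blocks until an item is available (or the timeout
# expires) and returns a (key, value) pair, or None on timeout.
def pop(self, key='', timeout=0):
    key = self._key if not key else key
    item = self._queue.blpop(key, timeout=timeout)
    return pickle.loads(item[1]) if item else None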
def set_object(self, x):
    self.object = pickle.dumps(x)
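# One-line counterpart sketch for set_object, assuming self.object holds the pickled bytes.
def get_object(self):
    return pickle.loads(self.object)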
def __setitem__(self, key, value):
    self.cache[key] = value
    filepath = os.path.join(self.paramdir, get_box_name(self.name), key)
    with open(filepath, "wb") as fp:
        serialized = dill.dumps(value)
        fp.write(serialized)
def test_serializing_and_deserializing_unconfigured_robot(self):
    robot_as_bytes = dill.dumps(self.robot)
    self.assertIsInstance(robot_as_bytes, bytes)
    dill.loads(robot_as_bytes)
def enqueue(self, obj, *args, **kwargs): """Serialize the function call and place it in the Kafka topic. For example: .. code-block:: python import requests from kq import Queue q = Queue() # You can queue the function call with its arguments job = q.enqueue(requests.get, 'https://www.google.com') # Or you can queue a kq.job.Job instance directly q.enqueue(job) :param obj: Function or the job object to enqueue. If a function is given, the function *must* be pickle-able. :type obj: callable | kq.job.Job :param args: Arguments for the function. Ignored if a KQ job object is given for the first argument instead. :type args: list :param kwargs: Keyword arguments for the function. Ignored if a KQ job instance is given as the first argument instead. :type kwargs: dict :param key: Queue the job with a key. Jobs queued with a specific key are processed in order they were queued. Setting it to None (default) disables this behaviour. :type key: str | unicode :return: The job that was enqueued :rtype: kq.job.Job """ key = None if isinstance(obj, Job): func = obj.func args = obj.args kwargs = obj.kwargs key = obj.key else: func = obj if not callable(func): raise ValueError( '{} is not a callable'.format(func) ) job = Job( id=str(uuid.uuid4()), timestamp=int(time.time()), topic=self._topic, func=func, args=args, kwargs=kwargs, timeout=self._timeout, key=key ) future = self._producer.send(self._topic, dill.dumps(job), key=key) try: future.get(timeout=self._timeout or 5) except KafkaError as e: self._logger.error('Queuing failed: {}', str(e)) return None self._logger.info('Enqueued: {}'.format(job)) return job
import sys
import dill
import test_mixins as module

try:
    from imp import reload
except ImportError:
    pass

dill.settings['recurse'] = True

cached = (module.__cached__ if hasattr(module, "__cached__")
          else module.__file__.split(".", 1)[0] + ".pyc")

module.a = 1234

pik_mod = dill.dumps(module)

module.a = 0

# remove module
del sys.modules[module.__name__]
del module

module = dill.loads(pik_mod)
assert hasattr(module, "a") and module.a == 1234
assert module.double_add(1, 2, 3) == 2 * module.fx

# Restart, and test use_diff
reload(module)
class Foo(object):
    def __init__(self):
        pass
    def __getstate__(self):
        bar[0] = bar[0] + 1
        return {}
    def __setstate__(self, data):
        pass

f = Foo()

from dill import dumps, loads
dumps(f)
dumps(lambda: f, recurse=False)  # doesn't call __getstate__
dumps(lambda: f, recurse=True)   # calls __getstate__
assert bar[0] == 2

#97 serialize lambdas in test files
from math import sin, pi

def sinc(x):
    return sin(x) / x

settings['recurse'] = True
_sinc = dumps(sinc)
del sin
def serialize(obj):
    """ Should take a complex object and pickle it """
    pickled = dill.dumps(obj, byref=False)
    compressed = lz4.frame.compress(pickled)
    return compressed
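# Sketch of the inverse of serialize above, assuming lz4.frame compression as shown;
# the name deserialize is illustrative, not part of the original snippet.
def deserialize(blob):
    """ Decompress and unpickle an object produced by serialize() """
    pickled = lz4.frame.decompress(blob)
    return dill.loads(pickled)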
def __init__(self, e_type, e_value, traceback):
    self.e_type = dill.dumps(e_type)
    self.e_value = dill.dumps(e_value)
    self.e_traceback = Traceback(traceback)
        return True

class _newclass2(object):
    def __call__(self):
        pass
    def ok(self):
        return True

o = _class()
oc = _class2()
n = _newclass()
nc = _newclass2()

clslist = [_class, _class2, _newclass, _newclass2]
objlist = [o, oc, n, nc]
_clslist = [dill.dumps(obj) for obj in clslist]
_objlist = [dill.dumps(obj) for obj in objlist]

for obj in clslist:
    globals().pop(obj.__name__)
del clslist
for obj in ['o', 'oc', 'n', 'nc']:
    globals().pop(obj)
del objlist
del obj

for obj, cls in zip(_objlist, _clslist):
    _cls = dill.loads(cls)
    _obj = dill.loads(obj)
    assert _obj.ok()
    assert _cls.ok(_cls())
def create_evaluate_ops( # pylint: disable=too-many-arguments task_prefix: str, data_format: str, input_paths: List[str], prediction_path: str, metric_fn_and_keys: Tuple[T, Iterable[str]], validate_fn: T, batch_prediction_job_id: Optional[str] = None, region: Optional[str] = None, project_id: Optional[str] = None, dataflow_options: Optional[Dict] = None, model_uri: Optional[str] = None, model_name: Optional[str] = None, version_name: Optional[str] = None, dag: Optional[DAG] = None, py_interpreter="python3", ): """ Creates Operators needed for model evaluation and returns. It gets prediction over inputs via Cloud ML Engine BatchPrediction API by calling MLEngineBatchPredictionOperator, then summarize and validate the result via Cloud Dataflow using DataFlowPythonOperator. For details and pricing about Batch prediction, please refer to the website https://cloud.google.com/ml-engine/docs/how-tos/batch-predict and for Cloud Dataflow, https://cloud.google.com/dataflow/docs/ It returns three chained operators for prediction, summary, and validation, named as ``<prefix>-prediction``, ``<prefix>-summary``, and ``<prefix>-validation``, respectively. (``<prefix>`` should contain only alphanumeric characters or hyphen.) The upstream and downstream can be set accordingly like: .. code-block:: python pred, _, val = create_evaluate_ops(...) pred.set_upstream(upstream_op) ... downstream_op.set_upstream(val) Callers will provide two python callables, metric_fn and validate_fn, in order to customize the evaluation behavior as they wish. - metric_fn receives a dictionary per instance derived from json in the batch prediction result. The keys might vary depending on the model. It should return a tuple of metrics. - validation_fn receives a dictionary of the averaged metrics that metric_fn generated over all instances. The key/value of the dictionary matches to what's given by metric_fn_and_keys arg. The dictionary contains an additional metric, 'count' to represent the total number of instances received for evaluation. The function would raise an exception to mark the task as failed, in a case the validation result is not okay to proceed (i.e. to set the trained version as default). Typical examples are like this: .. code-block:: python def get_metric_fn_and_keys(): import math # imports should be outside of the metric_fn below. def error_and_squared_error(inst): label = float(inst["input_label"]) classes = float(inst["classes"]) # 0 or 1 err = abs(classes - label) squared_err = math.pow(classes - label, 2) return (err, squared_err) # returns a tuple. return error_and_squared_error, ["err", "mse"] # key order must match. def validate_err_and_count(summary): if summary["err"] > 0.2: raise ValueError("Too high err>0.2; summary=%s" % summary) if summary["mse"] > 0.05: raise ValueError("Too high mse>0.05; summary=%s" % summary) if summary["count"] < 1000: raise ValueError("Too few instances<1000; summary=%s" % summary) return summary For the details on the other BatchPrediction-related arguments (project_id, job_id, region, data_format, input_paths, prediction_path, model_uri), please refer to MLEngineBatchPredictionOperator too. :param task_prefix: a prefix for the tasks. Only alphanumeric characters and hyphen are allowed (no underscores), since this will be used as dataflow job name, which doesn't allow other characters. :type task_prefix: str :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP' :type data_format: str :param input_paths: a list of input paths to be sent to BatchPrediction. 
:type input_paths: list[str] :param prediction_path: GCS path to put the prediction results in. :type prediction_path: str :param metric_fn_and_keys: a tuple of metric_fn and metric_keys: - metric_fn is a function that accepts a dictionary (for an instance), and returns a tuple of metric(s) that it calculates. - metric_keys is a list of strings to denote the key of each metric. :type metric_fn_and_keys: tuple of a function and a list[str] :param validate_fn: a function to validate whether the averaged metric(s) is good enough to push the model. :type validate_fn: function :param batch_prediction_job_id: the id to use for the Cloud ML Batch prediction job. Passed directly to the MLEngineBatchPredictionOperator as the job_id argument. :type batch_prediction_job_id: str :param project_id: the Google Cloud project id in which to execute Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s `default_args['project_id']` will be used. :type project_id: str :param region: the Google Cloud region in which to execute Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s `default_args['region']` will be used. :type region: str :param dataflow_options: options to run Dataflow jobs. If None, then the `dag`'s `default_args['dataflow_default_options']` will be used. :type dataflow_options: dictionary :param model_uri: GCS path of the model exported by Tensorflow using ``tensorflow.estimator.export_savedmodel()``. It cannot be used with model_name or version_name below. See MLEngineBatchPredictionOperator for more detail. :type model_uri: str :param model_name: Used to indicate a model to use for prediction. Can be used in combination with version_name, but cannot be used together with model_uri. See MLEngineBatchPredictionOperator for more detail. If None, then the `dag`'s `default_args['model_name']` will be used. :type model_name: str :param version_name: Used to indicate a model version to use for prediction, in combination with model_name. Cannot be used together with model_uri. See MLEngineBatchPredictionOperator for more detail. If None, then the `dag`'s `default_args['version_name']` will be used. :type version_name: str :param dag: The `DAG` to use for all Operators. :type dag: airflow.models.DAG :param py_interpreter: Python version of the beam pipeline. If None, this defaults to the python3. To track python versions supported by beam and related issues check: https://issues.apache.org/jira/browse/BEAM-1251 :type py_interpreter: str :returns: a tuple of three operators, (prediction, summary, validation) :rtype: tuple(DataFlowPythonOperator, DataFlowPythonOperator, PythonOperator) """ batch_prediction_job_id = batch_prediction_job_id or "" dataflow_options = dataflow_options or {} region = region or "" # Verify that task_prefix doesn't have any special characters except hyphen # '-', which is the only allowed non-alphanumeric character by Dataflow. 
if not re.match(r"^[a-zA-Z][-A-Za-z0-9]*$", task_prefix): raise AirflowException( "Malformed task_id for DataFlowPythonOperator (only alphanumeric " "and hyphens are allowed but got: " + task_prefix ) metric_fn, metric_keys = metric_fn_and_keys if not callable(metric_fn): raise AirflowException("`metric_fn` param must be callable.") if not callable(validate_fn): raise AirflowException("`validate_fn` param must be callable.") if dag is not None and dag.default_args is not None: default_args = dag.default_args project_id = project_id or default_args.get('project_id') region = region or default_args['region'] model_name = model_name or default_args.get('model_name') version_name = version_name or default_args.get('version_name') dataflow_options = dataflow_options or default_args.get('dataflow_default_options') evaluate_prediction = MLEngineStartBatchPredictionJobOperator( task_id=(task_prefix + "-prediction"), project_id=project_id, job_id=batch_prediction_job_id, region=region, data_format=data_format, input_paths=input_paths, output_path=prediction_path, uri=model_uri, model_name=model_name, version_name=version_name, dag=dag, ) metric_fn_encoded = base64.b64encode(dill.dumps(metric_fn, recurse=True)).decode() evaluate_summary = DataflowCreatePythonJobOperator( task_id=(task_prefix + "-summary"), py_file=os.path.join(os.path.dirname(__file__), 'mlengine_prediction_summary.py'), dataflow_default_options=dataflow_options, options={ "prediction_path": prediction_path, "metric_fn_encoded": metric_fn_encoded, "metric_keys": ','.join(metric_keys), }, py_interpreter=py_interpreter, py_requirements=['apache-beam[gcp]>=2.14.0'], dag=dag, ) evaluate_summary.set_upstream(evaluate_prediction) def apply_validate_fn(*args, templates_dict, **kwargs): prediction_path = templates_dict["prediction_path"] scheme, bucket, obj, _, _ = urlsplit(prediction_path) if scheme != "gs" or not bucket or not obj: raise ValueError(f"Wrong format prediction_path: {prediction_path}") summary = os.path.join(obj.strip("/"), "prediction.summary.json") gcs_hook = GCSHook() summary = json.loads(gcs_hook.download(bucket, summary)) return validate_fn(summary) evaluate_validation = PythonOperator( task_id=(task_prefix + "-validation"), python_callable=apply_validate_fn, templates_dict={"prediction_path": prediction_path}, dag=dag, ) evaluate_validation.set_upstream(evaluate_summary) return evaluate_prediction, evaluate_summary, evaluate_validation
if HOST: host_galaxy_comp = HostGalaxyComponent() model.components.append(host_galaxy_comp) if BC or BpC: balmer_comp = BalmerCombined(BalmerContinuum=BC, BalmerPseudocContinuum=BpC) model.components.append(balmer_comp) if Calzetti_ext or SMC_ext or MW_ext or AGN_ext or LMC_ext: ext_comp = Extinction(MW=MW_ext, AGN=AGN_ext, LMC=LMC_ext, SMC=SMC_ext, Calzetti=Calzetti_ext) model.components.append(ext_comp) model.data_spectrum = spectrum # add data # ------------ # Run MCMC # ------------ model.run_mcmc(n_walkers=n_walkers, n_iterations=n_iterations) print("Mean acceptance fraction: {0:.3f}".format( np.mean(model.sampler.acceptance_fraction))) # ------------- # save chains & model # ------------ with gzip.open('model.pickle.gz', 'wb') as model_output: model_output.write(pickle.dumps(model))
def msg_serialize(objects: list):
    return [dill.dumps(obj) for obj in objects]
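# Hedged counterpart to msg_serialize above: rebuilds the original objects from the
# payload list; the name msg_deserialize is illustrative.
def msg_deserialize(payloads: list):
    return [dill.loads(payload) for payload in payloads]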
def pytest_configure_node(node):
    if node.config._reportportal_enabled is False:
        # Stop now if the plugin is not properly configured
        return
    node.slaveinput['py_test_service'] = pickle.dumps(
        node.config.py_test_service)
def __init__(self, function):
    self.function = dill.dumps(function)
    self.params = None
    self.eval_result = None
def export_object(obj):
    import dill as pickle
    import base64
    return base64.b64encode(gzip.zlib.compress(pickle.dumps(obj, 4), 9)).decode('utf-8')
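# Sketch of the inverse of export_object, assuming the same base64 + zlib + dill
# pipeline; the name import_object is illustrative, not part of the original snippet.
def import_object(encoded):
    import dill as pickle
    import base64
    return pickle.loads(gzip.zlib.decompress(base64.b64decode(encoded)))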