Code example #1
File: remote.py Project: hackolite/remote
 def wrapper(*args, **kwds):
     pickledfunc = dill.dumps(func)            
     try:
         HOST, USER, PASSWORD = kwds['remote'][0], kwds['remote'][1], kwds['remote'][2] 
     except:
         return func(*args, **kwds)
     PORT = random.randrange(10000, 20000)
     threads = []
     t = Process(target=start_remote, args=(HOST,USER,PASSWORD,PORT))
     t.start()
     time.sleep(2)
     sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
     sock.connect((HOST, PORT))
     modules_list = [itm for itm in func.__globals__.keys() if itm.count("_") == 0 and itm != func.__name__ and itm != 'remoteFunction']
     h = hashlib.sha1()
     h.update(pickledfunc)
     hexdigestkey = h.hexdigest()
     del kwds['remote']
     lis = [pickledfunc, modules_list, args, kwds, hexdigestkey]
     lis = dill.dumps(lis)
     sock.sendall(lis)
     response = sock.recv(4096)
     response = dill.loads(response)
     sock.close()
     t.terminate()
     return response
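
A note on the pattern above: the function is pickled with dill and fingerprinted with SHA-1 so the receiving side (code example #7 below) can verify the payload before executing it. A minimal, self-contained sketch of that contract, with illustrative names that are not part of the project:

import hashlib
import dill

def pack_call(func, *args, **kwds):
    # Serialize the callable, then fingerprint the pickled bytes so the
    # receiver can detect a corrupted or tampered payload.
    pickled = dill.dumps(func)
    digest = hashlib.sha1(pickled).hexdigest()
    return dill.dumps([pickled, args, kwds, digest])

def run_call(blob):
    pickled, args, kwds, digest = dill.loads(blob)
    if hashlib.sha1(pickled).hexdigest() != digest:
        raise ValueError("payload digest mismatch")
    return dill.loads(pickled)(*args, **kwds)

assert run_call(pack_call(lambda a, b: a + b, 2, 3)) == 5
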
Code example #2
File: job.py Project: alexbw/rq-dill
    def dump(self):
        """Returns a serialization of the current job instance"""
        obj = {}
        obj['created_at'] = utcformat(self.created_at or utcnow())
        obj['data'] = self.data

        if self.origin is not None:
            obj['origin'] = self.origin
        if self.description is not None:
            obj['description'] = self.description
        if self.enqueued_at is not None:
            obj['enqueued_at'] = utcformat(self.enqueued_at)
        if self.ended_at is not None:
            obj['ended_at'] = utcformat(self.ended_at)
        if self._result is not None:
            obj['result'] = dumps(self._result)
        if self.exc_info is not None:
            obj['exc_info'] = self.exc_info
        if self.timeout is not None:
            obj['timeout'] = self.timeout
        if self.result_ttl is not None:
            obj['result_ttl'] = self.result_ttl
        if self._status is not None:
            obj['status'] = self._status
        if self._dependency_id is not None:
            obj['dependency_id'] = self._dependency_id
        if self.meta:
            obj['meta'] = dumps(self.meta)

        return obj
Code example #3
File: computations.py Project: msreis/SigNetSim
def add_computation(project, entry, object, timeout=None):

	if isinstance(entry, Optimization):
		entry.status = Optimization.QUEUED
		entry.save()
		comp = ComputationQueue(
			project=project,
			type=ComputationQueue.OPTIM,
			computation_id=entry.id,
			object=dumps(object).decode('Latin-1'),
			timeout=timeout
		)
		comp.save()

	elif isinstance(entry, Continuation):
		entry.status = Continuation.QUEUED
		entry.save()
		comp = ComputationQueue(
			project=project,
			type=ComputationQueue.CONT,
			computation_id=entry.id,
			object=dumps(object).decode('Latin-1'),
			timeout=timeout
		)
		comp.save()

	update_queue()
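
The queue row stores the dill payload as text by decoding the pickle bytes with Latin-1, which maps every byte value 0-255 to a code point and back without loss. A small sketch of that round trip (the field names here are illustrative, not SigNetSim's):

from dill import dumps, loads

payload = {"parameters": [1.5, 2.5], "label": "optim-run"}

stored_text = dumps(payload).decode('Latin-1')    # what goes into the text column
restored = loads(stored_text.encode('Latin-1'))   # what a worker would do later

assert restored == payload
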
Code example #4
File: pydump.py Project: gooli/pydump
def _convert(v):
    if dill is not None:
        try:
            dill.dumps(v)
            return v
        except:
            return _safe_repr(v)
    else:
        from datetime import date, time, datetime, timedelta
    
        if PY2:
            BUILTIN = (str, unicode, int, long, float, date, time, datetime, timedelta)
        else:
            BUILTIN = (str, int, float, date, time, datetime, timedelta)
        # XXX: what about bytes and bytearray?
    
        if v is None:
            return v
    
        if type(v) in BUILTIN:
            return v
    
        if type(v) is tuple:
            return tuple(_convert_seq(v))
    
        if type(v) is list:
            return list(_convert_seq(v))
    
        if type(v) is set:
            return set(_convert_seq(v))
    
        if type(v) is dict:
            return _convert_dict(v)
    
        return _safe_repr(v)
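
The helpers _convert_seq, _convert_dict and _safe_repr are referenced above but not shown. A plausible sketch of what they could look like (hypothetical, not the project's actual implementations):

def _convert_seq(seq):
    # Convert each element recursively so nested containers stay serializable.
    return (_convert(i) for i in seq)

def _convert_dict(d):
    return dict((_convert(k), _convert(v)) for k, v in d.items())

def _safe_repr(v):
    # Fall back to a string representation when the value itself cannot be pickled.
    try:
        return repr(v)
    except Exception:
        return '<unrepresentable object>'
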
Code example #5
File: keras_backend.py Project: tboquet/python-alp
def serialize(cust_obj):
    """A function to serialize custom objects passed to a model

    Args:
        cust_obj(callable): a custom layer or function to serialize

    Returns:
        a dict of the serialized components of the object"""
    ser_func = dict()
    if isinstance(cust_obj, types.FunctionType):

        func_code = six.get_function_code(cust_obj)
        func_code_d = dill.dumps(func_code).decode('raw_unicode_escape')
        ser_func['func_code_d'] = func_code_d
        ser_func['name_d'] = pickle.dumps(
            cust_obj.__name__).decode('raw_unicode_escape')
        ser_func['args_d'] = pickle.dumps(
            six.get_function_defaults(cust_obj)).decode('raw_unicode_escape')
        clos = dill.dumps(
            six.get_function_closure(cust_obj)).decode('raw_unicode_escape')
        ser_func['clos_d'] = clos
        ser_func['type_obj'] = 'func'
    else:
        if hasattr(cust_obj, '__module__'):  # pragma: no cover
            cust_obj.__module__ = '__main__'
        ser_func['name_d'] = None
        ser_func['args_d'] = None
        ser_func['clos_d'] = None
        ser_func['type_obj'] = 'class'
        loaded = dill.dumps(cust_obj).decode('raw_unicode_escape')
        ser_func['func_code_d'] = loaded
    return ser_func
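
A function serialized this way could in principle be rebuilt from its parts with types.FunctionType. The following is only an assumption about what the matching deserializer might look like, not code from python-alp:

import pickle
import types

import dill

def deserialize_func(ser_func, global_ns=None):
    # Rebuild a plain function from the components produced by serialize().
    code = dill.loads(ser_func['func_code_d'].encode('raw_unicode_escape'))
    name = pickle.loads(ser_func['name_d'].encode('raw_unicode_escape'))
    defaults = pickle.loads(ser_func['args_d'].encode('raw_unicode_escape'))
    closure = dill.loads(ser_func['clos_d'].encode('raw_unicode_escape'))
    return types.FunctionType(code, global_ns or globals(), name, defaults, closure)
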
Code example #6
    def _make_multi_process_batches(self, problem_iter):
        '''
        we do two things here:
        -break tasks into batches to be multiprocessed.
        -multiprocess sometimes does not terminate properly so we observe how many tasks go in and terminate
        once that number of outs is reached.

        Parameters
        ----------
        problem_iter: problems to put into the multiprocess queue

        Returns
        -------
            yields a batchsize sized problem chunks
        '''
        try:
            s = dill.dumps(self, byref=False)
        except Exception as exc:
            print(exc)
            print("dill dump failed in graphlearn.py (dill dies silently sometimes)")
        self.multiprocess_jobcount = 0
        self.multiprocess_all_prepared = False

        for e in grouper(problem_iter, self.batch_size):
            # cant just take batch size here because output of nons will be suppressed
            problems = [1 for problem in e if problem != None]
            self.multiprocess_jobcount += sum(problems)
            batch = dill.dumps(e)
            yield (s, batch)
        self.multiprocess_all_prepared = True
Code example #7
File: remote.py Project: hackolite/remote
 def handle(self):
     import sys
     data = self.request.recv(4096)
     cur_thread = threading.current_thread()
     response = "{}: {}".format(cur_thread.name, data)
     rec = dill.loads(data)
     modulesNames = rec[1]
     h = hashlib.sha1()
     h.update(rec[0])
     hexdigestkey = h.hexdigest()
     if hexdigestkey != rec[4]:
         sys.exit(1)
     remfunc = dill.loads(rec[0])
     for mod_name in modulesNames:
         remfunc.__globals__[mod_name] = import_module(mod_name)
     try:
         res = remfunc(*rec[2], **rec[3])
     except:
         msgerr = sys.exc_info()[0]
         res_pack = dill.dumps(msgerr)
         self.request.sendall(res_pack)
         self.server.shutdown()
         return
     res_pack = dill.dumps(res)
     self.request.sendall(res_pack)
     self.server.shutdown()
Code example #8
File: job.py Project: alexbw/rq-dill
    def func(self, value):
        if inspect.ismethod(value) or inspect.isfunction(value) or inspect.isbuiltin(value):
            self._func = dumps(value)
        else:  # we expect a string
            self._func = dumps(import_attribute(value))

        if inspect.ismethod(value):
            self._instance = value.__self__
Code example #9
File: test_examples.py Project: danielballan/bluesky
def test_pickling_examples():
    try:
        import dill
    except ImportError:
        raise pytest.skip('requires dill')
    dill.loads(dill.dumps(det))
    dill.loads(dill.dumps(motor))
    dill.loads(dill.dumps(flyer1))
Code example #10
File: pickler.py Project: ericmand/DataflowPythonSDK
def dumps(o):
  try:
    return base64.b64encode(dill.dumps(o))
  except Exception:          # pylint: disable=broad-except
    dill.dill._trace(True)   # pylint: disable=protected-access
    return base64.b64encode(dill.dumps(o))
  finally:
    dill.dill._trace(False)  # pylint: disable=protected-access
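
The matching decoder reverses both layers, base64 first and then dill. A minimal sketch under that assumption:

import base64

import dill

def loads(s):
    # Inverse of dumps() above: strip the base64 wrapping, then unpickle.
    return dill.loads(base64.b64decode(s))

assert loads(base64.b64encode(dill.dumps({'a': 1}))) == {'a': 1}
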
Code example #11
 def check_coder(self, coder, *values):
   self._observe(coder)
   for v in values:
     self.assertEqual(v, coder.decode(coder.encode(v)))
   copy1 = dill.loads(dill.dumps(coder))
   copy2 = dill.loads(dill.dumps(coder))
   for v in values:
     self.assertEqual(v, copy1.decode(copy2.encode(v)))
Code example #12
def test_picklemap():
    encode = picklemap(typed=False, flat=True, serializer='dill')
    assert encode(*args, **kwds) == dumps((1, 2, 'a', 3, 'b', 4))
    encode = picklemap(typed=False, flat=False, serializer='dill')
    assert encode(*args, **kwds) == dumps((args, kwds))
    encode = picklemap(typed=True, flat=True, serializer='dill')
    assert encode(*args, **kwds) == dumps( (1, 2, 'a', 3, 'b', 4, type(1), type(2), type(3), type(4)) )
    encode = picklemap(typed=True, flat=False, serializer='dill')
    assert encode(*args, **kwds) == dumps( (args, kwds, (type(1), type(2)), (type(3), type(4))) )
Code example #13
File: test_nested.py Project: uqfoundation/dill
def test_basic():
    a = [0, 1, 2]
    pa = pickle.dumps(a)
    pmath = pickle.dumps(math) #XXX: FAILS in pickle
    pmap = pickle.dumps(map)
    # ...
    la = pickle.loads(pa)
    lmath = pickle.loads(pmath)
    lmap = pickle.loads(pmap)
    assert list(map(math.sin, a)) == list(lmap(lmath.sin, la))
Code example #14
File: test_classdef.py Project: uqfoundation/dill
def test_array_nested():
    try:
        import numpy as np
    
        x = np.array([1])
        y = (x,)
        dill.dumps(x)
        assert y == dill.loads(dill.dumps(y))

    except ImportError: pass
Code example #15
File: utils.py Project: hoidn/utils
 def new_func(*args, **kwargs):
     # if the "flush" kwarg is passed, recompute regardless of whether
     # the result is cached
     if "flush" in list(kwargs.keys()):
         kwargs.pop("flush", None)
         key = (dill.dumps(args), frozenset(list(kwargs.items())), frozenset(list(closure_dict.items())))
         compute(key)
     key = (dill.dumps(args), frozenset(list(kwargs.items())), frozenset(list(closure_dict.items())))
     if key not in cache:
         compute(key)
     return cache[key]
Code example #16
def test_memoized():
    @memoized(cache=sql_archive())
    def add(x,y):
        return x+y
    add(1,2)
    add(1,2)
    add(1,3)
    #print ("sql_cache = %s" % add.__cache__())
    _key4 = '((), '+str({'y':3, 'x':1})+')'
    _key3 = '((), '+str({'y':2, 'x':1})+')'
    key4_ = '((), '+str({'x':1, 'y':3})+')'
    key3_ = '((), '+str({'x':1, 'y':2})+')'
    assert add.__cache__() in ({_key4: 4, _key3: 3}, {key4_: 4, key3_: 3})

    @memoized(cache=dict_archive(cached=False)) # use archive backend 'direcly'
    def add(x,y):
        return x+y
    add(1,2)
    add(1,2)
    add(1,3)
    #print ("dict_cache = %s" % add.__cache__())
    assert add.__cache__() in ({_key4: 4, _key3: 3}, {key4_: 4, key3_: 3})

    @memoized(cache=dict())
    def add(x,y):
        return x+y
    add(1,2)
    add(1,2)
    add(1,3)
    #print ("dict_cache = %s" % add.__cache__())
    assert add.__cache__() in ({_key4: 4, _key3: 3}, {key4_: 4, key3_: 3})

    @memoized(cache=add.__cache__())
    def add(x,y):
        return x+y
    add(1,2)
    add(2,2)
    #print ("re_dict_cache = %s" % add.__cache__())
    _key2 = '((), '+str({'y':2, 'x':2})+')'
    key2_ = '((), '+str({'x':2, 'y':2})+')'
    assert add.__cache__() in ({_key4: 4, _key3: 3, _key2: 4}, {key4_: 4, key3_: 3, key2_: 4})

    @memoized(keymap=dumps)
    def add(x,y):
        return x+y
    add(1,2)
    add(1,2)
    add(1,3)
    #print ("pickle_dict_cache = %s" % add.__cache__())
    _pkey4 = dill.dumps(eval(_key4))
    _pkey3 = dill.dumps(eval(_key3))
    pkey4_ = dill.dumps(eval(key4_))
    pkey3_ = dill.dumps(eval(key3_))
    assert add.__cache__() in ({_pkey4: 4, _pkey3: 3}, {pkey4_: 4, pkey3_: 3})
Code example #17
    def setUpClass(cls):
        # super(TestStore, cls).setUpClass()

        cls.env = dict(os.environ)

        cls.store_data = {}
        cls.store_data[StoreProperty.MODE] = Mode.MANY_TASKS
        cls.store_data[StoreProperty.TASK_COUNT] = 10
        cls.store_data[StoreProperty.TASK_NO(0)] = dill.dumps(lambda: 0)
        cls.store_data[StoreProperty.TASK_NO(1)] = dill.dumps(lambda: 1)
        cls.saved_store = cls.STORE_TYPE.save_store(cls.store_data)
        environ.update(cls.saved_store)
Code example #18
File: test_job.py Project: alexbw/rq-dill
    def test_data_property_sets_job_properties(self):
        """Job tuple gets derived lazily from data property."""
        job = Job()
        def foo(a,b,c,bar=''):
            pass

        job.data = dumps((dumps(foo), None, (1, 2, 3), {'bar': 'qux'}))

        self.assertEquals(job.func_name, 'tests.test_job.foo')
        self.assertEquals(job.instance, None)
        self.assertEquals(job.args, (1, 2, 3))
        self.assertEquals(job.kwargs, {'bar': 'qux'})
Code example #19
File: test_app_base.py Project: iadgov/WALKOFF
 def test_getattr_gets_from_cache(self):
     workflow_id = uuid4()
     context = {'workflow_execution_id': workflow_id}
     app = AppBase('Something', self.device2.id, context)
     app._cache = self.cache
     app.foo = 42
     app.bar = 23
     self.cache.set(app._format_cache_key('foo'), dill.dumps('a'))
     self.cache.set(app._format_cache_key('bar'), dill.dumps('b'))
     self.assertEqual(app.foo, 'a')
     self.assertEqual(app.bar, 'b')
     with self.assertRaises(AttributeError):
         y = app.baz
Code example #20
    def test_lambdas_pickle(self):
        NONLOCAL_CONST = 5

        lambda_func = lambda x, LOCAL_CONST=7: \
            x * LOCAL_CONST * NONLOCAL_CONST * self.CLASS_CONST * GLOBAL_CONST

        def nested_func(x, LOCAL_CONST=7):
            return x * LOCAL_CONST * NONLOCAL_CONST * self.CLASS_CONST * GLOBAL_CONST

        self.assertEqual(lambda_func(11),
                         pickle.loads(pickle.dumps(lambda_func))(11))
        self.assertEqual(nested_func(11),
                         pickle.loads(pickle.dumps(nested_func))(11))
Code example #21
File: storage.py Project: AirbornePorcine/will
    def save(self, key, value, expire=0):
        if not hasattr(self, "storage"):
            self.bootstrap_storage()

        try:
            if expire:
                ret = self.storage.setex(key, pickle.dumps(value), expire)
            else:
                ret = self.storage.set(key, pickle.dumps(value))

            return ret
        except:
            logging.critical("Unable to save %s: \n%s" %
                             (key, traceback.format_exc()))
Code example #22
File: util.py Project: versae/PyFunctional
def is_serializable(func, raise_errors=True):
    if raise_errors is None:
        raise_errors = True
    try:
        serializer.dumps(func, PROTOCOL)
        return True
    except (AttributeError, serializer.PicklingError):
        if raise_errors:
            raise serializer.PicklingError(
                "Function {} is not serializable. "
                "Try installing dill or passing raise_errors to False "
                "for non-parallel execution when serialization fails."
                .format(str(func)))
        return False
Code example #23
File: foundation.py Project: jedestep/skunkqueue
    def add_job_to_queue(self, job, route, ts=None):
        if ts:
            raise NotImplementedError("fire_at is currently not supported by foundation")
        queue_name = job.queue.name
        job.job_id = str(id(job))

        job_flat = job.json()
        job_flat['now'] = datetime.utcnow()
        if job.queue.queue_type == 'broadcast':
            for worker in self._get_workers(self.skunkdb):
                queue = '__WORKERQUEUE-'+worker['worker_id']
                self._get_queue(queue).push(self.conn, dill.dumps(job_flat))
        else:
            queue = queue_name + '-' + route
            self._get_queue(queue).push(self.conn, dill.dumps(job_flat))
Code example #24
File: trace_pkt.py Project: amlight/SDNTrace
def generate_trace_pkt(trace_entries, color, r_id):
    """ Receives the REST/PUT to generate a PacketOut
    data needs to be serialized. The goal is always to create
    a packet with data being the TraceMsg to differentiate different
    traces running in parallel. We will stack layers depending of
    the user request. If user submits just a VLAN ID, we will use
    ethertype 88b5 and add TraceMsg after it. Same for IP, however
    the protocol will be 65535. If user provides all the way to TCP/UDP
    we will add TraceMsg after it. First thing is to discover
    what it is that the user has provided.

    Args:
        trace_entries: TraceEntries provided by user or collected from PacketIn
        color: result from Coloring Napp for a specific DPID
        r_id: request ID

    Returns:
        in_port: in_port
        pkt: serialized Ethernet frame
    """

    ethernet = _create_ethernet_frame(trace_entries, color)

    msg = TraceMsg(r_id)

    if ethernet.ether_type == constants.IPV4:
        ip_pkt = _create_ip_packet(trace_entries)
        if ip_pkt.protocol == constants.TCP:
            # No dissector for TCP yet
            ip_pkt.data = dill.dumps(msg)

            # tp_pkt = _create_tcp_packet(trace_entries)
            # ip_pkt.data = tp_pkt.pack()
        elif ip_pkt.protocol == constants.UDP:
            # No dissector for UDP yet
            ip_pkt.data = dill.dumps(msg)

            # tp_pkt = _create_udp_packet(trace_entries)
            # ip_pkt.data = tp_pkt.pack()
        else:
            ip_pkt.data = dill.dumps(msg)

        ethernet.data = ip_pkt.pack()
    else:
        ethernet.data = dill.dumps(msg)

    pkt = ethernet.pack()
    return trace_entries.in_port, pkt
Code example #25
File: test_random.py Project: rabernat/dask
def test_serializability():
    state = da.random.RandomState(5)
    x = state.normal(10, 1, size=10, chunks=5)

    y = dill.loads(dill.dumps(x))

    assert (x.compute() == y.compute()).all()
Code example #26
 def check_coder(self, coder, *values):
   self._observe(coder)
   for v in values:
     self.assertEqual(v, coder.decode(coder.encode(v)))
     self.assertEqual(coder.estimate_size(v),
                      len(coder.encode(v)))
     self.assertEqual(coder.estimate_size(v),
                      coder.get_impl().estimate_size(v))
     self.assertEqual(coder.get_impl().get_estimated_size_and_observables(v),
                      (coder.get_impl().estimate_size(v), []))
   copy1 = dill.loads(dill.dumps(coder))
   copy2 = dill.loads(dill.dumps(coder))
   for v in values:
     self.assertEqual(v, copy1.decode(copy2.encode(v)))
     if coder.is_deterministic():
       self.assertEqual(copy1.encode(v), copy2.encode(v))
Code example #27
def calc_hash(argument):
  argument_string = pickle.dumps(argument)
  signature = argument_string
  hasher = hashlib.sha256()
  hasher.update(signature)
  hash_string = hasher.hexdigest()
  return hash_string
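
A digest built this way is tied to pickle's byte output rather than to abstract equality, so it is only stable when the pickled form is deterministic. A short usage note, reusing calc_hash from above:

# Same value, same interpreter: the digests match.
assert calc_hash((1, 'a')) == calc_hash((1, 'a'))

# Caveat: dicts built in a different insertion order, or pickles produced with a
# different protocol or Python version, may yield different bytes and hence a
# different hash for data that compares equal.
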
Code example #28
File: client.py Project: amatthies/dask
 def send_to_scheduler(self, header, payload):
     log(self.address, 'Send to scheduler', header)
     if 'address' not in header:
         header['address'] = self.address
     header['timestamp'] = datetime.utcnow()
     header['loads'] = dill.loads
     self.socket.send_multipart([pickle.dumps(header), dill.dumps(payload)])
Code example #29
File: pmap.py Project: RONNCC/bumps
    def _process_work(msg):
        # Check for sentinel
        if msg.reply_to == "": channel.basic_cancel(consumer)

        body = pickle.loads(msg.body)
        mapid = body['mapid']
        if mapid not in _cache:
            _fetch_function(msg.reply_to, mapid)
        function = _cache[mapid]
        if function == None:
            channel.basic_ack(msg.delivery_tag)
            return

        # Acknowledge delivery of message
        #print "processing...",body['index'],body['value']
        try:
            result = function(body['value'])
        except:
            result = None
        #print "done"
        channel.basic_ack(msg.delivery_tag)
        reply = dict(index=body['index'], result=result, mapid=mapid)
        replymsg = amqp.Message(pickle.dumps(reply))
        channel.basic_publish(replymsg, exchange=exchange,
                              routing_key=msg.reply_to)
Code example #30
File: component.py Project: woozey/hyperspy
 def as_dictionary(self, fullcopy=True):
     """Returns component as a dictionary
     For more information on method and conventions, see
     :meth:`hyperspy.misc.export_dictionary.export_to_dictionary`
     Parameters
     ----------
     fullcopy : Bool (optional, False)
         Copies of objects are stored, not references. If any found,
         functions will be pickled and signals converted to dictionaries
     Returns
     -------
     dic : dictionary
         A dictionary, containing at least the following fields:
         parameters : list
             a list of dictionaries of the parameters, one per
         _whitelist : dictionary
             a dictionary with keys used as references saved attributes, for
             more information, see
             :meth:`hyperspy.misc.export_dictionary.export_to_dictionary`
         * any field from _whitelist.keys() *
     """
     dic = {
         'parameters': [
             p.as_dictionary(fullcopy) for p in self.parameters]}
     export_to_dictionary(self, self._whitelist, dic, fullcopy)
     from hyperspy.model import components
     if self._id_name not in components.__dict__.keys():
         import dill
         dic['_class_dump'] = dill.dumps(self.__class__)
     return dic
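
When such a dictionary is loaded back and the component class is not a built-in one, the pickled class stored under '_class_dump' could presumably be restored with dill.loads. An illustrative sketch of that inverse step (not hyperspy's actual loader):

import dill

def class_from_dictionary(dic):
    # Rebuild the component class if the dictionary carries a pickled copy;
    # otherwise the caller would look the class up by its registered name.
    if '_class_dump' in dic:
        return dill.loads(dic['_class_dump'])
    return None
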
Code example #31
 def test_worker_initialize(self) -> None:
     linter = PyLinter(reporter=Reporter())
     worker_initialize(linter=dill.dumps(linter))
     assert isinstance(pylint.lint.parallel._worker_linter, type(linter))
Code example #32
def run_experiment(problem_size, shard_size, pipeline, num_priorities, lru,
                   eager, truncate, max_cores, start_cores, trial,
                   launch_granularity, timeout, log_granularity,
                   autoscale_policy, standalone, warmup, verify, matrix_exists,
                   read_limit, write_limit, compute_threads_per_worker):
    # set up logging
    invoke_executor = fs.ThreadPoolExecutor(1)
    logger = logging.getLogger()
    region = wc.default()["account"]["aws_region"]
    print("REGION", region)
    for key in logging.Logger.manager.loggerDict:
        logging.getLogger(key).setLevel(logging.CRITICAL)
    logger.setLevel(logging.DEBUG)
    arg_bytes = pickle.dumps(
        (problem_size, shard_size, pipeline, num_priorities, lru, eager,
         truncate, max_cores, start_cores, trial, launch_granularity, timeout,
         log_granularity, autoscale_policy, read_limit, write_limit))
    arg_hash = hashlib.md5(arg_bytes).hexdigest()
    log_file = "{0}.log".format(arg_hash)
    fh = logging.FileHandler(log_file)
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    fh.setFormatter(formatter)
    ch = logging.StreamHandler()
    ch.setLevel(logging.INFO)
    ch.setFormatter(formatter)
    logger.addHandler(fh)
    logger.addHandler(ch)
    logger.info("Logging to {0}".format(log_file))
    if standalone:
        extra_env = {
            "AWS_ACCESS_KEY_ID": os.environ["AWS_ACCESS_KEY_ID"].strip(),
            "AWS_SECRET_ACCESS_KEY":
            os.environ["AWS_SECRET_ACCESS_KEY"].strip(),
            "OMP_NUM_THREADS": "1",
            "AWS_DEFAULT_REGION": region
        }
        config = wc.default()
        config['runtime']['s3_bucket'] = 'numpywrenpublic'
        key = "pywren.runtime/pywren_runtime-3.6-numpywren.tar.gz"
        config['runtime']['s3_key'] = key
        pwex = pywren.standalone_executor(config=config)
    else:
        extra_env = {"AWS_DEFAULT_REGION": region}
        config = wc.default()
        config['runtime']['s3_bucket'] = 'numpywrenpublic'
        key = "pywren.runtime/pywren_runtime-3.6-numpywren.tar.gz"
        config['runtime']['s3_key'] = key
        print(config)
        pwex = pywren.default_executor(config=config)

    if (not matrix_exists):
        X = np.random.randn(problem_size, 1)
        shard_sizes = [shard_size, 1]
        X_sharded = BigMatrix("qr_test_{0}_{1}".format(problem_size,
                                                       shard_size),
                              shape=X.shape,
                              shard_sizes=shard_sizes,
                              write_header=True,
                              autosqueeze=False,
                              bucket="numpywrennsdi2")
        shard_matrix(X_sharded, X)
        print("Generating PSD matrix...")
        t = time.time()
        print(X_sharded.shape)
        XXT_sharded = binops.gemm(pwex,
                                  X_sharded,
                                  X_sharded.T,
                                  overwrite=False)
        e = time.time()
        print("GEMM took {0}".format(e - t))
    else:
        X_sharded = BigMatrix("qr_test_{0}_{1}".format(problem_size,
                                                       shard_size),
                              autosqueeze=False,
                              bucket="numpywrennsdi2")
        key_name = binops.generate_key_name_binop(X_sharded, X_sharded.T,
                                                  "gemm")
        XXT_sharded = BigMatrix(key_name,
                                hash_keys=False,
                                bucket="numpywrensdi2")
    XXT_sharded.lambdav = problem_size * 10
    t = time.time()
    program, meta = qr(XXT_sharded)
    pipeline_width = args.pipeline
    if (lru):
        cache_size = 5
    else:
        cache_size = 0
    pywren_config = pwex.config
    e = time.time()
    print("Program compile took {0} seconds".format(e - t))
    print("program.hash", program.hash)
    REDIS_CLIENT = program.control_plane.client
    done_counts = []
    ready_counts = []
    post_op_counts = []
    not_ready_counts = []
    running_counts = []
    sqs_invis_counts = []
    sqs_vis_counts = []
    up_workers_counts = []
    busy_workers_counts = []
    read_objects = []
    write_objects = []
    all_read_timeouts = []
    all_write_timeouts = []
    all_redis_timeouts = []
    times = [time.time()]
    flops = [0]
    reads = [0]
    writes = [0]
    print("LRU", lru)
    print("eager", eager)
    exp = {}
    exp["redis_done_counts"] = done_counts
    exp["redis_ready_counts"] = ready_counts
    exp["redis_post_op_counts"] = post_op_counts
    exp["redis_not_ready_counts"] = not_ready_counts
    exp["redis_running_counts"] = running_counts
    exp["sqs_invis_counts"] = sqs_invis_counts
    exp["sqs_vis_counts"] = sqs_vis_counts
    exp["busy_workers"] = busy_workers_counts
    exp["up_workers"] = up_workers_counts
    exp["times"] = times
    exp["lru"] = lru
    exp["priority"] = num_priorities
    exp["eager"] = eager
    exp["truncate"] = truncate
    exp["max_cores"] = max_cores
    exp["problem_size"] = problem_size
    exp["shard_size"] = shard_size
    exp["pipeline"] = pipeline
    exp["flops"] = flops
    exp["reads"] = reads
    exp["writes"] = writes
    exp["read_objects"] = read_objects
    exp["write_objects"] = write_objects
    exp["read_timeouts"] = all_read_timeouts
    exp["write_timeouts"] = all_write_timeouts
    exp["redis_timeouts"] = all_redis_timeouts
    exp["trial"] = trial
    exp["launch_granularity"] = launch_granularity
    exp["log_granularity"] = log_granularity
    exp["autoscale_policy"] = autoscale_policy
    exp["standalone"] = standalone
    exp["program"] = program
    exp["time_steps"] = 1
    exp["failed"] = False

    program.start()
    t = time.time()
    logger.info("Starting with {0} cores".format(start_cores))
    all_futures = pwex.map(
        lambda x: job_runner.lambdapack_run(program,
                                            pipeline_width=pipeline_width,
                                            cache_size=cache_size,
                                            timeout=timeout),
        range(start_cores),
        extra_env=extra_env)
    start_time = time.time()
    last_run_time = start_time
    print(program.program_status())
    print("QUEUE URLS", len(program.queue_urls))
    total_lambda_epochs = start_cores
    try:
        while (program.program_status() == lp.PS.RUNNING):
            time.sleep(log_granularity)
            curr_time = int(time.time() - start_time)
            p = program.get_progress()
            if (p is None):
                print("no progress...")
                continue
            else:
                p = int(p)
            times.append(int(time.time()))
            max_pc = p
            waiting = 0
            running = 0
            for i, queue_url in enumerate(program.queue_urls):
                client = boto3.client('sqs')
                attrs = client.get_queue_attributes(
                    QueueUrl=queue_url,
                    AttributeNames=[
                        'ApproximateNumberOfMessages',
                        'ApproximateNumberOfMessagesNotVisible'
                    ])['Attributes']
                waiting += int(attrs["ApproximateNumberOfMessages"])
                running += int(attrs["ApproximateNumberOfMessagesNotVisible"])
            sqs_invis_counts.append(running)
            sqs_vis_counts.append(waiting)
            busy_workers = REDIS_CLIENT.get("{0}_busy".format(program.hash))
            if (busy_workers == None):
                busy_workers = 0
            else:
                busy_workers = int(busy_workers)
            up_workers = program.get_up()

            if (up_workers == None):
                up_workers = 0
            else:
                up_workers = int(up_workers)
            up_workers_counts.append(up_workers)
            busy_workers_counts.append(busy_workers)

            logger.debug("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            if ((curr_time % INFO_FREQ) == 0):
                logger.info("Waiting: {0}, Currently Processing: {1}".format(
                    waiting, running))
                logger.info("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                    up_workers, busy_workers, curr_time))

            current_gflops = program.get_flops()
            if (current_gflops is None):
                current_gflops = 0
            else:
                current_gflops = int(current_gflops) / 1e9

            flops.append(current_gflops)
            current_gbytes_read = program.get_read()
            if (current_gbytes_read is None):
                current_gbytes_read = 0
            else:
                current_gbytes_read = int(current_gbytes_read) / 1e9

            reads.append(current_gbytes_read)
            current_gbytes_write = program.get_write()
            if (current_gbytes_write is None):
                current_gbytes_write = 0
            else:
                current_gbytes_write = int(current_gbytes_write) / 1e9
            writes.append(current_gbytes_write)

            gflops_rate = flops[-1] / (times[-1] - times[0])
            greads_rate = reads[-1] / (times[-1] - times[0])
            gwrites_rate = writes[-1] / (times[-1] - times[0])
            b = XXT_sharded.shard_sizes[0]
            current_objects_read = (current_gbytes_read * 1e9) / (b * b * 8)
            current_objects_write = (current_gbytes_write * 1e9) / (b * b * 8)
            read_objects.append(current_objects_read)
            write_objects.append(current_objects_write)
            read_rate = read_objects[-1] / (times[-1] - times[0])
            write_rate = write_objects[-1] / (times[-1] - times[0])

            avg_workers = np.mean(up_workers_counts)
            smooth_len = 10
            if (len(flops) > smooth_len + 5):
                gflops_rate_5_min_window = (flops[-1] - flops[-smooth_len]) / (
                    times[-1] - times[-smooth_len])
                gread_rate_5_min_window = (reads[-1] - reads[-smooth_len]) / (
                    times[-1] - times[-smooth_len])
                gwrite_rate_5_min_window = (
                    writes[-1] - writes[-smooth_len]) / (times[-1] -
                                                         times[-smooth_len])
                read_rate_5_min_window = (read_objects[-1] -
                                          read_objects[-smooth_len]) / (
                                              times[-1] - times[-smooth_len])
                write_rate_5_min_window = (write_objects[-1] -
                                           write_objects[-smooth_len]) / (
                                               times[-1] - times[-smooth_len])
                workers_5_min_window = np.mean(up_workers_counts[-smooth_len:])
            else:
                gflops_rate_5_min_window = "N/A"
                gread_rate_5_min_window = "N/A"
                gwrite_rate_5_min_window = "N/A"
                workers_5_min_window = "N/A"
                read_rate_5_min_window = "N/A"
                write_rate_5_min_window = "N/A"

            read_timeouts = int(parse_int(
                REDIS_CLIENT.get("s3.timeouts.read")))
            write_timeouts = int(
                parse_int(REDIS_CLIENT.get("s3.timeouts.write")))
            redis_timeouts = int(parse_int(REDIS_CLIENT.get("redis.timeouts")))
            all_read_timeouts.append(read_timeouts)
            all_write_timeouts.append(write_timeouts)
            all_redis_timeouts.append(redis_timeouts)
            read_timeouts_fraction = read_timeouts / (current_objects_read +
                                                      1e-8)
            write_timeouts_fraction = write_timeouts / (current_objects_write +
                                                        1e-8)
            print("=======================================")
            print("Max PC is {0}".format(max_pc))
            print("Waiting: {0}, Currently Processing: {1}".format(
                waiting, running))
            print("{2}: Up Workers: {0}, Busy Workers: {1}".format(
                up_workers, busy_workers, curr_time))
            print(
                "{0}: Total GFLOPS {1}, Total GBytes Read {2}, Total GBytes Write {3}"
                .format(curr_time, current_gflops, current_gbytes_read,
                        current_gbytes_write))
            print(
                "{0}: Average GFLOPS rate {1}, Average GBytes Read rate {2}, Average GBytes Write  rate {3}, Average Worker Count {4}"
                .format(curr_time, gflops_rate, greads_rate, gwrites_rate,
                        avg_workers))
            print("{0}: Average read txns/s {1}, Average write txns/s {2}".
                  format(curr_time, read_rate, write_rate))
            print(
                "{0}: smoothed GFLOPS rate {1}, smoothed GBytes Read rate {2}, smoothed GBytes Write  rate {3}, smoothed Worker Count {4}"
                .format(curr_time, gflops_rate_5_min_window,
                        gread_rate_5_min_window, gwrite_rate_5_min_window,
                        workers_5_min_window))
            print("{0}: smoothed read txns/s {1}, smoothed write txns/s {2}".
                  format(curr_time, read_rate_5_min_window,
                         write_rate_5_min_window))
            print(
                "{0}: Read timeouts: {1}, Write timeouts: {2}, Redis timeouts: {3}  "
                .format(curr_time, read_timeouts, write_timeouts,
                        redis_timeouts))
            print(
                "{0}: Read timeouts fraction: {1}, Write timeouts fraction: {2}"
                .format(curr_time, read_timeouts_fraction,
                        write_timeouts_fraction))
            print("=======================================")

            time_since_launch = time.time() - last_run_time
            if (time_since_launch > (0.85 * timeout)):
                break
            exp["time_steps"] += 1
    except KeyboardInterrupt:
        exp["failed"] = True
        program.stop()
        pass
    except Exception as e:
        traceback.print_exc()
        exp["failed"] = True
        program.stop()
        raise
    print("killing program...")
    print(program.program_status())
    exp["all_futures"] = all_futures
    exp_bytes = dill.dumps(exp)
    client = boto3.client('s3')
    client.put_object(Key="lambdapack/{0}/runtime.pickle".format(program.hash),
                      Body=exp_bytes,
                      Bucket=program.bucket)
    print("=======================")
    print("=======================")
    print("Execution Summary:")
    print("Executed Program ID: {0}".format(program.hash))
    print("Program Success: {0}".format((not exp["failed"])))
    print("Problem Size: {0}".format(exp["problem_size"]))
    print("Shard Size: {0}".format(exp["shard_size"]))
    print("Total Execution time: {0}".format(times[-1] - times[0]))
    print("Average Flop Rate (GFlop/s): {0}".format(exp["flops"][-1] /
                                                    (times[-1] - times[0])))
    with open("/tmp/last_run", "w+") as f:
        f.write(program.hash)
Code example #33
def handle_client(client_socket):
    while True:
        try:
            print("here once again")
            message_length=int((client_socket.recv(HEADER_LENGTH).strip()).decode("utf-8"))
            objectx=client_socket.recv(message_length)
            while True:
                modify_list.acquire()
                if(len(online_servers)>1):
                    server_socket1=online_servers.pop(0)
                    server_socket2=online_servers.pop(0)
                    modify_list.release()
                    break
                else:
                    pass
                modify_list.release()
                time.sleep(0.5)
            
            object_decode=pickle.loads(objectx)
            object_decode1=copy.deepcopy(object_decode)
            object_decode2=copy.deepcopy(object_decode)
            print(object_decode)
            indexx=len(object_decode.data)
            
            object_decode1.data=object_decode.data[0:int(indexx/2)]
            object_decode2.data=object_decode.data[int(indexx/2):indexx]
            
            print(object_decode1.data,object_decode1)
            print(object_decode2.data,object_decode2)
            
            objectx1=pickle.dumps(object_decode1)
            objectx2=pickle.dumps(object_decode2)
            
            objectx_length=f"{len(objectx):<{HEADER_LENGTH}}".encode("utf-8")
            
            objectx1_length=f"{len(objectx1):<{HEADER_LENGTH}}".encode("utf-8")
            objectx2_length=f"{len(objectx2):<{HEADER_LENGTH}}".encode("utf-8")
            
            server_socket1.send(objectx1_length+objectx1)
            server_socket2.send(objectx2_length+objectx2)
            print("Sent")
            
            
            objectx1_length=int(server_socket1.recv(HEADER_LENGTH).strip().decode("utf-8"))
            objectx1=server_socket1.recv(objectx1_length)
            objectx1=pickle.loads(objectx1)
            print("Here")
            print(objectx1)
            
            objectx2_length=int(server_socket2.recv(HEADER_LENGTH).strip().decode("utf-8"))
            print("Here1")
            print(objectx2_length)
            objectx2=server_socket2.recv(objectx2_length)
            print("Here2")
            objectx2=pickle.loads(objectx2)
            print("Received")
            
            #Combine Function 
            object_decode.data=[]
            object_decode.data.append(objectx1.processed_data)
            object_decode.data.append(objectx2.processed_data)
            
            object_decode.processed_data=object_decode.function(object_decode.data)
            objectx=pickle.dumps(object_decode)
            objectx_length=f"{len(objectx):<{HEADER_LENGTH}}".encode("utf-8")
            client_socket.send(objectx_length+objectx)
            print("Sent to Client")
            
            modify_list.acquire()
            online_servers.append(server_socket1)
            online_servers.append(server_socket2)
            modify_list.release()
            
            print("Reavailable Server")
        except IOError as e:
            if e.errno!=errno.EAGAIN and e.errno!=errno.EWOULDBLOCK:
                print('READING ERROR , Client Must have Ended the Connection',str(e))
                server_socket.close()
                client_socket.close()
                break
        except Exception as e:
            client_socket.close()
            server_socket.close()
            print("Error I am here",e)
            break
Code example #34
File: effect.py Project: thomhickey/pfun
def run_dill_encoded(payload: bytes) -> bytes:
    fun, args, kwargs = dill.loads(payload)
    return dill.dumps(fun(*args, **kwargs))
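
The sending side of this helper would bundle the callable and its arguments with dill before handing the bytes to another process. A minimal sketch of both directions, reusing run_dill_encoded from above (the encode_call name is illustrative):

import dill

def encode_call(fun, *args, **kwargs):
    # Mirror image of run_dill_encoded(): pack the whole call into one payload.
    return dill.dumps((fun, args, kwargs))

payload = encode_call(lambda x, y=1: x + y, 41)
assert dill.loads(run_dill_encoded(payload)) == 42
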
Code example #35
 def save(self, key, value, expire=None):
     self.bootstrap_storage()
     try:
         return self.storage.save(key, pickle.dumps(value), expire=expire)
     except Exception:
         logging.exception("Unable to save %s", key)
Code example #36
File: test_excel.py Project: zxf033/formulas
    def test_excel_model(self):
        start = time.time()
        _msg = '[info] test_excel_model: '
        xl_mdl = ExcelModel()

        print('\n%sLoading excel-model.' % _msg)
        s = time.time()

        xl_mdl.loads(self.filename)
        xl_mdl.add_book(self.link_filename)

        msg = '%sLoaded excel-model in %.2fs.\n%sFinishing excel-model.'
        print(msg % (_msg, time.time() - s, _msg))
        s = time.time()

        xl_mdl.finish()

        print('%sFinished excel-model in %.2fs.' % (_msg, time.time() - s))

        n_test = 0
        for i in range(3):
            print('%sCalculate excel-model.' % _msg)
            s = time.time()

            xl_mdl.calculate({"'[EXTRA.XLSX]EXTRA'!A1:B1": [[1, 1]]})

            msg = '%sCalculated excel-model in %.2fs.\n%s' \
                  'Comparing overwritten results.'
            print(msg % (_msg, time.time() - s, _msg))
            s = time.time()

            books = _res2books(xl_mdl.write(xl_mdl.books))
            n_test += self._compare(books, self.results)

            msg = '%sCompared overwritten results in %.2fs.\n' \
                  '%sComparing fresh written results.'
            print(msg % (_msg, time.time() - s, _msg))
            s = time.time()

            n_test += self._compare(_res2books(xl_mdl.write()), self.results)

            msg = '%sCompared fresh written results in %.2fs.'
            print(msg % (_msg, time.time() - s))

            if i == 0:
                print('%sSaving excel-model dill.' % _msg)
                s = time.time()

                xl_copy = dill.dumps(xl_mdl)

                msg = '%sSaved excel-model dill in %.2fs.\n' \
                      '%sLoading excel-model dill.'
                print(msg % (_msg, time.time() - s, _msg))
                s = time.time()

                xl_mdl = dill.loads(xl_copy)
                del xl_copy

                msg = '%sLoaded excel-model dill in %.2fs.'
                print(msg % (_msg, time.time() - s))

            elif i == 1:
                print('%sDeep-copying excel-model.' % _msg)
                s = time.time()

                xl_mdl = copy.deepcopy(xl_mdl)

                msg = '%sDeep-copied excel-model in %.2fs.'
                print(msg % (_msg, time.time() - s))

        print('%sSaving excel-model xlsx.' % _msg)
        s = time.time()

        dirpath = osp.join(mydir, 'tmp')
        xl_mdl.write(dirpath=dirpath)

        msg = '%sSaved excel-model xlsx in %.2fs.\n%sComparing saved results.'
        print(msg % (_msg, time.time() - s, _msg))
        s = time.time()

        n_test += self._compare(
            _file2books(*(osp.join(dirpath, fp) for fp in xl_mdl.books)),
            self.results)

        msg = '%sCompared saved results in %.2fs.\n%sRan %d tests in %.2fs'
        print(msg % (_msg, time.time() - s, _msg, n_test, time.time() - start))
Code example #37
 def test_load_dump(self):
     with testing.tmp_dir(self.get_temp_dir()) as tmp_dir:
         builder = testing.DummyMnist(data_dir=tmp_dir)
     builder2 = dill.loads(dill.dumps(builder))
     self.assertEqual(builder.name, builder2.name)
     self.assertEqual(builder.version, builder2.version)
Code example #38
def write_dill_compressed(file_name, data):
    with gzip.GzipFile(file_name, 'w') as fout:
        fout.write(dill.dumps(data))
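
The matching reader simply reverses the two layers, gzip first and then dill. A small sketch under that assumption:

import gzip

import dill

def read_dill_compressed(file_name):
    # Inverse of write_dill_compressed(): decompress, then unpickle.
    with gzip.GzipFile(file_name, 'r') as fin:
        return dill.loads(fin.read())
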
Code example #39
import pickle
import dill  # pip install dill

square = lambda x: x * x
# my_pickle = pickle.dumps(square)
my_pickle = dill.dumps(square)
print(my_pickle)
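
For completeness, the pickled lambda can be restored with dill.loads and called as usual; a short continuation of the snippet above:

restored = dill.loads(my_pickle)
print(restored(4))  # prints 16
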
Code example #40
def test_r_error_pickling():
    import dill

    # loads failed because the old constructor or RError had no "default constructor"
    err = dill.loads(dill.dumps(RError("test")))
    assert err.value == "test"
Code example #41
File: progbar.py Project: trungnt13/odin-ai
 def __getstate__(self):
   import dill
   return dill.dumps(self.func)
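
A class that pickles its callable in __getstate__ like this would normally pair it with a __setstate__ that undoes the step. A hedged sketch of that counterpart (an assumption, not code from odin-ai):

def __setstate__(self, state):
    import dill
    # Rebuild the wrapped callable from the bytes produced by __getstate__.
    self.func = dill.loads(state)
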
Code example #42
File: utils.py Project: sims-s/giant
def get_message_string(message):
    """ Return a string that can be sent over the network. """
    return pickle.dumps(message)
Code example #43
def _make_default_run_script(
    url: str,
    learners_file: str,
    save_interval: int,
    log_interval: int,
    goal: Optional[Callable[[adaptive.BaseLearner], bool]] = None,
    runner_kwargs: Optional[Dict[str, Any]] = None,
    run_script_fname: str = "run_learner.py",
    executor_type: str = "mpi4py",
) -> None:
    default_runner_kwargs = dict(shutdown_executor=True)
    runner_kwargs = dict(default_runner_kwargs,
                         goal=goal,
                         **(runner_kwargs or {}))
    serialized_runner_kwargs = dill.dumps(runner_kwargs)

    if executor_type == "mpi4py":
        import_line = "from mpi4py.futures import MPIPoolExecutor"
        executor_line = "MPIPoolExecutor()"
    elif executor_type == "ipyparallel":
        import_line = "from adaptive_scheduler.utils import connect_to_ipyparallel"
        executor_line = "connect_to_ipyparallel(profile=args.profile, n=args.n)"
    elif executor_type == "dask-mpi":
        try:
            import dask_mpi  # noqa: F401
        except ModuleNotFoundError as e:
            msg = "You need to have 'dask-mpi' installed to use `executor_type='dask-mpi'`."
            raise Exception(msg) from e
        import_line = "from distributed import Client"
        executor_line = "Client()"
    else:
        raise NotImplementedError("Use 'ipyparallel', 'dask-mpi' or 'mpi4py'.")

    if os.path.abspath(os.path.dirname(learners_file)) != os.path.abspath(""):
        raise RuntimeError(f"The {learners_file} needs to be in the same"
                           " directory as where this is run from.")

    learners_module = os.path.splitext(os.path.basename(learners_file))[0]

    template = textwrap.dedent(f"""\
    #!/usr/bin/env python3
    # {run_script_fname}, automatically generated
    # by `adaptive_scheduler.server_support._make_default_run_script()`.
    import argparse
    from contextlib import suppress

    import adaptive
    import dill
    from adaptive_scheduler import client_support
    {import_line}


    # the file that defines the learners we created above
    from {learners_module} import learners, fnames

    if __name__ == "__main__":  # ← use this, see warning @ https://bit.ly/2HAk0GG

        # parse arguments
        parser = argparse.ArgumentParser()
        parser.add_argument("--profile", action="store", dest="profile", type=str)
        parser.add_argument("--n", action="store", dest="n", type=int)
        parser.add_argument("--log-fname", action="store", dest="log_fname", type=str)
        parser.add_argument("--job-id", action="store", dest="job_id", type=str)
        parser.add_argument("--name", action="store", dest="name", type=str)
        args = parser.parse_args()

        # the address of the "database manager"
        url = "{url}"

        # ask the database for a learner that we can run which we log in `args.log_fname`
        learner, fname = client_support.get_learner(
            learners, fnames, url, args.log_fname, args.job_id, args.name
        )

        # load the data
        with suppress(Exception):
            learner.load(fname)

        # connect to the executor
        executor = {executor_line}

        # this is serialized by dill.dumps
        runner_kwargs = dill.loads({serialized_runner_kwargs})

        # run until `some_goal` is reached with an `MPIPoolExecutor`
        runner = adaptive.Runner(learner, executor=executor, **runner_kwargs)

        # periodically save the data (in case the job dies)
        runner.start_periodic_saving(dict(fname=fname), interval={save_interval})

        # log progress info in the job output script, optional
        client_support.log_info(runner, interval={log_interval})

        # block until runner goal reached
        runner.ioloop.run_until_complete(runner.task)

        # save once more after the runner is done
        learner.save(fname)

        # tell the database that this learner has reached its goal
        client_support.tell_done(url, fname)
    """)
    if executor_type == "dask-mpi":
        template = "from dask_mpi import initialize; initialize()\n" + template

    with open(run_script_fname, "w") as f:
        f.write(template)
Code example #44
File: test_pool.py Project: yueweizhizhu/machin
 def test_reduce(self):
     with pytest.raises(RuntimeError, match="not reducible"):
         dill.dumps(ThreadPool(processes=2))
Code example #45
 def push(self, msg, key=''):
     key = self._key if not key else key
     return self._queue.rpush(key, pickle.dumps(msg))
Code example #46
 def set_object(self, x):
     self.object = pickle.dumps(x)
Code example #47
 def __setitem__(self, key, value):
     self.cache[key] = value
     filepath = os.path.join(self.paramdir, get_box_name(self.name), key)
     with open(filepath, "wb") as fp:
         serialized = dill.dumps(value)
         fp.write(serialized)
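
The corresponding lookup would read the file back and unpickle it on a cache miss. An illustrative counterpart that mirrors the attributes used above (hypothetical, not the project's code):

 def __getitem__(self, key):
     # Serve from the in-memory cache first, then fall back to the on-disk copy.
     if key in self.cache:
         return self.cache[key]
     filepath = os.path.join(self.paramdir, get_box_name(self.name), key)
     with open(filepath, "rb") as fp:
         value = dill.loads(fp.read())
     self.cache[key] = value
     return value
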
Code example #48
 def test_serializing_and_deserializing_unconfigured_robot(self):
     robot_as_bytes = dill.dumps(self.robot)
     self.assertIsInstance(robot_as_bytes, bytes)
     dill.loads(robot_as_bytes)
Code example #49
File: queue.py Project: shashankmehra/kq
    def enqueue(self, obj, *args, **kwargs):
        """Serialize the function call and place it in the Kafka topic.

        For example:

        .. code-block:: python

            import requests
            from kq import Queue

            q = Queue()

            # You can queue the function call with its arguments
            job = q.enqueue(requests.get, 'https://www.google.com')

            # Or you can queue a kq.job.Job instance directly
            q.enqueue(job)

        :param obj: Function or the job object to enqueue. If a function is
            given, the function *must* be pickle-able.
        :type obj: callable | kq.job.Job
        :param args: Arguments for the function. Ignored if a KQ job object
            is given for the first argument instead.
        :type args: list
        :param kwargs: Keyword arguments for the function. Ignored if a KQ
            job instance is given as the first argument instead.
        :type kwargs: dict
        :param key: Queue the job with a key. Jobs queued with a specific key
            are processed in order they were queued. Setting it to None (default)
            disables this behaviour.
        :type key: str | unicode
        :return: The job that was enqueued
        :rtype: kq.job.Job
        """
        key = None
        if isinstance(obj, Job):
            func = obj.func
            args = obj.args
            kwargs = obj.kwargs
            key = obj.key
        else:
            func = obj

        if not callable(func):
            raise ValueError(
                '{} is not a callable'.format(func)
            )
        job = Job(
            id=str(uuid.uuid4()),
            timestamp=int(time.time()),
            topic=self._topic,
            func=func,
            args=args,
            kwargs=kwargs,
            timeout=self._timeout,
            key=key
        )

        future = self._producer.send(self._topic, dill.dumps(job), key=key)
        try:
            future.get(timeout=self._timeout or 5)
        except KafkaError as e:
            self._logger.error('Queuing failed: {}'.format(str(e)))
            return None
        self._logger.info('Enqueued: {}'.format(job))
        return job
Code example #50
import sys
import dill
import test_mixins as module
try:
    from imp import reload
except ImportError:
    pass
dill.settings['recurse'] = True

cached = (module.__cached__ if hasattr(module, "__cached__") else
          module.__file__.split(".", 1)[0] + ".pyc")

module.a = 1234

pik_mod = dill.dumps(module)

module.a = 0

# remove module
del sys.modules[module.__name__]
del module

module = dill.loads(pik_mod)
assert hasattr(module, "a") and module.a == 1234
assert module.double_add(1, 2, 3) == 2 * module.fx

# Restart, and test use_diff

reload(module)
Code example #51
File: test_detect.py Project: brstrat/dill
class Foo(object):
    def __init__(self):
        pass

    def __getstate__(self):
        bar[0] = bar[0] + 1
        return {}

    def __setstate__(self, data):
        pass


f = Foo()
from dill import dumps, loads
dumps(f)
dumps(lambda: f, recurse=False)  # doesn't call __getstate__
dumps(lambda: f, recurse=True)  # calls __getstate__
assert bar[0] == 2

#97 serialize lambdas in test files
from math import sin, pi


def sinc(x):
    return sin(x) / x


settings['recurse'] = True
_sinc = dumps(sinc)
del sin
Code example #52
File: cereal.py Project: kivo360/jamboree
def serialize(obj):
    """ Should take a complex object and pickle it"""
    pickled = dill.dumps(obj, byref=False)
    compressed = lz4.frame.compress(pickled)
    return compressed
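
The inverse swaps the order of the two steps, decompressing before unpickling. A minimal sketch under that assumption:

import dill
import lz4.frame

def deserialize(blob):
    # Reverse of serialize(): LZ4-decompress, then unpickle with dill.
    pickled = lz4.frame.decompress(blob)
    return dill.loads(pickled)
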
Code example #53
File: errors.py Project: xinyixiang/parsl
    def __init__(self, e_type, e_value, traceback):

        self.e_type = dill.dumps(e_type)
        self.e_value = dill.dumps(e_value)
        self.e_traceback = Traceback(traceback)
Code example #54
File: test_classdef.py Project: genba/dill
        return True

class _newclass2(object):
    def __call__(self):
        pass
    def ok(self):
        return True

o = _class()
oc = _class2()
n = _newclass()
nc = _newclass2()

clslist = [_class,_class2,_newclass,_newclass2]
objlist = [o,oc,n,nc]
_clslist = [dill.dumps(obj) for obj in clslist]
_objlist = [dill.dumps(obj) for obj in objlist]

for obj in clslist:
    globals().pop(obj.__name__)
del clslist
for obj in ['o','oc','n','nc']:
    globals().pop(obj)
del objlist
del obj

for obj,cls in zip(_objlist,_clslist):
    _cls = dill.loads(cls)
    _obj = dill.loads(obj)
    assert _obj.ok()
    assert _cls.ok(_cls())
Code example #55
def create_evaluate_ops(  # pylint: disable=too-many-arguments
    task_prefix: str,
    data_format: str,
    input_paths: List[str],
    prediction_path: str,
    metric_fn_and_keys: Tuple[T, Iterable[str]],
    validate_fn: T,
    batch_prediction_job_id: Optional[str] = None,
    region: Optional[str] = None,
    project_id: Optional[str] = None,
    dataflow_options: Optional[Dict] = None,
    model_uri: Optional[str] = None,
    model_name: Optional[str] = None,
    version_name: Optional[str] = None,
    dag: Optional[DAG] = None,
    py_interpreter="python3",
):
    """
    Creates Operators needed for model evaluation and returns.

    It gets prediction over inputs via Cloud ML Engine BatchPrediction API by
    calling MLEngineBatchPredictionOperator, then summarize and validate
    the result via Cloud Dataflow using DataFlowPythonOperator.

    For details and pricing about Batch prediction, please refer to the website
    https://cloud.google.com/ml-engine/docs/how-tos/batch-predict
    and for Cloud Dataflow, https://cloud.google.com/dataflow/docs/

    It returns three chained operators for prediction, summary, and validation,
    named as ``<prefix>-prediction``, ``<prefix>-summary``, and ``<prefix>-validation``,
    respectively.
    (``<prefix>`` should contain only alphanumeric characters or hyphen.)

    The upstream and downstream can be set accordingly like:

    .. code-block:: python

        pred, _, val = create_evaluate_ops(...)
        pred.set_upstream(upstream_op)
        ...
        downstream_op.set_upstream(val)

    Callers provide two Python callables, metric_fn and validate_fn, to
    customize the evaluation behavior.

    - metric_fn receives a dictionary per instance, derived from the JSON in the
      batch prediction result. The keys might vary depending on the model.
      It should return a tuple of metrics.
    - validate_fn receives a dictionary of the averaged metrics that metric_fn
      generated over all instances.
      The keys and values of the dictionary match what is given by the
      metric_fn_and_keys arg.
      The dictionary contains an additional metric, 'count', representing the
      total number of instances received for evaluation.
      The function should raise an exception to mark the task as failed when the
      validation result is not good enough to proceed (i.e. to set the trained
      version as default).

    A typical example looks like this:

    .. code-block:: python

        def get_metric_fn_and_keys():
            import math  # imports should be outside of the metric_fn below.

            def error_and_squared_error(inst):
                label = float(inst["input_label"])
                classes = float(inst["classes"])  # 0 or 1
                err = abs(classes - label)
                squared_err = math.pow(classes - label, 2)
                return (err, squared_err)  # returns a tuple.

            return error_and_squared_error, ["err", "mse"]  # key order must match.


        def validate_err_and_count(summary):
            if summary["err"] > 0.2:
                raise ValueError("Too high err>0.2; summary=%s" % summary)
            if summary["mse"] > 0.05:
                raise ValueError("Too high mse>0.05; summary=%s" % summary)
            if summary["count"] < 1000:
                raise ValueError("Too few instances<1000; summary=%s" % summary)
            return summary

    For details on the other BatchPrediction-related arguments (project_id,
    job_id, region, data_format, input_paths, prediction_path, model_uri),
    please also refer to MLEngineBatchPredictionOperator.

    :param task_prefix: a prefix for the tasks. Only alphanumeric characters and
        hyphen are allowed (no underscores), since this will be used as dataflow
        job name, which doesn't allow other characters.
    :type task_prefix: str

    :param data_format: either of 'TEXT', 'TF_RECORD', 'TF_RECORD_GZIP'
    :type data_format: str

    :param input_paths: a list of input paths to be sent to BatchPrediction.
    :type input_paths: list[str]

    :param prediction_path: GCS path to put the prediction results in.
    :type prediction_path: str

    :param metric_fn_and_keys: a tuple of metric_fn and metric_keys:

        - metric_fn is a function that accepts a dictionary (for an instance),
          and returns a tuple of metric(s) that it calculates.

        - metric_keys is a list of strings to denote the key of each metric.
    :type metric_fn_and_keys: tuple of a function and a list[str]

    :param validate_fn: a function to validate whether the averaged metric(s) is
        good enough to push the model.
    :type validate_fn: function

    :param batch_prediction_job_id: the id to use for the Cloud ML Batch
        prediction job. Passed directly to the MLEngineBatchPredictionOperator as
        the job_id argument.
    :type batch_prediction_job_id: str

    :param project_id: the Google Cloud project id in which to execute
        Cloud ML Batch Prediction and Dataflow jobs. If None, then the `dag`'s
        `default_args['project_id']` will be used.
    :type project_id: str

    :param region: the Google Cloud region in which to execute Cloud ML
        Batch Prediction and Dataflow jobs. If None, then the `dag`'s
        `default_args['region']` will be used.
    :type region: str

    :param dataflow_options: options to run Dataflow jobs. If None, then the
        `dag`'s `default_args['dataflow_default_options']` will be used.
    :type dataflow_options: dictionary

    :param model_uri: GCS path of the model exported by Tensorflow using
        ``tensorflow.estimator.export_savedmodel()``. It cannot be used with
        model_name or version_name below. See MLEngineBatchPredictionOperator for
        more detail.
    :type model_uri: str

    :param model_name: Used to indicate a model to use for prediction. Can be
        used in combination with version_name, but cannot be used together with
        model_uri. See MLEngineBatchPredictionOperator for more detail. If None,
        then the `dag`'s `default_args['model_name']` will be used.
    :type model_name: str

    :param version_name: Used to indicate a model version to use for prediction,
        in combination with model_name. Cannot be used together with model_uri.
        See MLEngineBatchPredictionOperator for more detail. If None, then the
        `dag`'s `default_args['version_name']` will be used.
    :type version_name: str

    :param dag: The `DAG` to use for all Operators.
    :type dag: airflow.models.DAG

    :param py_interpreter: Python version of the Beam pipeline.
        If None, this defaults to python3.
        To track python versions supported by beam and related
        issues check: https://issues.apache.org/jira/browse/BEAM-1251
    :type py_interpreter: str

    :returns: a tuple of three operators, (prediction, summary, validation)
    :rtype: tuple(MLEngineStartBatchPredictionJobOperator,
                  DataflowCreatePythonJobOperator, PythonOperator)
    """
    batch_prediction_job_id = batch_prediction_job_id or ""
    dataflow_options = dataflow_options or {}
    region = region or ""

    # Verify that task_prefix doesn't have any special characters except hyphen
    # '-', which is the only allowed non-alphanumeric character by Dataflow.
    if not re.match(r"^[a-zA-Z][-A-Za-z0-9]*$", task_prefix):
        raise AirflowException(
            "Malformed task_id for DataFlowPythonOperator (only alphanumeric "
            "and hyphens are allowed but got: " + task_prefix
        )

    metric_fn, metric_keys = metric_fn_and_keys
    if not callable(metric_fn):
        raise AirflowException("`metric_fn` param must be callable.")
    if not callable(validate_fn):
        raise AirflowException("`validate_fn` param must be callable.")

    if dag is not None and dag.default_args is not None:
        default_args = dag.default_args
        project_id = project_id or default_args.get('project_id')
        region = region or default_args['region']
        model_name = model_name or default_args.get('model_name')
        version_name = version_name or default_args.get('version_name')
        dataflow_options = dataflow_options or default_args.get('dataflow_default_options')

    evaluate_prediction = MLEngineStartBatchPredictionJobOperator(
        task_id=(task_prefix + "-prediction"),
        project_id=project_id,
        job_id=batch_prediction_job_id,
        region=region,
        data_format=data_format,
        input_paths=input_paths,
        output_path=prediction_path,
        uri=model_uri,
        model_name=model_name,
        version_name=version_name,
        dag=dag,
    )

    metric_fn_encoded = base64.b64encode(dill.dumps(metric_fn, recurse=True)).decode()
    evaluate_summary = DataflowCreatePythonJobOperator(
        task_id=(task_prefix + "-summary"),
        py_file=os.path.join(os.path.dirname(__file__), 'mlengine_prediction_summary.py'),
        dataflow_default_options=dataflow_options,
        options={
            "prediction_path": prediction_path,
            "metric_fn_encoded": metric_fn_encoded,
            "metric_keys": ','.join(metric_keys),
        },
        py_interpreter=py_interpreter,
        py_requirements=['apache-beam[gcp]>=2.14.0'],
        dag=dag,
    )
    evaluate_summary.set_upstream(evaluate_prediction)

    def apply_validate_fn(*args, templates_dict, **kwargs):
        prediction_path = templates_dict["prediction_path"]
        scheme, bucket, obj, _, _ = urlsplit(prediction_path)
        if scheme != "gs" or not bucket or not obj:
            raise ValueError(f"Wrong format prediction_path: {prediction_path}")
        summary = os.path.join(obj.strip("/"), "prediction.summary.json")
        gcs_hook = GCSHook()
        summary = json.loads(gcs_hook.download(bucket, summary))
        return validate_fn(summary)

    evaluate_validation = PythonOperator(
        task_id=(task_prefix + "-validation"),
        python_callable=apply_validate_fn,
        templates_dict={"prediction_path": prediction_path},
        dag=dag,
    )
    evaluate_validation.set_upstream(evaluate_summary)

    return evaluate_prediction, evaluate_summary, evaluate_validation
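
# A minimal usage sketch, reusing get_metric_fn_and_keys() and
# validate_err_and_count() from the docstring above; the GCS paths, model names,
# and the `dag` / `upstream_op` objects are placeholders, not from the source.
metric_fn, metric_keys = get_metric_fn_and_keys()
pred, summary, val = create_evaluate_ops(
    task_prefix="eval-model",
    data_format="TEXT",
    input_paths=["gs://my-bucket/prediction-input/*"],
    prediction_path="gs://my-bucket/prediction-output",
    metric_fn_and_keys=(metric_fn, metric_keys),
    validate_fn=validate_err_and_count,
    model_name="my_model",
    version_name="v1",
    dag=dag,
)
pred.set_upstream(upstream_op)  # wire the chain into the surrounding DAG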
Code example #56
if HOST:
    host_galaxy_comp = HostGalaxyComponent()
    model.components.append(host_galaxy_comp)

if BC or BpC:
    balmer_comp = BalmerCombined(BalmerContinuum=BC,
                                 BalmerPseudocContinuum=BpC)
    model.components.append(balmer_comp)

if Calzetti_ext or SMC_ext or MW_ext or AGN_ext or LMC_ext:
    ext_comp = Extinction(MW=MW_ext,
                          AGN=AGN_ext,
                          LMC=LMC_ext,
                          SMC=SMC_ext,
                          Calzetti=Calzetti_ext)
    model.components.append(ext_comp)

model.data_spectrum = spectrum  # add data
# ------------
# Run MCMC
# ------------
model.run_mcmc(n_walkers=n_walkers, n_iterations=n_iterations)
print("Mean acceptance fraction: {0:.3f}".format(
    np.mean(model.sampler.acceptance_fraction)))

# -------------
# save chains & model
# ------------
with gzip.open('model.pickle.gz', 'wb') as model_output:
    model_output.write(pickle.dumps(model))
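
# A minimal sketch (assumption, not from the source) for reading the saved model back:
with gzip.open('model.pickle.gz', 'rb') as model_input:
    model = pickle.loads(model_input.read())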
Code example #57
File: utils.py Project: krl97/MapReduce
import dill


def msg_serialize(objects: list):
    return [dill.dumps(obj) for obj in objects]
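
# A matching deserializer sketch (the name `msg_deserialize` is an assumption):
def msg_deserialize(payloads: list):
    return [dill.loads(p) for p in payloads]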
Code example #58
File: plugin.py Project: sivashh/agent-python-pytest
def pytest_configure_node(node):
    if node.config._reportportal_enabled is False:
        # Stop now if the plugin is not properly configured
        return
    node.slaveinput['py_test_service'] = pickle.dumps(
        node.config.py_test_service)
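
# A hedged sketch (an assumption, not confirmed against the plugin source): the
# xdist worker would restore the service from slaveinput in its own configure hook.
def pytest_configure(config):
    if hasattr(config, 'slaveinput'):
        config.py_test_service = pickle.loads(config.slaveinput['py_test_service'])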
Code example #59
    def __init__(self, function):
        self.function = dill.dumps(function)
        self.params = None
        self.eval_result = None
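
    # A hedged sketch (not from the source): how the stored function might later
    # be restored and evaluated with the stored params; assumes dill is imported
    # at module level as in the snippet above.
    def evaluate(self):
        func = dill.loads(self.function)
        self.eval_result = func(*(self.params or ()))
        return self.eval_result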
Code example #60
def export_object(obj):
    import dill as pickle
    import base64
    import zlib
    return base64.b64encode(zlib.compress(pickle.dumps(obj, 4), 9)).decode('utf-8')
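
# A hedged counterpart sketch (the name `import_object` is an assumption): reverse
# the base64 / zlib / pickle steps above to recover the original object.
def import_object(encoded):
    import dill as pickle
    import base64
    import zlib
    return pickle.loads(zlib.decompress(base64.b64decode(encoded.encode('utf-8'))))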