Example #1
def test_mpi_objects():
    # Neighbours
    grid = Grid(shape=(4, 4, 4))
    obj = grid.distributor._obj_neighborhood
    pkl_obj = pickle.dumps(obj)
    new_obj = pickle.loads(pkl_obj)
    assert obj.name == new_obj.name
    assert obj.pname == new_obj.pname
    assert obj.pfields == new_obj.pfields

    # Communicator
    obj = grid.distributor._obj_comm
    pkl_obj = pickle.dumps(obj)
    new_obj = pickle.loads(pkl_obj)
    assert obj.name == new_obj.name
    assert obj.dtype == new_obj.dtype

    # Status
    obj = MPIStatusObject(name='status')
    pkl_obj = pickle.dumps(obj)
    new_obj = pickle.loads(pkl_obj)
    assert obj.name == new_obj.name
    assert obj.dtype == new_obj.dtype

    # Request
    obj = MPIRequestObject(name='request')
    pkl_obj = pickle.dumps(obj)
    new_obj = pickle.loads(pkl_obj)
    assert obj.name == new_obj.name
    assert obj.dtype == new_obj.dtype
Example #2
    def deserialize_model(self, blob, model_id, model_version):
        """
        Deserializes the given blob to Model object which can be used for predictions
        :param blob:
        :param model_id:
        :param model_version:
        :return:
        """
        model_obj = cloudpickle.loads(blob)

        if not isinstance(model_obj, dict):  # Is a plain cloud-pickled model
            return model_obj

        if 'custom_package_blob' in model_obj:  # model_obj is known to be a dict at this point
            self.__custom_package_deployer.install_custom_package(blob, model_id, model_version, delete_previous=True)

        if 'serialization_mechanism' in model_obj and model_obj['serialization_mechanism'] == 'asm':  # Is an ASM model
            self.__extract_model_resources(model_obj, model_id, model_version)
            self.__extract_prediction_module(model_obj, model_id, model_version)
            return self.__deserialize_asm_model(model_id, model_version)

        # tar_file_content = model_obj['custom_package_blob']
        # custom_package_name = model_obj['custom_package_name']
        # custom_package_version = model_obj['custom_package_version']
        return cloudpickle.loads(model_obj['model_blob'])  # Is a cloud-pickled model with custom code
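
For reference, here is a minimal, hypothetical sketch of the plain-model path handled above: an object pickled with cloudpickle round-trips directly, with no wrapping dict (the TinyModel class is illustrative, not part of the original API).

import cloudpickle

class TinyModel:
    """Stand-in model with a predict method (illustrative only)."""
    def predict(self, x):
        return x * 2

# a deployment pipeline might store the model as an opaque blob
blob = cloudpickle.dumps(TinyModel())

# a plain blob deserializes straight back into the model object
model = cloudpickle.loads(blob)
assert model.predict(21) == 42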
Example #3
 def _handle_spyder_msg(self, msg):
     """
     Handle internal spyder messages
     """
     spyder_msg_type = msg['content'].get('spyder_msg_type')
     if spyder_msg_type == 'data':
         # Deserialize data
         try:
             if PY2:
                 value = cloudpickle.loads(msg['buffers'][0])
             else:
                 value = cloudpickle.loads(bytes(msg['buffers'][0]))
         except Exception as exc:
             self._kernel_value = None
             self._kernel_reply = repr(exc)
         else:
             self._kernel_value = value
         self.sig_got_reply.emit()
         return
     elif spyder_msg_type == 'pdb_state':
         pdb_state = msg['content']['pdb_state']
         if pdb_state is not None and isinstance(pdb_state, dict):
             self.refresh_from_pdb(pdb_state)
     elif spyder_msg_type == 'pdb_continue':
         # Run Pdb continue to get to the first breakpoint
         # Fixes 2034
         self.write_to_stdin('continue')
     elif spyder_msg_type == 'set_breakpoints':
         self.set_spyder_breakpoints(force=True)
     else:
         logger.debug("No such spyder message type: %s" % spyder_msg_type)
Example #4
def test_operator_parameters():
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)
    g = TimeFunction(name='g', grid=grid)
    h = TimeFunction(name='h', grid=grid, save=10)
    op = Operator(Eq(h.forward, h + g + f + 1))
    for i in op.parameters:
        pkl_i = pickle.dumps(i)
        pickle.loads(pkl_i)
Example #5
def test_queue_serde(zk):
    queue = Queue(zk, '/satyr/serde')
    queue.put(cp.dumps({'a': 1, 'b': 2}))
    queue.put(cp.dumps({'c': 3}))

    pickled_queue = cp.dumps(queue)
    unpickled_queue = cp.loads(pickled_queue)

    assert cp.loads(unpickled_queue.get()) == {'a': 1, 'b': 2}
    assert cp.loads(unpickled_queue.get()) == {'c': 3}
Example #6
def test_locking_queue_serde(zk):
    queue = LockingQueue(zk, '/satyr/serde_locking')
    queue.put(cp.dumps({'a': 1, 'b': 2}))
    queue.put(cp.dumps({'c': 3}))

    pickled_queue = cp.dumps(queue)
    unpickled_queue = cp.loads(pickled_queue)

    assert cp.loads(unpickled_queue.get()) == {'a': 1, 'b': 2}
    unpickled_queue.consume()
    assert cp.loads(unpickled_queue.get()) == {'c': 3}
    unpickled_queue.consume()
Example #7
def test_internal_symbols():
    s = dSymbol(name='s', dtype=np.float32)
    pkl_s = pickle.dumps(s)
    new_s = pickle.loads(pkl_s)
    assert new_s.name == s.name
    assert new_s.dtype is np.float32

    s = Scalar(name='s', dtype=np.int32, is_const=True)
    pkl_s = pickle.dumps(s)
    new_s = pickle.loads(pkl_s)
    assert new_s.name == s.name
    assert new_s.dtype is np.int32
    assert new_s.is_const is True
Example #8
def test_dumps_task():
    d = dumps_task((inc, 1))
    assert set(d) == {'function', 'args'}

    f = lambda x, y=2: x + y
    d = dumps_task((apply, f, (1,), {'y': 10}))
    assert cloudpickle.loads(d['function'])(1, 2) == 3
    assert cloudpickle.loads(d['args']) == (1,)
    assert cloudpickle.loads(d['kwargs']) == {'y': 10}

    d = dumps_task((apply, f, (1,)))
    assert cloudpickle.loads(d['function'])(1, 2) == 3
    assert cloudpickle.loads(d['args']) == (1,)
    assert set(d) == {'function', 'args'}
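
To show how such a task dict could be executed, here is a hedged sketch that rebuilds and calls the task; dumps_task itself comes from the surrounding codebase, and the run_task helper below is an assumption, not part of it.

import cloudpickle

def run_task(d):
    # rebuild a task serialized into 'function' / 'args' / 'kwargs' frames and call it
    func = cloudpickle.loads(d['function'])
    args = cloudpickle.loads(d['args']) if 'args' in d else ()
    kwargs = cloudpickle.loads(d['kwargs']) if 'kwargs' in d else {}
    return func(*args, **kwargs)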
Example #9
 def result(self, timeout=None):
     """
     :param timeout: int,
     :return:
     """
     now = time.time()
     sleep = 0.5
     count = 0
     if timeout:
         future = now + timeout
     else:
         future = float("inf")
     while not self._is_done() and time.time() < future:
         self._update_status()
         count += 1
         time.sleep(exp_backoff(sleep, count))
     if time.time() > future and not self._is_done():
         raise TimeoutError()
     # result should be ready:
     results = []
     while True:
         result = self.ag.actors.getOneExecutionResult(actorId=self.actor_id, executionId=self.execution_id).content
         if not result:
             break
         results.append(cloudpickle.loads(result))
     return results
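
The exp_backoff helper referenced above is not included in the snippet; a plausible capped exponential backoff with jitter could look like this (an assumption, not the original implementation):

import random

def exp_backoff(base, count, cap=30.0):
    # hypothetical helper: exponential backoff with jitter, capped at `cap` seconds
    delay = min(cap, base * (2 ** count))
    return delay * (0.5 + random.random() / 2)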
Example #10
def _send_op(result, foo, chunk, op, index, target_ip, own_ip, port, timeout):
    '''
    Sends an operation over the network for a server to process, and 
    receives the result. Since we want each chunk to be sent in 
    parallel, this should be threaded

    :param result: empty list passed by reference which will contain the result. 
        Necessary because threads don't allow standard return values
    :param foo: function to use for map, filter, or reduce calls
    :param chunk: chunk to perform operation on
    :param op: string corresponding to operation to perform: 
        'map', 'filter', 'reduce'
    :param index: chunk number to allow ordering of processed chunks
    :param target_ip: IP address of the server that processes the chunk
    :param own_ip: IP address of this client, used to receive the response
    :param port: port of the server; responses are received on port + 1
    :param timeout: socket timeout in seconds
    '''
    try:
        dict_sending = {'func': foo, 'chunk': chunk, 'op': op, 'index': index}
        csts = threading.Thread(
            target = _client_socket_thread_send,
            args = (target_ip, port, pickle.dumps(dict_sending), timeout))
        csts.start()
        queue = Queue.Queue()
        cstr = threading.Thread(
            target = _client_socket_thread_receive,
            args = (own_ip, port+1, queue, timeout))
        cstr.start()
        cstr.join(timeout = None)
        response = pickle.loads(queue.get())
        result[response['index']] = response['chunk']
    except (RuntimeError, socket.timeout):
        return  # do nothing on error; just end, and the client will restart the sending protocol
Example #11
def test_geometry():

    shape = (50, 50, 50)
    spacing = [10. for _ in shape]
    nbpml = 10
    nrec = 10
    tn = 150.

    # Create two-layer model from preset
    model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2.,
                       spacing=spacing, shape=shape, nbpml=nbpml)
    # Source and receiver geometries
    src_coordinates = np.empty((1, len(spacing)))
    src_coordinates[0, :] = np.array(model.domain_size) * .5
    if len(shape) > 1:
        src_coordinates[0, -1] = model.origin[-1] + 2 * spacing[-1]

    rec_coordinates = np.empty((nrec, len(spacing)))
    rec_coordinates[:, 0] = np.linspace(0., model.domain_size[0], num=nrec)
    if len(shape) > 1:
        rec_coordinates[:, 1] = np.array(model.domain_size)[1] * .5
        rec_coordinates[:, -1] = model.origin[-1] + 2 * spacing[-1]
    geometry = AcquisitionGeometry(model, rec_coordinates, src_coordinates,
                                   t0=0.0, tn=tn, src_type='Ricker', f0=0.010)

    pkl_geom = pickle.dumps(geometry)
    new_geom = pickle.loads(pkl_geom)

    assert np.all(new_geom.src_positions == geometry.src_positions)
    assert np.all(new_geom.rec_positions == geometry.rec_positions)
    assert new_geom.f0 == geometry.f0
    assert np.all(new_geom.src_type == geometry.src_type)
    assert np.all(new_geom.src.data == geometry.src.data)
    assert new_geom.t0 == geometry.t0
    assert new_geom.tn == geometry.tn
Example #12
    def from_dict(cls, node_dict):
        """
        Creates a node from a dict representation
        :param node_dict: dict
        :return: datamodel.base.node.Node
        """
        # import classes so we can instantiate them as needed
        import robograph.datamodel

        # retrieve the class object so we can instantiate the node
        klass = eval(node_dict['class'])
        node = klass(name=node_dict['name'])

        # now retrieve the parameters
        parameters = dict()
        for p in node_dict['params']:
            parameters[p] = node_dict['params'][p]

            # are we deserializing a pickled function?
            if isinstance(node_dict['params'][p], unicode):
                if "py/bytes" in node_dict['params'][p]:
                    parameters[p] = cloudpickle.loads(jsonpickle.loads(node_dict['params'][p]))

        node.input(parameters)
        node.set_output_label(node_dict['output_label'])
        return node
Example #13
    def do_recv_broadcast(self, sock):
        req_header = common.sock_recv(sock, 16)
        key_data_len, value_data_len = struct.unpack('qq', req_header)
        key_data = common.sock_recv(sock, key_data_len)
        value_data = common.sock_recv(sock, value_data_len)
        key = cloudpickle.loads(key_data)
        value = cloudpickle.loads(value_data)
        self.server.broadcast_vars[key] = value
        
        result = {}
        result['success'] = True

        send_data = cloudpickle.dumps(result)
        send_data_len = len(send_data)
        sock.sendall(struct.pack('q', send_data_len))
        sock.sendall(send_data)
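
The client side of this exchange is not shown; under the same wire format (a struct-packed 'qq' header followed by two cloudpickled payloads, answered by a 'q'-prefixed pickled result), a sender might look roughly like this (the function and helper names are assumed, not taken from the original module):

import struct
import cloudpickle

def send_broadcast(sock, key, value):
    # hypothetical counterpart to do_recv_broadcast: same 'qq' header, then two pickled payloads
    key_data = cloudpickle.dumps(key)
    value_data = cloudpickle.dumps(value)
    sock.sendall(struct.pack('qq', len(key_data), len(value_data)))
    sock.sendall(key_data)
    sock.sendall(value_data)
    # read back the pickled {'success': True} acknowledgement
    (resp_len,) = struct.unpack('q', _recv_exact(sock, 8))
    return cloudpickle.loads(_recv_exact(sock, resp_len))

def _recv_exact(sock, n):
    # minimal stand-in for common.sock_recv: read exactly n bytes
    buf = b''
    while len(buf) < n:
        chunk = sock.recv(n - len(buf))
        if not chunk:
            raise ConnectionError('socket closed while receiving')
        buf += chunk
    return buf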
Example #14
def test_symbolics():
    a = Symbol('a')

    id = IntDiv(a, 3)
    pkl_id = pickle.dumps(id)
    new_id = pickle.loads(pkl_id)
    assert id == new_id

    ffp = FunctionFromPointer('foo', a, ['b', 'c'])
    pkl_ffp = pickle.dumps(ffp)
    new_ffp = pickle.loads(pkl_ffp)
    assert ffp == new_ffp

    li = ListInitializer(['a', 'b'])
    pkl_li = pickle.dumps(li)
    new_li = pickle.loads(pkl_li)
    assert li == new_li
Example #15
def test_dumps_function():
    a = dumps_function(inc)
    assert cloudpickle.loads(a)(10) == 11

    b = dumps_function(inc)
    assert a is b

    c = dumps_function(dec)
    assert a != c
Example #16
def test_timers():
    """Pickling for Timers used in Operators for C-level profiling."""
    timer = Timer('timer', ['sec0', 'sec1'])
    pkl_obj = pickle.dumps(timer)
    new_obj = pickle.loads(pkl_obj)
    assert new_obj.name == timer.name
    assert new_obj.sections == timer.sections
    assert new_obj.value._obj.sec0 == timer.value._obj.sec0 == 0.0
    assert new_obj.value._obj.sec1 == timer.value._obj.sec1 == 0.0
Example #17
    def do_execute(self, sock):
        req_header = common.sock_recv(sock, 32)
        func_len, func_args_len, func_kwargs_len, bits = struct.unpack('qqqq', req_header)
        func = common.sock_recv(sock, func_len)
        func_args = common.sock_recv(sock, func_args_len)
        func_kwargs = common.sock_recv(sock, func_kwargs_len)
        is_fork = bits & 0x1

        func = cloudpickle.loads(func)
        func_args = cloudpickle.loads(func_args)
        func_kwargs = cloudpickle.loads(func_kwargs)

        # replace broadcast vars
        for i, e in enumerate(func_args):
            if type(e) == common.BroadcastVariableRef:
                func_args[i] = self.server.broadcast_vars[e.key]

        if not is_fork:
            result = func(*func_args, **func_kwargs)
            ser_result = cloudpickle.dumps(result)
        else:
            rd, wr = os.pipe()
            pid = os.fork()
            if pid == 0: # child
                result = func(*func_args, **func_kwargs)
                ser_result = cloudpickle.dumps(result)
                os.write(wr, struct.pack('q', len(ser_result)))
                i = 0
                while i < len(ser_result):
                    i += os.write(wr, ser_result[i:])
                # use os._exit so the forked child skips normal interpreter cleanup
                os._exit(0)
            else: # parent
                try:
                    ser_len = struct.unpack('q', os.read(rd, 8))[0]
                    ser_result = common.pipe_recv(rd, ser_len)
                finally:
                    os.close(rd)
                    os.close(wr)

        send_data = ser_result
        send_data_len = len(send_data)
        sock.sendall(struct.pack('q', send_data_len))
        sock.sendall(send_data)
Example #18
    def run(self):
        '''
        Runs core loop of server. It continually listens on a port and waits 
        until it receives a message, which is added to the queue. The server 
        then determines the type of operation wanted, processes the chunk, 
        and sends the results back to the calling client. It continues to 
        process these commands until the queue is empty, at which point it 
        returns to waiting
        '''
        #infinite looping listening thread to identify itself to clients
        print('Server is Running')

        self.bst = helpers._Broadcast_Server_Thread(MULTICAST_GROUP_IP, 
            MULTICAST_PORT, self.chunk_queue)
        self.bst.start()

        #infinite looping listening thread for chunks
        self.sstr = helpers._Server_Socket_Thread_Receive(IP_ADDRESS, 
            self.port, self.chunk_queue)
        self.sstr.start()
        #infinitely loops until calling process calls stop()
        while not self._abort:
            
            # sleep so we don't busy wait
            time.sleep(0.01)

            if not self.chunk_queue.empty():
                full_chunk = self.chunk_queue.get()
                dict_received = pickle.loads(full_chunk[0])
                chunk = dict_received['chunk']
                func = dict_received['func']
                op = dict_received['op']

                if op == 'map':
                    processed_chunk = helpers._single_map(func, chunk)
                elif op == 'filter':
                    processed_chunk = helpers._single_filter(func, chunk)
                elif op == 'reduce':
                    processed_chunk = helpers._single_reduce(func, chunk)
                # TODO: raise an error here
                else:
                    processed_chunk = 'This operation does not exist.'
                
                dict_sent = {
                    'chunk': processed_chunk, 
                    'index': dict_received['index']
                }

                #sends results back on port+1
                self.ssts = threading.Thread(
                    target = helpers._server_socket_thread_send, 
                    args = (full_chunk[1], self.port + 1, 
                        pickle.dumps(dict_sent))
                )
                self.ssts.start()
        self.sstr.stop() #nicely close sockets at the end
Example #19
def test_unjitted_operator():
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)

    op = Operator(Eq(f, f + 1))

    pkl_op = pickle.dumps(op)
    new_op = pickle.loads(pkl_op)

    assert str(op) == str(new_op)
Example #20
    def load(self, filename):
        """
        :param filename: file name to load from
        :type name: str

        Load optimizer state from disk
        """
        with open(filename, "rb") as input_file:
            state = cloudpickle.loads(input_file.read())
        self.set_state(state)
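
A matching save method is not shown here; assuming a get_state() counterpart to set_state(), it would simply dump the state with cloudpickle:

    def save(self, filename):
        """Persist optimizer state to disk (sketch; assumes a get_state() counterpart)."""
        with open(filename, "wb") as output_file:
            output_file.write(cloudpickle.dumps(self.get_state()))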
Example #21
 def test_function_pickle_compat_0_4_0(self):
     # The result of `cloudpickle.dumps(lambda x: x)` in cloudpickle 0.4.0,
     # Python 2.7
     pickled = (b'\x80\x02ccloudpickle.cloudpickle\n_fill_function\nq\x00(c'
         b'cloudpickle.cloudpickle\n_make_skel_func\nq\x01ccloudpickle.clou'
         b'dpickle\n_builtin_type\nq\x02U\x08CodeTypeq\x03\x85q\x04Rq\x05(K'
         b'\x01K\x01K\x01KCU\x04|\x00\x00Sq\x06N\x85q\x07)U\x01xq\x08\x85q'
         b'\tU\x07<stdin>q\nU\x08<lambda>q\x0bK\x01U\x00q\x0c))tq\rRq\x0eJ'
         b'\xff\xff\xff\xff}q\x0f\x87q\x10Rq\x11}q\x12N}q\x13NtR.')
     self.assertEqual(42, cloudpickle.loads(pickled)(42))
Example #22
def test_constant():
    c = Constant(name='c')
    assert c.data == 0.
    c.data = 1.

    pkl_c = pickle.dumps(c)
    new_c = pickle.loads(pkl_c)

    # .data is initialized, so it should have been pickled too
    assert np.all(c.data == 1.)
    assert np.all(new_c.data == 1.)
Example #23
    def get(self, block=True, timeout=-1):
        result = super(Queue, self).get()

        if block:
            try:
                with seconds(timeout):
                    while result is None:
                        result = super(Queue, self).get()
                        time.sleep(0.1)
            except TimeoutError:
                raise Empty

        return cloudpickle.loads(result)
Example #24
def test_queue_apply_async(zk, resources):
    def feed(i, queue):
        queue.put(cp.dumps(i))

    queue = Queue(zk, '/satyr/test-pool')
    with Pool(name='test-pool') as pool:
        results = [pool.apply_async(feed, [i, queue], resources=resources)
                   for i in range(5)]
        pool.wait(seconds=30)

    time.sleep(1)
    results = [cp.loads(queue.get()) for i in range(5)]
    assert sorted(results) == list(range(5))
Example #25
def test_operator_function():
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)

    op = Operator(Eq(f, f + 1))
    op.apply()

    pkl_op = pickle.dumps(op)
    new_op = pickle.loads(pkl_op)

    assert str(op) == str(new_op)

    new_op.apply(f=f)
    assert np.all(f.data == 2)
Example #26
def test_operator_timefunction():
    grid = Grid(shape=(3, 3, 3))
    f = TimeFunction(name='f', grid=grid, save=3)

    op = Operator(Eq(f.forward, f + 1))
    op.apply(time=0)

    pkl_op = pickle.dumps(op)
    new_op = pickle.loads(pkl_op)

    assert str(op) == str(new_op)

    new_op.apply(time_m=1, time_M=1, f=f)
    assert np.all(f.data[2] == 2)
Example #27
def test_receiver():
    grid = Grid(shape=(3,))
    time_range = TimeAxis(start=0., stop=1000., step=0.1)
    nreceivers = 3

    rec = Receiver(name='rec', grid=grid, time_range=time_range, npoint=nreceivers,
                   coordinates=[(0.,), (1.,), (2.,)])
    rec.data[:] = 1.

    pkl_rec = pickle.dumps(rec)
    new_rec = pickle.loads(pkl_rec)

    assert np.all(new_rec.data == 1)
    assert np.all(new_rec.coordinates.data == [[0.], [1.], [2.]])
Example #28
def test_feed(s, a, b):
    def func(scheduler):
        return dumps((scheduler.processing, scheduler.stacks))

    stream = yield connect(s.ip, s.port)
    yield write(stream, {'op': 'feed',
                         'function': dumps(func),
                         'interval': 0.01})

    for i in range(5):
        response = yield read(stream)
        expected = s.processing, s.stacks
        assert cloudpickle.loads(response) == expected

    stream.close()
Example #29
def test_function():
    grid = Grid(shape=(3, 3, 3))
    f = Function(name='f', grid=grid)
    f.data[0] = 1.

    pkl_f = pickle.dumps(f)
    new_f = pickle.loads(pkl_f)

    # .data is initialized, so it should have been pickled too
    assert np.all(f.data[0] == 1.)
    assert np.all(new_f.data[0] == 1.)

    assert f.space_order == new_f.space_order
    assert f.dtype == new_f.dtype
    assert f.shape == new_f.shape
Example #30
def test_feed(s, a, b):
    def func(scheduler):
        return dumps(dict(scheduler.worker_info))

    comm = yield connect(s.address)
    yield comm.write({'op': 'feed',
                      'function': dumps(func),
                      'interval': 0.01})

    for i in range(5):
        response = yield comm.read()
        expected = dict(s.worker_info)
        assert cloudpickle.loads(response) == expected

    yield comm.close()
Example #31
def loads(payload):
    with _lock:
        # There is a race condition when pickle.load() is used in a multi-threaded environment;
        # see https://bugs.python.org/issue36773 for more details.
        return cloudpickle.loads(payload)
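
For symmetry, the dumps side can be wrapped with the same lock; whether that is strictly necessary depends on the objects being pickled, so this is only a sketch reusing the module's _lock:

def dumps(obj):
    with _lock:
        # keep serialization serialized as well, mirroring the loads() wrapper above
        return cloudpickle.dumps(obj)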
Example #32
 def __setstate__(self, state):
     self.work_id, self.args, self.kwargs, self.fn, cp = state
     if cp:
         from cloudpickle import loads
         self.fn = loads(self.fn)
Example #33
def unpack(data):
    # data = base64.b64decode(data)
    data = lz4.frame.decompress(data)
    data = pickle.loads(data)
    return data
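
The inverse helper is not part of the snippet; a matching pack would pickle first and then compress with lz4.frame (the base64 step stays disabled to mirror unpack above):

import pickle
import lz4.frame

def pack(data):
    # inverse of unpack(): pickle, then lz4-compress
    data = pickle.dumps(data)
    data = lz4.frame.compress(data)
    # data = base64.b64encode(data)  # only if unpack() re-enables its b64decode step
    return data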
Example #34
def test_full_model():

    shape = (50, 50, 50)
    spacing = [10. for _ in shape]
    nbpml = 10

    # Create two-layer model from preset
    model = demo_model(preset='layers-isotropic', vp_top=1., vp_bottom=2.,
                       spacing=spacing, shape=shape, nbpml=nbpml)

    # Test Model pickling
    pkl_model = pickle.dumps(model)
    new_model = pickle.loads(pkl_model)
    assert np.isclose(np.linalg.norm(model.vp-new_model.vp), 0)

    f0 = .010
    dt = model.critical_dt
    t0 = 0.0
    tn = 350.0
    time_range = TimeAxis(start=t0, stop=tn, step=dt)

    # Test TimeAxis pickling
    pkl_time_range = pickle.dumps(time_range)
    new_time_range = pickle.loads(pkl_time_range)
    assert np.isclose(np.linalg.norm(time_range.time_values),
                      np.linalg.norm(new_time_range.time_values))

    # Test Class Constant pickling
    pkl_origin = pickle.dumps(model.grid.origin)
    new_origin = pickle.loads(pkl_origin)

    for a, b in zip(model.grid.origin, new_origin):
        assert a.compare(b) == 0

    # Test Class TimeDimension pickling
    time_dim = TimeDimension(name='time', spacing=Constant(name='dt', dtype=np.float32))
    pkl_time_dim = pickle.dumps(time_dim)
    new_time_dim = pickle.loads(pkl_time_dim)
    assert time_dim.spacing._value == new_time_dim.spacing._value

    # Test Class SteppingDimension
    stepping_dim = SteppingDimension(name='t', parent=time_dim)
    pkl_stepping_dim = pickle.dumps(stepping_dim)
    new_stepping_dim = pickle.loads(pkl_stepping_dim)
    assert stepping_dim.is_Time == new_stepping_dim.is_Time

    # Test Grid pickling
    pkl_grid = pickle.dumps(model.grid)
    new_grid = pickle.loads(pkl_grid)
    assert model.grid.shape == new_grid.shape

    assert model.grid.extent == new_grid.extent
    assert model.grid.shape == new_grid.shape
    for a, b in zip(model.grid.dimensions, new_grid.dimensions):
        assert a.compare(b) == 0

    ricker = RickerSource(name='src', grid=model.grid, f0=f0, time_range=time_range)

    pkl_ricker = pickle.dumps(ricker)
    new_ricker = pickle.loads(pkl_ricker)
    assert np.isclose(np.linalg.norm(ricker.data), np.linalg.norm(new_ricker.data))
Example #35
    def main():
        # Parse the arguments.
        args = parse_arguments()
        theme_name = t_theme_name.get()

        args.model_folder_name = os.path.join(theme_name, 'chainer')
        #args.epoch = int(float(t_epochs.get()))
        args.out = parent_path / 'models' / theme_name / method_name
        args.method = method_name

        if args.label:
            labels = args.label
        else:
            raise ValueError('No target label was specified.')

        # Dataset preparation.
        def postprocess_label(label_list):
            return numpy.asarray(label_list, dtype=numpy.float32)

        smiles_col_name = t_smiles.get()
        print('Preprocessing dataset...')
        preprocessor = preprocess_method_dict[args.method]()
        parser = CSVFileParser(preprocessor,
                               postprocess_label=postprocess_label,
                               labels=labels,
                               smiles_col=t_smiles.get())

        #args.datafile=parent_path / 'results' /  theme_name / method_name / high_low /'brics_virtual'  / 'virtual.csv'
        args.datafile = csv_path
        dataset = parser.parse(args.datafile)['dataset']

        @chainer.dataset.converter()
        def extract_inputs(batch, device=None):
            return concat_mols(batch, device=device)[:-1]

        print('Predicting the virtual library')
        # Set up the regressor.
        device = chainer.get_device(args.device)
        model_path = os.path.join(args.out, args.model_foldername,
                                  args.model_filename)

        with open(
                parent_path / 'models' / theme_name / method_name / high_low /
            ('regressor.pickle'), 'rb') as f:
            regressor = cloudpickle.loads(f.read())

        # Perform the prediction.
        print('Evaluating...')
        converter = converter_method_dict[args.method]
        data_iterator = SerialIterator(dataset,
                                       16,
                                       repeat=False,
                                       shuffle=False)
        eval_result = Evaluator(data_iterator,
                                regressor,
                                converter=converter,
                                device=device)()
        print('Evaluation result: ', eval_result)

        predict_ = regressor.predict(dataset, converter=extract_inputs)
        predict_ = [i[0] for i in predict_]
        df_data = pd.read_csv(csv_path)

        df_predict = df_data
        df_predict[t_task.get()] = predict_
        df_predict = df_predict.dropna()

        PandasTools.AddMoleculeColumnToFrame(frame=df_predict,
                                             smilesCol=t_smiles.get())
        df_predict['sascore'] = df_predict.ROMol.map(sascorer.calculateScore)

        df_predict.to_csv(csv_path)

        png_generator = (parent_path / 'results' / theme_name / method_name /
                         high_low / data_name /
                         'molecular-structure').glob('*.png')
        #png_generator.sort()

        for i, png_path in enumerate(png_generator):
            #print((png_path.name)[4:10])
            i = int((png_path.name)[4:10])
            if i < len(df_predict[t_task.get()]):
                img = Image.open(png_path)
                draw = ImageDraw.Draw(img)
                font = ImageFont.truetype('arial.ttf', 26)
                draw.text((0, 0),
                          t_task.get() + ' : ' +
                          str(round(df_predict[t_task.get()][i], 2)),
                          (0, 0, 0),
                          font=font)
                draw.text(
                    (0, 30),
                    'sascore : ' + str(round(df_predict['sascore'][i], 2)),
                    (0, 0, 0),
                    font=font)

                img.save(png_path)

        save_json(os.path.join(args.out, 'eval_result.json'), eval_result)
Example #36
import pandas as pd
import cloudpickle
import os

this_path = os.path.dirname(__file__)
fn = os.path.join(this_path, '../data/token_str.pkl')
fh = open(fn, 'rb')
token_str = cloudpickle.loads(fh.read())

def preprocess_table(filename):
	'''
	Open a file of data & clean/normalize it to our schema
	'''

	df = pd.read_csv(filename)

	# drop duplicate rows
	df = df.drop_duplicates()

	# fill missing values
	df['Description'] = df['Description'].fillna('None')
	df['Flavor'] = df['Flavor'].fillna('None')

	# we're assuming the table of data lacks symptom info;
	# this part of the code might be taken out later
	df['Symptoms'] = ['None'] * df.shape[0]

	return df

def create_fulltext(df):
	'''
Example #37
def run_experiment(argv):
    default_log_dir = config.GARAGE_LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=('Number of parallel workers to perform rollouts. '
              "0 => don't start any workers"))
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='all',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every'
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    # SIGINT is blocked for all processes created in parallel_sampler to avoid
    # the creation of sleeping and zombie processes.
    #
    # If the user interrupts run_experiment, there's a chance some processes
    # won't die due to a dead lock condition where one of the children in the
    # parallel sampler exits without releasing a lock once after it catches
    # SIGINT.
    #
    # Later the parent tries to acquire the same lock to proceed with its
    # cleanup, but it remains asleep waiting for the lock to be released.
    # In the meantime, all the processes in the parallel sampler remain in the
    # zombie state since the parent cannot proceed with their cleanup.
    with mask_signals([signal.SIGINT]):
        if args.n_parallel > 0:
            parallel_sampler.initialize(n_parallel=args.n_parallel)
            if args.seed is not None:
                parallel_sampler.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        if args.resume_from_dir is None:
            log_dir = osp.join(default_log_dir, args.exp_name)
        else:
            log_dir = args.resume_from_dir
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir))
    logger.add_output(dowel.StdOutput())
    prev_snapshot_dir = snapshotter.snapshot_dir
    prev_mode = snapshotter.snapshot_mode
    snapshotter.snapshot_dir = log_dir
    snapshotter.snapshot_mode = args.snapshot_mode
    snapshotter.snapshot_gap = args.snapshot_gap
    logger.push_prefix('[%s] ' % args.exp_name)

    if args.resume_from_dir is not None:
        with LocalRunner() as runner:
            runner.restore(args.resume_from_dir, from_epoch=args.resume_epoch)
            runner.resume()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            try:
                method_call(variant_data)
            except BaseException:
                children = garage.plotter.Plotter.get_plotters()
                children += garage.tf.plotter.Plotter.get_plotters()
                if args.n_parallel > 0:
                    children += [parallel_sampler]
                child_proc_shutdown(children)
                raise
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    snapshotter.snapshot_mode = prev_mode
    snapshotter.snapshot_dir = prev_snapshot_dir
    logger.remove_all()
    logger.pop_prefix()
Example #38
def loads(encoded):
    pickled = base64.b64decode(encoded)
    payload = cloudpickle.loads(pickled)
    return payload
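
The corresponding encoder is the straightforward inverse (a sketch, not necessarily the project's own helper):

import base64
import cloudpickle

def dumps(payload):
    pickled = cloudpickle.dumps(payload)
    encoded = base64.b64encode(pickled)
    return encoded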
Example #39
def get_job_results(host, job_id):
    r = requests.post(urllib.parse.urljoin("http://" + host, "/job_results"),
                      data={"job_id": job_id})
    return pickle.loads(r.content)
Example #40
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
    log_level: int = logging_util.INFO,
) -> None:
    env_factory: Callable[
        [int, List[SideChannel]], UnityEnvironment
    ] = cloudpickle.loads(pickled_env_factory)
    env_parameters = EnvironmentParametersChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    stats_channel = StatsSideChannel()
    env: BaseEnv = None
    # Set log level. On some platforms, the logger isn't common with the
    # main process, so we need to set it again.
    logging_util.set_log_level(log_level)

    def _send_response(cmd_name: EnvironmentCommand, payload: Any) -> None:
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    def _generate_all_results() -> AllStepResult:
        all_step_result: AllStepResult = {}
        for brain_name in env.behavior_specs:
            all_step_result[brain_name] = env.get_steps(brain_name)
        return all_step_result

    def external_brains():
        result = {}
        for behavior_name, behavior_specs in env.behavior_specs.items():
            result[behavior_name] = behavior_spec_to_brain_parameters(
                behavior_name, behavior_specs
            )
        return result

    try:
        env = env_factory(
            worker_id, [env_parameters, engine_configuration_channel, stats_channel]
        )
        while True:
            req: EnvironmentRequest = parent_conn.recv()
            if req.cmd == EnvironmentCommand.STEP:
                all_action_info = req.payload
                for brain_name, action_info in all_action_info.items():
                    if len(action_info.action) != 0:
                        env.set_actions(brain_name, action_info.action)
                env.step()
                all_step_result = _generate_all_results()
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                env_stats = stats_channel.get_and_reset_stats()
                step_response = StepResponse(
                    all_step_result, get_timer_root(), env_stats
                )
                step_queue.put(
                    EnvironmentResponse(
                        EnvironmentCommand.STEP, worker_id, step_response
                    )
                )
                reset_timers()
            elif req.cmd == EnvironmentCommand.EXTERNAL_BRAINS:
                _send_response(EnvironmentCommand.EXTERNAL_BRAINS, external_brains())
            elif req.cmd == EnvironmentCommand.RESET:
                for k, v in req.payload.items():
                    env_parameters.set_float_parameter(k, v)
                env.reset()
                all_step_result = _generate_all_results()
                _send_response(EnvironmentCommand.RESET, all_step_result)
            elif req.cmd == EnvironmentCommand.CLOSE:
                break
    except (
        KeyboardInterrupt,
        UnityCommunicationException,
        UnityTimeOutException,
        UnityEnvironmentException,
        UnityCommunicatorStoppedException,
    ) as ex:
        logger.info(f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(
            EnvironmentResponse(EnvironmentCommand.ENV_EXITED, worker_id, ex)
        )
        _send_response(EnvironmentCommand.ENV_EXITED, ex)
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        if env is not None:
            env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
Example #41
def run_experiment(argv):
    default_log_dir = config.LOG_DIR
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--n_parallel',
        type=int,
        default=1,
        help=
        'Number of parallel workers to perform rollouts. 0 => don\'t start any workers'
    )
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='all',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress_%s.csv' % timestamp,
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument(
        '--resume_from',
        type=str,
        default=None,
        help='Name of the pickle file to resume experiment from.')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help=
        'Whether to only print the tabular log information (in a horizontal format)'
    )
    parser.add_argument('--seed', type=int, help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for stub objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')
    parser.add_argument('--use_cloudpickle',
                        type=ast.literal_eval,
                        default=False)

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        set_seed(args.seed)

    if args.n_parallel > 0:
        from rllab.sampler import parallel_sampler
        parallel_sampler.initialize(n_parallel=args.n_parallel)
        if args.seed is not None:
            parallel_sampler.set_seed(args.seed)

    if args.plot:
        from rllab.plotter import plotter
        plotter.init_worker()

    if args.log_dir is None:
        log_dir = osp.join(default_log_dir, args.exp_name)
    else:
        log_dir = args.log_dir
    tabular_log_file = osp.join(log_dir, args.tabular_log_file)
    text_log_file = osp.join(log_dir, args.text_log_file)
    params_log_file = osp.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = osp.join(log_dir, args.variant_log_file)
        logger.log_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    if not args.use_cloudpickle:
        logger.log_parameters_lite(params_log_file, args)

    logger.add_text_output(text_log_file)
    logger.add_tabular_output(tabular_log_file)
    prev_snapshot_dir = logger.get_snapshot_dir()
    prev_mode = logger.get_snapshot_mode()
    logger.set_snapshot_dir(log_dir)
    logger.set_snapshot_mode(args.snapshot_mode)
    logger.set_snapshot_gap(args.snapshot_gap)
    logger.set_log_tabular_only(args.log_tabular_only)
    logger.push_prefix("[%s] " % args.exp_name)

    if args.resume_from is not None:
        data = joblib.load(args.resume_from)
        assert 'algo' in data
        algo = data['algo']
        algo.train()
    else:
        # read from stdin
        if args.use_cloudpickle:
            import cloudpickle
            method_call = cloudpickle.loads(base64.b64decode(args.args_data))
            method_call(variant_data)
        else:
            data = pickle.loads(base64.b64decode(args.args_data))
            maybe_iter = concretize(data)
            if is_iterable(maybe_iter):
                for _ in maybe_iter:
                    pass

    logger.set_snapshot_mode(prev_mode)
    logger.set_snapshot_dir(prev_snapshot_dir)
    logger.remove_tabular_output(tabular_log_file)
    logger.remove_text_output(text_log_file)
    logger.pop_prefix()
Example #42
    def load(cls, npzfile, force_parse=False, verbose=False):

        global processed_raw_model

        if verbose:
            st = time.time()

        fdict = dict(np.load(npzfile, allow_pickle=True))

        mtxt = str(fdict['yaml_raw'])

        try:
            if force_parse:
                raise Exception
            pmodel = cpickle.loads(fdict['model_dump'])
        except:
            use_cached = False

            if 'processed_raw_model' in globals():
                use_cached = processed_raw_model.fdict['yaml_raw'] == mtxt

            if use_cached:
                pmodel = deepcopy(processed_raw_model)
            else:
                import tempfile

                try:
                    # cumbersome: load the text of the *_funcs file, write it to a temporary file, just to use it as a module
                    ftxt = str(fdict['ffile_raw'])
                    tfile = tempfile.NamedTemporaryFile(
                        'w', suffix='.py', delete=False)
                    tfile.write(ftxt)
                    tfile.close()
                    ffile = tfile.name
                except KeyError:
                    ffile = ''

                pmodel = cls.parse(mtxt, ffile)

                try:
                    tfile.close()
                    os.unlink(tfile.name)
                except:
                    pass

        pmodel.fdict = fdict
        pmodel.name = str(fdict['name'])
        pmodel.path = os.path.dirname(npzfile)
        pmodel.data = cpickle.loads(fdict['data'])
        pmodel_dump = cpickle.dumps(pmodel, protocol=4)
        pmodel.fdict['model_dump'] = pmodel_dump

        pmodel.debug = platform == "darwin" or platform == "win32"
        if pmodel.debug:
            print('[DSGE:]'.ljust(
                15, ' ') + ' Parallelization disabled under Windows and Mac due to a problem with pickling some of the symbolic elements. Sorry...')

        for ob in pmodel.fdict.keys():
            if str(pmodel.fdict[ob]) == 'None':
                pmodel.fdict[ob] = None

        if verbose:
            print('[DSGE:]'.ljust(15, ' ')+' Loading and parsing done in %ss.' %
                  np.round(time.time()-st, 5))

        return pmodel
Example #43
 def __setstate__(self, state):
     state["input_spec"] = cp.loads(state["input_spec"])
     state["output_spec"] = cp.loads(state["output_spec"])
     state["inputs"] = make_klass(state["input_spec"])(**state["inputs"])
     self.__dict__.update(state)
Example #44
    sp = ChemSentencePiece()
    sp.load(config_dic.get("sp_path"))

    # load train data
    print("=========== Load train data ===========")
    if os.path.exists(
            os.path.join(config_dic.get("cache_dir"),
                         config_dic.get('train_name') + ".word_documents")):
        print(
            f"Use Cache data. {os.path.join(config_dic.get('cache_dir'), config_dic.get('train_name') + '.word_documents')}"
        )
        with open(
                os.path.join(config_dic.get("cache_dir"),
                             config_dic.get('train_name') + ".word_documents"),
                "rb") as f:
            word_documents = cloudpickle.loads(f.read())
    else:
        with open(config_dic.get("lm_input_path"), "rt",
                  encoding="utf-8") as f:
            lines = f.read().split("\n")

        word_documents = []
        num_re = re.compile(r"\d+\.*\d*")
        for line in tqdm(lines):
            word_document = []
            if line:
                for word in tokenize(line):
                    if config_dic.get("number_normalize") and num_re.match(
                            word):
                        word = NUM
                    word_document.append(word)
Example #45
def deserialize(bad: Dict, frames: List[bytes]) -> ConnectionClass:
    import cloudpickle
    info = cloudpickle.loads(frames[0])
    return ConnectionClass(info["host"], info["port"])
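
A matching serialize hook might pack the connection details into a single cloudpickle frame; the header contents and the host/port attributes on ConnectionClass are assumptions here:

from typing import Dict, List, Tuple

def serialize(conn: "ConnectionClass") -> Tuple[Dict, List[bytes]]:
    import cloudpickle
    header: Dict = {"serializer": "cloudpickle"}  # assumed free-form header
    frames: List[bytes] = [cloudpickle.dumps({"host": conn.host, "port": conn.port})]
    return header, frames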
Example #46
 def deserialize(self, serialized_obj: bytes) -> object:
     if not isinstance(serialized_obj, bytes):
         raise InvalidTypeForDeserializationException(
             'Please ensure the serialized object is of type bytes.')
     return cloudpickle.loads(serialized_obj)
Example #47
 def __setstate__(self, var: Any) -> None:
     self.var = cloudpickle.loads(var)
Example #48
 def __setstate__(self, state):
     function, atol, rtol, min_npoints, data = state
     function = cloudpickle.loads(function)
     self.__init__(function, atol, rtol, min_npoints)
     self._set_data(data)
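
The matching __getstate__ would pickle only the function with cloudpickle and pass the scalars and data through untouched; this is a sketch assuming the attribute names and a _get_data() accessor mirroring _set_data():

 def __getstate__(self):
     # mirror __setstate__: only the function needs cloudpickle; the rest pickles natively
     return (cloudpickle.dumps(self.function), self.atol, self.rtol,
             self.min_npoints, self._get_data())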
Example #49
    def main():
        # Parse the arguments.
        args = parse_arguments()

        args.model_folder_name = os.path.join(theme_name, 'chainer')

        base_epoch = complexity_degree[high_low]
        args.epoch = int(base_epoch * 60 / method_complexity[method_name])
        args.epoch = max(args.epoch, 5)

        #args.epoch = int(float(t_epochs.get()))
        args.out = parent_path / 'models' / theme_name / method_name / high_low
        args.method = method_name

        if t_model_path != "":
            args.source_transferlearning = Path(t_model_path.get())

        print(theme_name)

        if args.label:
            labels = args.label
            class_num = len(labels) if isinstance(labels, list) else 1
        else:
            raise ValueError('No target label was specified.')

        # Dataset preparation. Postprocessing is required for the regression task.
        def postprocess_label(label_list):
            return numpy.asarray(label_list, dtype=numpy.float32)

        # Apply a preprocessor to the dataset.
        print('Preprocessing dataset...')
        preprocessor = preprocess_method_dict[args.method]()
        smiles_col_name = t_smiles.get()

        parser = CSVFileParser(preprocessor,
                               postprocess_label=postprocess_label,
                               labels=labels,
                               smiles_col=smiles_col_name)

        args.datafile = t_csv_filepath.get()
        dataset = parser.parse(args.datafile)['dataset']

        # Scale the label values, if necessary.
        if args.scale == 'standardize':
            scaler = StandardScaler()
            scaler.fit(dataset.get_datasets()[-1])
        else:
            scaler = None

        # Split the dataset into training and validation.
        train_data_size = int(len(dataset) * args.train_data_ratio)
        trainset, testset = split_dataset_random(dataset, train_data_size,
                                                 args.seed)

        print((args.source_transferlearning / method_name / high_low /
               'regressor.pickle'))
        print((args.source_transferlearning / method_name / high_low /
               'regressor.pickle').exists())

        # Set up the predictor.

        if Booleanvar_transfer_learning.get() \
                and (args.source_transferlearning / method_name / high_low / 'regressor.pickle').exists():

            # refer https://github.com/pfnet-research/chainer-chemistry/issues/407
            with open(
                    args.source_transferlearning / method_name / high_low /
                    'regressor.pickle', 'rb') as f:
                regressor = cloudpickle.loads(f.read())
                pre_predictor = regressor.predictor
                predictor = GraphConvPredictor(pre_predictor.graph_conv,
                                               MLP(out_dim=1, hidden_dim=16))

        else:
            predictor = set_up_predictor(args.method,
                                         args.unit_num,
                                         args.conv_layers,
                                         class_num,
                                         label_scaler=scaler)

        # Set up the regressor.
        device = chainer.get_device(args.device)
        metrics_fun = {'mae': functions.mean_absolute_error, 'rmse': rmse}

        regressor = Regressor(predictor,
                              lossfun=functions.mean_squared_error,
                              metrics_fun=metrics_fun,
                              device=device)

        print('Training... : ', method_name)
        run_train(regressor,
                  trainset,
                  valid=None,
                  batch_size=args.batchsize,
                  epoch=args.epoch,
                  out=args.out,
                  extensions_list=None,
                  device=device,
                  converter=concat_mols,
                  resume_path=None)

        # Save the regressor's parameters.
        args.model_foldername = t_theme_name.get()

        model_path = os.path.join(args.out, args.model_foldername,
                                  args.model_filename)
        print('Saving the trained model to {}...'.format(model_path))

        # TODO(nakago): ChainerX array cannot be sent to numpy array when internal
        # state has gradients.
        if hasattr(regressor.predictor.graph_conv, 'reset_state'):
            regressor.predictor.graph_conv.reset_state()

        with open(
                parent_path / 'models' / theme_name / method_name / high_low /
            ('regressor.pickle'), 'wb') as f:
            cloudpickle.dump(regressor, f)

        #with open(parent_path / 'models' / theme_name / method_name / high_low /('predictor.pickle'),  'wb') as f:
        #    cloudpickle.dump(predictor, f)

        print('Evaluating... : ', method_name)
        test_iterator = SerialIterator(testset,
                                       16,
                                       repeat=False,
                                       shuffle=False)
        eval_result = Evaluator(test_iterator,
                                regressor,
                                converter=concat_mols,
                                device=device)()
        print('Evaluation result: : ', method_name)
        print(eval_result)

        @chainer.dataset.converter()
        def extract_inputs(batch, device=None):
            return concat_mols(batch, device=device)[:-1]

        pred_train = regressor.predict(trainset, converter=extract_inputs)
        pred_train = [i[0] for i in pred_train]
        pred_test = regressor.predict(testset, converter=extract_inputs)
        pred_test = [i[0] for i in pred_test]

        y_train = [i[2][0] for i in trainset]
        y_test = [i[2][0] for i in testset]
        title = args.label
        save_path = parent_path / 'results' / theme_name / method_name / high_low / 'scatter.png'
        save_scatter(y_train, pred_train, y_test, pred_test, title, save_path)

        global image_score
        image_score_open = Image.open(parent_path / 'results' / theme_name /
                                      method_name / high_low / 'scatter.png')
        image_score = ImageTk.PhotoImage(image_score_open, master=frame1)

        canvas.create_image(200, 200, image=image_score)

        from sklearn.metrics import mean_squared_error, mean_absolute_error
        from sklearn.metrics import r2_score

        train_mse = mean_squared_error(y_train, pred_train)
        test_mse = mean_squared_error(y_test, pred_test)

        train_rmse = np.sqrt(train_mse)
        test_rmse = np.sqrt(test_mse)

        train_mae = mean_absolute_error(y_train, pred_train)
        test_mae = mean_absolute_error(y_test, pred_test)

        train_r2score = r2_score(y_train, pred_train)
        test_r2score = r2_score(y_test, pred_test)

        print('train_mse : ', train_mse)
        print('test_mse : ', test_mse)
        print('train_rmse : ', train_rmse)
        print('test_rmse : ', test_rmse)
        print('train_mae : ', train_mae)
        print('test_mae : ', test_mae)
        print('train_r2score : ', train_r2score)
        print('test_r2score : ', test_r2score)
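A minimal follow-up sketch, reloading the regressor pickled above for later inference; the paths and the extract_inputs converter are the ones defined in this example, and the names for the reloaded objects are hypothetical.

# Sketch: reload the regressor saved above and reuse it for prediction.
with open(parent_path / 'models' / theme_name / method_name / high_low /
          'regressor.pickle', 'rb') as f:
    reloaded_regressor = cloudpickle.loads(f.read())

# The same converter that drops the labels works for fresh predictions.
new_preds = [p[0] for p in reloaded_regressor.predict(testset,
                                                      converter=extract_inputs)]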
Ejemplo n.º 50
0
def _json_tricks_any_object_decode(obj: Dict[str, Any]) -> Any:
    if isinstance(obj, dict) and '__nni_obj__' in obj:
        obj = obj['__nni_obj__']
        b = base64.b64decode(obj)
        return cloudpickle.loads(b)
    return obj
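For context, a hedged sketch of the matching encoder: the function name is hypothetical, but it mirrors the '__nni_obj__' envelope the decoder above expects (cloudpickle bytes, base64-encoded so they survive JSON serialization).

def _json_tricks_any_object_encode(obj: Any) -> Dict[str, Any]:
    # Hypothetical mirror of the decoder above.
    return {'__nni_obj__': base64.b64encode(cloudpickle.dumps(obj)).decode()}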
Ejemplo n.º 51
0
    def slave_run(self):
        """
        This method is the infinite loop a slave enters directly from init.
        It makes the slave wait for a command to perform from the master and
        then calls the appropriate function.

        This method also takes care of the synchronization of data between the
        master and the slaves by matching PDSs based on the pds_ids sent by the master
        with the command.

        Commands received from the master are of the form of a tuple.
        The first component of the tuple is always the operation to be performed
        and the rest are conditional on the operation.

        (op, pds_id) where op == OP_PARALLELIZE for parallelize
        (op, pds_id, pds_id_result, func) where op == OP_MAP for map
        (op, bds_id) where op == OP_BROADCAST for a broadcast
        (op, pds_id) where op == OP_COLLECT for a collect operation
        (op, pds_id) where op == OP_DELETEPDS for a delete of the remote PDS on slaves
        (op, bds_id) where op == OP_DELETEBDS for a delete of the remote BDS on slaves
        (op,) where op == OP_FINISH for the slave to break out of the loop and terminate
        """

        # Initialize PDS data store here because only slaves need to do it.
        self.pds_store = {}

        while True:
            data = self.comm.bcast(None, root=0)

            op = data[0]
            if op == self.OP_PARALLELIZE:
                pds_id = data[1]
                self.__rec_pds_id = pds_id
                pds_id, pds_id_new = self.__get_received_pds_id()
                self.pds_store[pds_id] = None

            elif op == self.OP_MAP:
                pds_id, pds_id_result, function_packed = data[1:]
                self.__rec_pds_id, self.__rec_pds_id_result = pds_id, pds_id_result

                # Use cloudpickle to turn the packed bytes back into a callable
                func = cloudpickle.loads(function_packed)

                # Enter the map so we can grab the data and apply the func.
                # The func is sent ahead of time rather than per item for performance reasons.
                pds_res = self.map(func)

                # Store the result in a newly generated PDS pds_id
                self.pds_store[pds_res.pds_id] = pds_res

            elif op == self.OP_BROADCAST:
                self.__bds_id = data[1]
                self.broadcast(None)

            elif op == self.OP_COLLECT:
                pds_id = data[1]

                # Access an existing PDS from data store
                pds = self.pds_store[pds_id]

                self.collect(pds)

            elif op == self.OP_DELETEPDS:
                pds_id = data[1]
                del self.pds_store[pds_id]

            elif op == self.OP_DELETEBDS:
                bds_id = data[1]
                del self.bds_store[bds_id]

            elif op == self.OP_FINISH:
                quit()
            else:
                raise Exception("Slave received unknown command code")
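A minimal sketch of the master-side counterpart for the OP_MAP branch, based only on the tuple format documented in the docstring above; the attribute names mirror those used in the slave loop, and the surrounding master method itself is an assumption.

# Hypothetical master side: cloudpickle the function so lambdas and closures
# survive the broadcast, then send the OP_MAP command tuple to all slaves.
function_packed = cloudpickle.dumps(func)
self.comm.bcast((self.OP_MAP, pds_id, pds_id_result, function_packed), root=0)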
Ejemplo n.º 52
0
def run(remote_dir, distribution_strategy_text):
  """deserializes Model and Dataset and runs them.

  Args:
    remote_dir: Temporary cloud storage folder that contains model and Dataset
      graph. This folder is also used for job output.
    distribution_strategy_text: Specifies the distribution strategy for remote
      execution when a jobspec is provided. Accepted values are strategy names
      as specified by 'tf.distribute.<strategy>.__name__'.
  """
  logging.info('Setting distribution strategy to %s',
               distribution_strategy_text)

  is_mwms = distribution_strategy_text == MULTI_WORKER_MIRRORED_STRATEGY_NAME

  distribution_strategy = SUPPORTED_DISTRIBUTION_STRATEGIES[
      distribution_strategy_text]()

  with distribution_strategy.scope():
    if utils.is_tf_v1():
      training_assets_graph = tf.compat.v2.saved_model.load(
          export_dir=os.path.join(remote_dir, 'training_assets'), tags=None)
    else:
      training_assets_graph = tf.saved_model.load(
          os.path.join(remote_dir, 'training_assets'))

    fit_kwargs = {}
    if hasattr(training_assets_graph, 'fit_kwargs_fn'):
      fit_kwargs = tfds.as_numpy(training_assets_graph.fit_kwargs_fn())
      logging.info('fit_kwargs were loaded successfully.')

    if hasattr(training_assets_graph, 'x_fn'):
      fit_kwargs['x'] = training_assets_graph.x_fn()
      logging.info('x was loaded successfully.')

    if hasattr(training_assets_graph, 'y_fn'):
      fit_kwargs['y'] = training_assets_graph.y_fn()
      logging.info('y was loaded successfully.')

    if hasattr(training_assets_graph, 'validation_data_fn'):
      fit_kwargs['validation_data'] = training_assets_graph.validation_data_fn()

    if hasattr(training_assets_graph, 'callbacks_fn'):
      pickled_callbacks = tfds.as_numpy(training_assets_graph.callbacks_fn())
      fit_kwargs['callbacks'] = cloudpickle.loads(pickled_callbacks)
      logging.info('callbacks were loaded successfully.')

    model = tf.keras.models.load_model(os.path.join(remote_dir, 'model'))
    logging.info('Model was loaded from %s successfully.',
                 os.path.join(remote_dir, 'model'))
    model.fit(**fit_kwargs)

  # We need to set a different directory on workers when using MWMS since we
  # will run into errors due to concurrent writes to the same directory.
  # This is a workaround for the issue described in b/148619319.
  if not _is_current_worker_chief() and is_mwms:
    tmp_worker_dir = os.path.join(remote_dir,
                                  'output/tmp/workers_' + str(uuid.uuid4()))
    logging.info('Saving model from worker in temporary folder %s.',
                 tmp_worker_dir)
    model.save(tmp_worker_dir)

    logging.info('Removing temporary folder %s.', tmp_worker_dir)
    _delete_dir(tmp_worker_dir)

  else:
    model.save(os.path.join(remote_dir, 'output'))
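The SUPPORTED_DISTRIBUTION_STRATEGIES table used above is defined elsewhere in the module; a plausible sketch of such a mapping, keyed by tf.distribute.<strategy>.__name__ as the docstring describes (the exact set of supported strategies is an assumption).

# Hypothetical lookup table from strategy name to constructor.
MULTI_WORKER_MIRRORED_STRATEGY_NAME = (
    tf.distribute.experimental.MultiWorkerMirroredStrategy.__name__)

SUPPORTED_DISTRIBUTION_STRATEGIES = {
    tf.distribute.MirroredStrategy.__name__: tf.distribute.MirroredStrategy,
    MULTI_WORKER_MIRRORED_STRATEGY_NAME:
        tf.distribute.experimental.MultiWorkerMirroredStrategy,
}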
Ejemplo n.º 53
0
def _subproc_wrapper(fn, queue, *args, **kwargs):
    fn = cloudpickle.loads(fn)
    results = fn(*args, **kwargs)
    queue.put(results)
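A minimal usage sketch for this wrapper, assuming only the standard multiprocessing API; fn, args and kwargs are placeholders for whatever callable and arguments the caller wants to run out of process.

import multiprocessing as mp

# Cloudpickle the callable so lambdas and closures can cross the process
# boundary, then collect the result through the shared queue.
queue = mp.Queue()
proc = mp.Process(target=_subproc_wrapper,
                  args=(cloudpickle.dumps(fn), queue, *args),
                  kwargs=kwargs)
proc.start()
results = queue.get()
proc.join()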
Ejemplo n.º 54
0
def worker(
    parent_conn: Connection,
    step_queue: Queue,
    pickled_env_factory: str,
    worker_id: int,
    engine_configuration: EngineConfig,
) -> None:
    env_factory: Callable[[int, List[SideChannel]],
                          UnityEnvironment] = cloudpickle.loads(
                              pickled_env_factory)
    shared_float_properties = FloatPropertiesChannel()
    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_configuration)
    env: BaseUnityEnvironment = env_factory(
        worker_id, [shared_float_properties, engine_configuration_channel])

    def _send_response(cmd_name, payload):
        parent_conn.send(EnvironmentResponse(cmd_name, worker_id, payload))

    try:
        while True:
            cmd: EnvironmentCommand = parent_conn.recv()
            if cmd.name == "step":
                all_action_info = cmd.payload
                actions = {}
                values = {}
                for brain_name, action_info in all_action_info.items():
                    actions[brain_name] = action_info.action
                    values[brain_name] = action_info.value
                all_brain_info = env.step(vector_action=actions, value=values)
                # The timers in this process are independent from all the processes and the "main" process
                # So after we send back the root timer, we can safely clear them.
                # Note that we could randomly return timers a fraction of the time if we wanted to reduce
                # the data transferred.
                # TODO get gauges from the workers and merge them in the main process too.
                step_response = StepResponse(all_brain_info, get_timer_root())
                step_queue.put(
                    EnvironmentResponse("step", worker_id, step_response))
                reset_timers()
            elif cmd.name == "external_brains":
                _send_response("external_brains", env.external_brains)
            elif cmd.name == "get_properties":
                reset_params = {}
                for k in shared_float_properties.list_properties():
                    reset_params[k] = shared_float_properties.get_property(k)

                _send_response("get_properties", reset_params)
            elif cmd.name == "reset":
                for k, v in cmd.payload.items():
                    shared_float_properties.set_property(k, v)
                all_brain_info = env.reset()
                _send_response("reset", all_brain_info)
            elif cmd.name == "close":
                break
    except (KeyboardInterrupt, UnityCommunicationException,
            UnityTimeOutException):
        logger.info(
            f"UnityEnvironment worker {worker_id}: environment stopping.")
        step_queue.put(EnvironmentResponse("env_close", worker_id, None))
    finally:
        # If this worker has put an item in the step queue that hasn't been processed by the EnvManager, the process
        # will hang until the item is processed. We avoid this behavior by using Queue.cancel_join_thread()
        # See https://docs.python.org/3/library/multiprocessing.html#multiprocessing.Queue.cancel_join_thread for
        # more info.
        logger.debug(f"UnityEnvironment worker {worker_id} closing.")
        step_queue.cancel_join_thread()
        step_queue.close()
        env.close()
        logger.debug(f"UnityEnvironment worker {worker_id} done.")
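For reference, a hedged sketch of the parent side: the environment factory is cloudpickled before being handed to the worker process, since it may be a closure that the standard pickle module cannot handle. The surrounding variable names are assumptions.

from multiprocessing import Pipe, Process

# Serialize the factory and give the child its end of the pipe plus the queue.
parent_conn, child_conn = Pipe()
proc = Process(
    target=worker,
    args=(child_conn, step_queue, cloudpickle.dumps(env_factory),
          worker_id, engine_configuration),
)
proc.start()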
Ejemplo n.º 55
0
    def data(self):
        return cloudpickle.loads(self['data'])
Ejemplo n.º 56
0
    def loads(self, obj):
        return cloudpickle.loads(obj)
Ejemplo n.º 57
0
def roundtrip(ob):
    return cloudpickle.loads(cloudpickle.dumps(ob))
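A typical use of such a round-trip helper in tests is checking that objects the standard pickle module rejects, such as lambdas and closures, survive serialization:

double = roundtrip(lambda x: 2 * x)
assert double(21) == 42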
Ejemplo n.º 58
0
def get_active_set(redis: StrictRedis, ana_id: str, t: int) -> set:
    """Read active set from redis."""
    active_set = redis.get(idfy(ACTIVE_SET, ana_id, t))
    if active_set is not None:
        active_set = pickle.loads(active_set)
    return active_set
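The corresponding write side would presumably pickle the set under the same key; a minimal sketch with a hypothetical function name.

def set_active_set(redis: StrictRedis, ana_id: str, t: int,
                   active_set: set) -> None:
    """Hypothetical counterpart: store the pickled active set in redis."""
    redis.set(idfy(ACTIVE_SET, ana_id, t), pickle.dumps(active_set))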
Ejemplo n.º 59
0
def _worker_populate_task(G, env, policy, scope=None):
    G = _get_scoped_G(G, scope)
    G.env = pickle.loads(env)
    G.policy = pickle.loads(policy)
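A minimal driver-side sketch, under the assumption that the environment and policy are pickled once and the bytes shipped to each worker's scoped globals; in the real sampler this call is dispatched through the worker pool rather than invoked directly, and G and scope are placeholders.

# Hypothetical driver side: serialize once, reuse the bytes for every worker.
env_pkl = pickle.dumps(env)
policy_pkl = pickle.dumps(policy)
_worker_populate_task(G, env_pkl, policy_pkl, scope=scope)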
Ejemplo n.º 60
0
def run_experiment(argv):
    """Run experiment.

    Args:
        argv (list[str]): Command line arguments.

    Raises:
        BaseException: Propagate any exception in the experiment.

    """
    now = datetime.datetime.now(dateutil.tz.tzlocal())

    # avoid name clashes when running distributed jobs
    rand_id = str(uuid.uuid4())[:5]
    timestamp = now.strftime('%Y_%m_%d_%H_%M_%S_%f_%Z')

    default_exp_name = 'experiment_%s_%s' % (timestamp, rand_id)
    parser = argparse.ArgumentParser()
    parser.add_argument('--exp_name',
                        type=str,
                        default=default_exp_name,
                        help='Name of the experiment.')
    parser.add_argument('--log_dir',
                        type=str,
                        default=None,
                        help='Path to save the log and iteration snapshot.')
    parser.add_argument('--snapshot_mode',
                        type=str,
                        default='last',
                        help='Mode to save the snapshot. Can be either "all" '
                        '(all iterations will be saved), "last" (only '
                        'the last iteration will be saved), "gap" (every'
                        '`snapshot_gap` iterations are saved), or "none" '
                        '(do not save snapshots)')
    parser.add_argument('--snapshot_gap',
                        type=int,
                        default=1,
                        help='Gap between snapshot iterations.')
    parser.add_argument(
        '--resume_from_dir',
        type=str,
        default=None,
        help='Directory of the pickle file to resume experiment from.')
    parser.add_argument('--resume_from_epoch',
                        type=str,
                        default=None,
                        help='Index of iteration to restore from. '
                        'Can be "first", "last" or a number. '
                        'Not applicable when snapshot_mode="last"')
    parser.add_argument('--tabular_log_file',
                        type=str,
                        default='progress.csv',
                        help='Name of the tabular log file (in csv).')
    parser.add_argument('--text_log_file',
                        type=str,
                        default='debug.log',
                        help='Name of the text log file (in pure text).')
    parser.add_argument('--tensorboard_step_key',
                        type=str,
                        default=None,
                        help='Name of the step key in tensorboard_summary.')
    parser.add_argument('--params_log_file',
                        type=str,
                        default='params.json',
                        help='Name of the parameter log file (in json).')
    parser.add_argument('--variant_log_file',
                        type=str,
                        default='variant.json',
                        help='Name of the variant log file (in json).')
    parser.add_argument('--plot',
                        type=ast.literal_eval,
                        default=False,
                        help='Whether to plot the iteration results')
    parser.add_argument(
        '--log_tabular_only',
        type=ast.literal_eval,
        default=False,
        help='Print only the tabular log information (in a horizontal format)')
    parser.add_argument('--seed',
                        type=int,
                        default=None,
                        help='Random seed for numpy')
    parser.add_argument('--args_data',
                        type=str,
                        help='Pickled data for objects')
    parser.add_argument('--variant_data',
                        type=str,
                        help='Pickled data for variant configuration')

    args = parser.parse_args(argv[1:])

    if args.seed is not None:
        deterministic.set_seed(args.seed)

    if not args.plot:
        garage.plotter.Plotter.disable()
        garage.tf.plotter.Plotter.disable()

    if args.log_dir is None:
        log_dir = os.path.join(os.path.join(os.getcwd(), 'data'),
                               args.exp_name)
    else:
        log_dir = args.log_dir

    tabular_log_file = os.path.join(log_dir, args.tabular_log_file)
    text_log_file = os.path.join(log_dir, args.text_log_file)
    params_log_file = os.path.join(log_dir, args.params_log_file)

    if args.variant_data is not None:
        variant_data = pickle.loads(base64.b64decode(args.variant_data))
        variant_log_file = os.path.join(log_dir, args.variant_log_file)
        dump_variant(variant_log_file, variant_data)
    else:
        variant_data = None

    log_parameters(params_log_file, args)

    logger.add_output(dowel.TextOutput(text_log_file))
    logger.add_output(dowel.CsvOutput(tabular_log_file))
    logger.add_output(dowel.TensorBoardOutput(log_dir, x_axis='TotalEnvSteps'))
    logger.add_output(dowel.StdOutput())

    logger.push_prefix('[%s] ' % args.exp_name)

    snapshot_config = SnapshotConfig(snapshot_dir=log_dir,
                                     snapshot_mode=args.snapshot_mode,
                                     snapshot_gap=args.snapshot_gap)

    method_call = cloudpickle.loads(base64.b64decode(args.args_data))
    try:
        method_call(snapshot_config, variant_data, args.resume_from_dir,
                    args.resume_from_epoch)
    except BaseException:
        children = garage.plotter.Plotter.get_plotters()
        children += garage.tf.plotter.Plotter.get_plotters()
        child_proc_shutdown(children)
        raise

    logger.remove_all()
    logger.pop_prefix()
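The launcher that builds these command-line arguments would presumably encode the experiment callable the same way it is decoded above; a minimal sketch, where method_call, variant and launcher_script are placeholders and the imports (base64, pickle, cloudpickle, subprocess, sys) are assumed available.

# Cloudpickle the callable and base64-encode it so it can travel as a plain
# command-line string; the variant uses the matching pickle/base64 encoding.
args_data = base64.b64encode(cloudpickle.dumps(method_call)).decode('utf-8')
encoded_variant = base64.b64encode(pickle.dumps(variant)).decode('utf-8')
subprocess.check_call([
    sys.executable, launcher_script,
    '--args_data', args_data,
    '--variant_data', encoded_variant,
])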