def fake_metadata(num_exps, num_plates, num_wells, num_sites, datasets=('train', 'test')):
    rows = []
    for dataset in datasets:
        for _ in range(num_exps):
            cell_type = np.random.choice(CELL_TYPES)
            experiment_number = np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9])
            experiment = '{}-{:02d}'.format(cell_type, experiment_number)
            base_row = {
                'experiment': experiment,
                'cell_type': cell_type,
                'dataset': dataset
            }
            for p in range(1, num_plates + 1):
                base_row = t.assoc(base_row, 'plate', p)
                rand_wells = np.random.choice(ALL_WELLS, num_wells, replace=False)
                for well in rand_wells:
                    well_row = t.merge(base_row, {
                        'well': well,
                        'well_type': 'treatment',
                        'sirna': 1.0
                    })
                    for site in range(1, num_sites + 1):
                        rows.append(t.assoc(well_row, 'site', site))
    df = pd.DataFrame(rows)
    return df

def data_for_app_GET(self):
    """ List all configurationkeys and rangeconstraints of a specific application.
        Returns the application with configurationkeys, rangeconstraints and exclusionconstraints.
    """
    app_id = self.request.swagger_data['id']
    app = Application.get(app_id)
    if app is None:
        print_log(datetime.datetime.now(), 'GET',
                  '/applications/' + str(app_id) + '/rangeconstraints',
                  'Get all things of one application', None)
        return self.createResponse(None, 400)
    if app.apikey is None:
        app = self.set_app_apikey(app, app_id)
    configurationkeys = app.configurationkeys
    ranges = list(concat(list(map(lambda _: _.rangeconstraints, configurationkeys))))
    exclusions = self.get_app_exclusionconstraints(app_id)
    app_data = app.as_dict()
    app_data = assoc(app_data, 'configurationkeys',
                     list(map(lambda _: _.as_dict(), configurationkeys)))
    app_data = assoc(app_data, 'rangeconstraints',
                     list(map(lambda _: _.as_dict(), ranges)))
    app_data = assoc(app_data, 'exclusionconstraints',
                     list(map(lambda _: _.as_dict(), exclusions)))
    return app_data

def __init__(self, center, workers):
    self.center = assoc(start_center(center), 'address', center)
    self.workers = [
        assoc(start_worker(center, worker), 'address', worker)
        for worker in workers
    ]
    sleep(1)
    self.report()

def todo_app(state, action):
    if action['type'] == ActionTypes.ADD_TODO:
        # Append the new text to the immutable tuple of todos.
        todos = state['todos'] + (action['text'],)
        return toolz.assoc(state, 'todos', todos)
    elif action['type'] == ActionTypes.COMPLETE_TODO:
        # Drop the todo at the given index.
        todos = state['todos'][:action['index']] + state['todos'][action['index'] + 1:]
        return toolz.assoc(state, 'todos', todos)
    else:
        return state

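# A minimal usage sketch for the reducer above (not part of the original code).
# `ActionTypes` is assumed to expose ADD_TODO / COMPLETE_TODO constants; the
# stand-in class below is only for illustration. Each dispatch returns a fresh
# state dict because toolz.assoc never mutates its input.
import toolz

class ActionTypes:
    ADD_TODO = 'ADD_TODO'
    COMPLETE_TODO = 'COMPLETE_TODO'

state = {'todos': ()}
state = todo_app(state, {'type': ActionTypes.ADD_TODO, 'text': 'write docs'})
state = todo_app(state, {'type': ActionTypes.ADD_TODO, 'text': 'ship release'})
state = todo_app(state, {'type': ActionTypes.COMPLETE_TODO, 'index': 0})
assert state == {'todos': ('ship release',)}
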
def set_transaction_type_if_needed(transaction_dict: Dict[str, Any]) -> Dict[str, Any]:
    if 'type' not in transaction_dict:
        if 'gasPrice' in transaction_dict and 'accessList' in transaction_dict:
            # access list txn - type 1
            transaction_dict = assoc(transaction_dict, 'type', '0x1')
        elif 'maxFeePerGas' in transaction_dict and 'maxPriorityFeePerGas' in transaction_dict:
            # dynamic fee txn - type 2
            transaction_dict = assoc(transaction_dict, 'type', '0x2')
    return transaction_dict

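# A minimal sketch of how the helper above behaves: untyped transaction dicts
# get a 'type' field inferred from the fee fields present. The field names
# follow the EIP-1559/EIP-2930 conventions used in the function; the payloads
# below are illustrative only, and toolz.assoc is assumed to be in scope as above.
from typing import Any, Dict

dynamic_fee_txn: Dict[str, Any] = {
    'maxFeePerGas': 2_000_000_000,
    'maxPriorityFeePerGas': 1_000_000_000,
    'gas': 21000,
}
assert set_transaction_type_if_needed(dynamic_fee_txn)['type'] == '0x2'

legacy_txn: Dict[str, Any] = {'gasPrice': 1_000_000_000, 'gas': 21000}
# No access list and no type field: left untouched.
assert 'type' not in set_transaction_type_if_needed(legacy_txn)
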
def rebatch_metadata_by_experiment(metadata):
    normal, normal_rest = prioritize_normals(metadata)
    batch = metadata[0]["participant"]
    tumor_batch = [tz.assoc(x, "batch", batch) for x in metadata
                   if x["sample_type"] in PRIORITIZED_TUMOR_CODES.keys()]
    normal = [tz.assoc(normal, "batch", batch)] if normal else []
    # run each non priority normal as its own tumor sample with no control
    normal_rest = [tz.assoc(x, "batch", batch + "-" + x["sample_type"])
                   for x in normal_rest]
    normal_rest = [tz.assoc(x, "phenotype", "tumor") for x in normal_rest]
    all_batches = normal + normal_rest + tumor_batch
    return all_batches

def unify(u, v, s):
    u = transitive_get(u, s)
    v = transitive_get(v, s)
    if u == v:
        return s
    if isvar(u):
        return assoc(s, u, v)  # return a new, extended substitution
    if isvar(v):
        return assoc(s, v, u)
    return _unify(u, v, s)

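# A small usage sketch for the unify above (not part of the original code). It
# assumes the surrounding module provides a logic-variable constructor `var`,
# as in unification/logpy-style libraries.
x = var('x')
s = unify((1, x, 3), (1, 2, 3), {})
# Expected: s maps x to 2, and the input substitution {} is left untouched
# because assoc returns a new dict rather than mutating.
print(s)
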
def tls_cluster_context(worker_kwargs=None, scheduler_kwargs=None,
                        security=None, **kwargs):
    security = security or tls_only_security()
    worker_kwargs = assoc(worker_kwargs or {}, 'security', security)
    scheduler_kwargs = assoc(scheduler_kwargs or {}, 'security', security)

    with cluster(worker_kwargs=worker_kwargs,
                 scheduler_kwargs=scheduler_kwargs,
                 **kwargs) as (s, workers):
        yield s, workers

def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE,
             connection_limit=512, **kwargs):
    self.handlers = assoc(handlers, 'identity', self.identity)
    self.id = str(uuid.uuid1())
    self._port = None
    self.rpc = ConnectionPool(limit=connection_limit)
    super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)

def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE,
             connection_limit=512, deserialize=True, **kwargs):
    self.handlers = assoc(handlers, 'identity', self.identity)
    self.id = str(uuid.uuid1())
    self._port = None
    self._listen_streams = dict()
    self.rpc = ConnectionPool(limit=connection_limit,
                              deserialize=deserialize)
    self.deserialize = deserialize
    self.monitor = SystemMonitor()
    self.counters = None
    self.digests = None
    if hasattr(self, 'loop'):
        with ignoring(ImportError):
            from .counter import Digest
            self.digests = defaultdict(partial(Digest, loop=self.loop))

        from .counter import Counter
        self.counters = defaultdict(partial(Counter, loop=self.loop))
        pc = PeriodicCallback(self.monitor.update, 500, io_loop=self.loop)
        self.loop.add_callback(pc.start)
        if self.digests is not None:
            self._last_tick = time()
            self._tick_pc = PeriodicCallback(self._measure_tick, 20,
                                             io_loop=self.loop)
            self.loop.add_callback(self._tick_pc.start)

    self.__stopped = False
    super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)

async def test_unmonitored_threat(
    self,
    threat_monitor,
    threat_repo,
    occurrence_factory,
    threat_factory,
    report_threat_dto_factory,
):
    dto = report_threat_dto_factory()
    threat = threat_factory(
        name=dto.name,
        danger_level=dto.danger_level,
        location=dto.location,
        occurrences=[occurrence_factory(state="resolved").dict()],
    )
    new_threat = threat_factory(
        **assoc(
            threat.dict(),
            "occurrences",
            [occurrence_factory(state="pending").dict()],
        ),
    )
    threat_repo.upsert.return_value = threat
    threat_repo.create_pending_occurrence.return_value = new_threat
    threat_monitor.start_monitoring.return_value = new_threat

    result = await threat_service.report_threat(threat_monitor, threat_repo, dto)

    threat_repo.upsert.assert_called_once_with(dto)
    threat_repo.create_pending_occurrence.assert_called_once_with(threat)
    threat_monitor.start_monitoring.assert_called_once_with(new_threat)
    assert result == new_threat
    assert result.is_being_monitored() is True

def step_with_model(rhs, state, dt=.125, n=100):
    """Perform a number of time steps with a model"""
    for t in range(n):
        qt_dot = rhs(state)
        new_qt = state['QT'] + dt * qt_dot['QT']
        state = assoc(state, 'QT', new_qt)
        yield state

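# A minimal sketch of driving the generator above with a toy right-hand side
# (not part of the original code). The rhs here, a simple exponential decay of
# 'QT', is an illustrative stand-in for whatever model the original used, and
# toolz's assoc is assumed to be in scope as in the function above.
def toy_rhs(state):
    return {'QT': -0.1 * state['QT']}

trajectory = list(step_with_model(toy_rhs, {'QT': 1.0}, dt=0.125, n=4))
print([round(s['QT'], 4) for s in trajectory])
# Each yielded state is a fresh dict; the initial state is never mutated.
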
def __init__(self, handlers, connection_limit=512, deserialize=True,
             io_loop=None):
    self.handlers = assoc(handlers, 'identity', self.identity)
    self.id = str(uuid.uuid1())
    self._address = None
    self._listen_address = None
    self._port = None
    self._comms = {}
    self.rpc = ConnectionPool(limit=connection_limit,
                              deserialize=deserialize)
    self.deserialize = deserialize
    self.monitor = SystemMonitor()
    self.counters = None
    self.digests = None
    self.listener = None
    self.io_loop = io_loop or IOLoop.current()

    if hasattr(self, 'loop'):  # XXX?
        with ignoring(ImportError):
            from .counter import Digest
            self.digests = defaultdict(partial(Digest, loop=self.loop))

        from .counter import Counter
        self.counters = defaultdict(partial(Counter, loop=self.loop))
        pc = PeriodicCallback(self.monitor.update, 500, io_loop=self.loop)
        self.loop.add_callback(pc.start)
        if self.digests is not None:
            self._last_tick = time()
            self._tick_pc = PeriodicCallback(self._measure_tick, 20,
                                             io_loop=self.loop)
            self.loop.add_callback(self._tick_pc.start)

    self.__stopped = False

def _normalize_arg(other, constants):
    """Get the name to use to build the string and turn the value into
    something that can go into the ast. If needed this will functionally
    update the constants.

    Parameters
    ----------
    other : any
        The object to normalize.
    constants : dict[str -> any]
        The constant namespace.

    Returns
    -------
    othername : str
        The name to use in the ``_name`` of the lambda.
    other : any
        The normalized value.
    constants : dict[str -> any]
        The potentially updated constants.
    """
    if not isinstance(other, placeholder):
        othername = repr(other)
        name = '_' + uuid4().hex
        constants = assoc(constants, name, other)
        other = ast.Name(id=name, ctx=ast.Load())
    elif other._tree is not other:
        othername = '(%s)' % other._name
    else:
        othername = other._name

    return othername, other, constants

def groupby_many(keys: Callable[[Any], Iterable], reducer: Reducer, initial):
    """Given a `keys` function, that maps an element into multiple keys,
    transduces the collection into a dictionary of key to group of matching elements.

    >>> transducer.transduce(
        transducer.groupby_many(
            lambda x: ("even",) if x % 2 == 0 else ("odd",),
            lambda s, x: (*s, x),
            (),
        ),
        lambda s, _: s,
        {},
        [1, 2, 3, 4, 5],
    )
    {"even": (2, 4), "odd": (1, 3, 5)}
    """
    return functional_generic.compose(
        mapcat(
            functional_generic.compose_left(
                functional_generic.juxt(keys, functional.wrap_tuple),
                sync.star(itertools.product),
            ),
        ),
        lambda step: lambda s, x: step(
            toolz.assoc(s, x[0], reducer(s.get(x[0], initial), x[1])),
            x,
        ),
    )

async def run_traffic_jam(nsends, nbytes):
    # This test eats `nsends * nbytes` bytes in RAM
    np = pytest.importorskip("numpy")
    from distributed.protocol import to_serialize

    data = bytes(np.random.randint(0, 255, size=(nbytes,)).astype("u1").data)
    async with EchoServer() as e:
        comm = await connect(e.address)
        b = BatchedSend(interval=0.01)
        b.start(comm)

        msg = {"x": to_serialize(data)}
        for i in range(nsends):
            b.send(assoc(msg, "i", i))
            if np.random.random() > 0.5:
                await asyncio.sleep(0.001)

        results = []
        count = 0
        while len(results) < nsends:
            # If this times out then I think it's a backpressure issue
            # Somehow we're able to flood the socket so that the receiving end
            # loses some of our messages
            L = await asyncio.wait_for(comm.read(), 5)
            count += 1
            results.extend(r["i"] for r in L)

        assert count == b.batch_count == e.count
        assert b.message_count == nsends
        assert results == list(range(nsends))

        await comm.close()  # external closing
        await b.close()

def run_traffic_jam(nsends, nbytes):
    # This test eats `nsends * nbytes` bytes in RAM
    np = pytest.importorskip('numpy')
    from distributed.protocol import to_serialize

    data = bytes(np.random.randint(0, 255, size=(nbytes,)).astype('u1').data)
    with echo_server() as e:
        comm = yield connect(e.address)
        b = BatchedSend(interval=0.01)
        b.start(comm)

        msg = {'x': to_serialize(data)}
        for i in range(nsends):
            b.send(assoc(msg, 'i', i))
            if np.random.random() > 0.5:
                yield gen.sleep(0.001)

        results = []
        count = 0
        while len(results) < nsends:
            # If this times out then I think it's a backpressure issue
            # Somehow we're able to flood the socket so that the receiving end
            # loses some of our messages
            L = yield gen.with_timeout(timedelta(seconds=5), comm.read())
            count += 1
            results.extend(r['i'] for r in L)

        assert count == b.batch_count == e.count
        assert b.message_count == nsends
        assert results == list(range(nsends))

        comm.close()  # external closing
        yield b.close()

def unify(u, v, s):  # no check at the moment
    """ Find substitution so that u == v while satisfying s

    >>> x = var('x')
    >>> unify((1, x), (1, 2), {})
    {~x: 2}
    """
    u = walk(u, s)
    v = walk(v, s)
    if u == v:
        return s
    if isvar(u):
        return assoc(s, u, v)
    if isvar(v):
        return assoc(s, v, u)
    return _unify(u, v, s)

def fill_kwargs(fn, args, kwargs):
    """ Read a csv file and fill up kwargs

    This normalizes kwargs against a sample file. It does the following:

    1.  If given a globstring, just use one file
    2.  Get names from csv file if not given
    3.  Identify the presence of a header
    4.  Identify dtypes
    5.  Establish column names
    6.  Switch around dtypes and column names if parse_dates is active

    Normally ``pd.read_csv`` does this for us.  However for ``dd.read_csv`` we
    need to be consistent across multiple files and don't want to do these
    heuristics each time so we use the pandas solution once, record the
    results, and then send back a fully explicit kwargs dict to send to future
    calls to ``pd.read_csv``.

    Returns
    -------
    kwargs: dict
        keyword arguments to give to pd.read_csv
    """
    kwargs = merge(csv_defaults, kwargs)
    sample_nrows = kwargs.pop('sample_nrows', 1000)
    essentials = ['columns', 'names', 'header', 'parse_dates', 'dtype']
    if set(essentials).issubset(kwargs):
        return kwargs

    # Let pandas infer on the first 100 rows
    if '*' in fn:
        filenames = sorted(glob(fn))
        if not filenames:
            raise ValueError("No files found matching name %s" % fn)
        fn = filenames[0]

    if 'names' not in kwargs:
        kwargs['names'] = csv_names(fn, **kwargs)
    if 'header' not in kwargs:
        kwargs['header'] = infer_header(fn, **kwargs)
        if kwargs['header'] is True:
            kwargs['header'] = 0

    try:
        head = pd.read_csv(fn, *args, **assoc(kwargs, 'nrows', sample_nrows))
    except StopIteration:
        head = pd.read_csv(fn, *args, **kwargs)

    if 'parse_dates' not in kwargs:
        kwargs['parse_dates'] = [col for col in head.dtypes.index
                                 if np.issubdtype(head.dtypes[col], np.datetime64)]
    if 'dtype' not in kwargs:
        kwargs['dtype'] = dict(head.dtypes)
        for col in kwargs['parse_dates']:
            del kwargs['dtype'][col]

    kwargs['columns'] = list(head.columns)

    return kwargs

def process(msg):
    try:
        result = yield self.compute(report=False, **msg)
        bstream.send(result)
    except Exception as e:
        logger.exception(e)
        bstream.send(assoc(error_message(e), 'key', msg.get('key')))

def test_data_for_app_GET(self):
    from toolz import assoc, concat
    self.req.swagger_data = {'id': 1}
    httpApps = Applications(self.req)
    response = httpApps.data_for_app_GET()
    app = Application.get(1)
    configurationkeys = app.configurationkeys
    ranges = list(concat(list(map(lambda _: _.rangeconstraints, configurationkeys))))
    app_data = app.as_dict()
    app_data = assoc(app_data, 'configurationkeys',
                     list(map(lambda _: _.as_dict(), configurationkeys)))
    app_data = assoc(app_data, 'rangeconstraints',
                     list(map(lambda _: _.as_dict(), ranges)))
    app_data = assoc(app_data, 'exclusionconstraints',
                     list(map(lambda _: _.as_dict(),
                              httpApps.get_app_exclusionconstraints(1))))
    assert response == app_data

def _get_subnet_config_w_cidr(self, network_config):
    network_cidr_base = str(network_config.get('network_cidr_base', '172.16.0.0'))
    network_cidr_size = str(network_config.get('network_cidr_size', '20'))
    first_network_address_block = str(
        network_config.get('first_network_address_block', network_cidr_base))

    ret_val = {}
    base_cidr = network_cidr_base + '/' + network_cidr_size
    net = netaddr.IPNetwork(base_cidr)

    grouped_subnet = groupby('size', self._get_subnet_config_w_az(network_config))
    subnet_groups = sorted(grouped_subnet.items())
    available_cidrs = []

    for subnet_size, subnet_configs in subnet_groups:
        newcidrs = net.subnet(int(subnet_size))
        for subnet_config in subnet_configs:
            try:
                cidr = next(newcidrs)
            except StopIteration:
                net = next(chain(*reversed(available_cidrs)))
                newcidrs = net.subnet(int(subnet_size))
                cidr = next(newcidrs)
            new_config = assoc(subnet_config, 'cidr', str(cidr))
            yield new_config
        else:
            net = next(newcidrs)
            available_cidrs.append(newcidrs)

def read_csv(fn, *args, **kwargs):
    chunkbytes = kwargs.pop('chunkbytes', 2**25)  # 32 MB
    categorize = kwargs.pop('categorize', None)
    index = kwargs.pop('index', None)
    if index and categorize is None:
        categorize = True

    kwargs = fill_kwargs(fn, args, kwargs)

    # Handle glob strings
    if '*' in fn:
        return concat([read_csv(f, *args, **kwargs) for f in sorted(glob(fn))])

    token = tokenize(os.path.getmtime(fn), args, kwargs)
    name = 'read-csv-%s-%s' % (fn, token)

    columns = kwargs.pop('columns')
    header = kwargs.pop('header')

    if 'nrows' in kwargs:  # Just create single partition
        dsk = {(name, 0): (apply, pd.read_csv, (fn,),
                           assoc(kwargs, 'header', header))}
        result = DataFrame(dsk, name, columns, [None, None])
    else:
        # Chunk sizes and numbers
        total_bytes = file_size(fn, kwargs['compression'])
        nchunks = int(ceil(total_bytes / chunkbytes))
        divisions = [None] * (nchunks + 1)

        first_read_csv = partial(pd.read_csv, *args, header=header,
                                 **dissoc(kwargs, 'compression'))
        rest_read_csv = partial(pd.read_csv, *args, header=None,
                                **dissoc(kwargs, 'compression'))

        # Create dask graph
        dsk = dict(((name, i),
                    (rest_read_csv,
                     (BytesIO,
                      (textblock, fn, i * chunkbytes, (i + 1) * chunkbytes,
                       kwargs['compression']))))
                   for i in range(1, nchunks))
        dsk[(name, 0)] = (first_read_csv,
                          (BytesIO,
                           (textblock, fn, 0, chunkbytes,
                            kwargs['compression'])))

        result = DataFrame(dsk, name, columns, divisions)

    if categorize or index:
        categories, quantiles = categories_and_quantiles(fn, args, kwargs,
                                                         index, categorize,
                                                         chunkbytes=chunkbytes)

    if categorize:
        func = partial(categorize_block, categories=categories)
        result = result.map_partitions(func, columns=columns)

    if index:
        result = set_partition(result, index, quantiles)

    return result

def test_parameterized_term_default_value(self):
    defaults = {'a': 'default for a', 'b': 'default for b'}

    class F(Factor):
        params = defaults
        inputs = (SomeDataSet.foo,)
        dtype = 'f8'
        window_length = 5

    assert_equal(F().params, defaults)
    assert_equal(F(a='new a').params, assoc(defaults, 'a', 'new a'))
    assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
    assert_equal(
        F(a='new a', b='new b').params,
        {'a': 'new a', 'b': 'new b'},
    )

def decode(self, data: bytes) -> _DecodedMsgType:
    try:
        raw_decoded = cast(Dict[str, int], super().decode(data))
    except rlp.exceptions.ListDeserializationError:
        self.logger.warning("Malformed Disconnect message: %s", data)
        raise MalformedMessage(f"Malformed Disconnect message: {data}")
    return assoc(raw_decoded, "reason_name",
                 self.get_reason_name(raw_decoded["reason"]))

def run(self, *args, **kwargs):
    port = kwargs.pop('port', DEFAULT_PORT)
    self.port = port
    try:
        self.app.run(*args, port=port, **kwargs)
    except socket.error:
        print("\tOops, couldn't connect on port %d. Is it busy?" % port)
        self.run(*args, **assoc(kwargs, 'port', port + 1))

def experiments_GET(self):
    """ List all Application's Experiments including Experiments' status """
    app_id = self.request.swagger_data['appid']
    experiments = Experiment.query().join(Application)\
        .filter(Application.id == app_id).all()
    return list(map(lambda _: assoc(_.as_dict(), 'status', _.get_status()),
                    experiments))

def _get_subnet_config_w_az(self, network_config):
    az_count = int(network_config.get('az_count', 2))
    subnet_config = network_config.get('subnet_config', {})

    for subnet in subnet_config:
        for az in range(az_count):
            newsubnet = assoc(subnet, 'AZ', az)
            yield newsubnet

def fix_user_identities(event):
    field = 'user_identities'
    if field not in event:
        return None
    identities = event.pop(field)
    identities = (assoc(x, 'row_idx', event['row_idx']) for x in identities)
    return identities

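# A small sketch of what the helper above does (not part of the original code):
# it pops the nested 'user_identities' list off the event and tags each identity
# with the parent event's row_idx. The event payload below is made up for
# illustration, and toolz's assoc is assumed to be in scope as in the function.
event = {
    'row_idx': 7,
    'user_identities': [{'type': 'email'}, {'type': 'device_id'}],
}
identities = list(fix_user_identities(event))
assert identities == [
    {'type': 'email', 'row_idx': 7},
    {'type': 'device_id', 'row_idx': 7},
]
assert 'user_identities' not in event  # the field is removed from the event in place
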
def train(client, X, y, params, model_factory, sample_weight=None, **kwargs):
    data_parts = X.to_delayed()
    label_parts = y.to_delayed()
    if isinstance(data_parts, np.ndarray):
        assert data_parts.shape[1] == 1
        data_parts = data_parts.flatten().tolist()
    if isinstance(label_parts, np.ndarray):
        assert label_parts.ndim == 1 or label_parts.shape[1] == 1
        label_parts = label_parts.flatten().tolist()

    # Arrange parts into tuples. This enforces co-locality
    if sample_weight is not None:
        sample_weight_parts = sample_weight.to_delayed()
        if isinstance(sample_weight_parts, np.ndarray):
            assert (sample_weight_parts.ndim == 1 or
                    sample_weight_parts.shape[1] == 1)
            sample_weight_parts = sample_weight_parts.flatten().tolist()
        parts = list(map(delayed, zip(data_parts, label_parts, sample_weight_parts)))
    else:
        parts = list(map(delayed, zip(data_parts, label_parts)))

    parts = client.compute(parts)  # Start computation in the background
    wait(parts)

    for part in parts:
        if part.status == 'error':
            part  # trigger error locally

    key_to_part_dict = dict([(part.key, part) for part in parts])
    who_has = client.who_has(parts)
    worker_map = defaultdict(list)
    for key, workers in who_has.items():
        worker_map[first(workers)].append(key_to_part_dict[key])

    master_worker = first(worker_map)
    ncores = client.ncores()  # Number of cores per worker

    if "tree_learner" not in params or params['tree_learner'].lower() not in {
            "data", "feature", "voting"}:
        logger.warning("Parameter tree_learner not set or set to incorrect value "
                       "(%s), using 'data' as default",
                       params.get("tree_learner", None))
        params['tree_learner'] = "data"

    # Tell each worker to init the booster on the chunks/parts that it has locally
    futures_classifiers = [
        client.submit(_fit_local,
                      model_factory=model_factory,
                      params=assoc(params, 'num_threads', ncores[worker]),
                      list_of_parts=list_of_parts,
                      worker_addresses=list(worker_map.keys()),
                      local_listen_port=params.get("local_listen_port", 12400),
                      listen_time_out=params.get("listen_time_out", 120),
                      return_model=(worker == master_worker),
                      **kwargs)
        for worker, list_of_parts in worker_map.items()
    ]

    results = client.gather(futures_classifiers)
    results = [v for v in results if v]
    return results[0]

def _train(client, params, data, labels, dmatrix_kwargs={}, **kwargs):
    """
    Asynchronous version of train

    See Also
    --------
    train
    """
    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = data.to_delayed()
    label_parts = labels.to_delayed()
    if isinstance(data_parts, np.ndarray):
        assert data_parts.shape[1] == 1
        data_parts = data_parts.flatten().tolist()
    if isinstance(label_parts, np.ndarray):
        assert label_parts.ndim == 1 or label_parts.shape[1] == 1
        label_parts = label_parts.flatten().tolist()

    # Arrange parts into pairs. This enforces co-locality
    parts = list(map(delayed, zip(data_parts, label_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # Because XGBoost-python doesn't yet allow iterative training, we need to
    # find the locations of all chunks and map them to particular Dask workers
    key_to_part_dict = dict([(part.key, part) for part in parts])
    who_has = yield client.scheduler.who_has(keys=[part.key for part in parts])
    worker_map = defaultdict(list)
    for key, workers in who_has.items():
        worker_map[first(workers)].append(key_to_part_dict[key])

    ncores = yield client.scheduler.ncores()  # Number of cores per worker

    # Start the XGBoost tracker on the Dask scheduler
    host, port = parse_host_port(client.scheduler.address)
    env = yield client._run_on_scheduler(start_tracker,
                                         host.strip('/:'),
                                         len(worker_map))

    # Tell each worker to train on the chunks/parts that it has locally
    futures = [client.submit(train_part, env,
                             assoc(params, 'nthread', ncores[worker]),
                             list_of_parts, workers=worker,
                             dmatrix_kwargs=dmatrix_kwargs, **kwargs)
               for worker, list_of_parts in worker_map.items()]

    # Get the results, only one will be non-None
    results = yield client._gather(futures)
    result = [v for v in results if v][0]
    raise gen.Return(result)

def run(self, *args, **kwargs):
    """Run the server"""
    port = kwargs.pop('port', DEFAULT_PORT)
    self.port = port
    try:
        self.app.run(*args, port=port, **kwargs)
    except socket.error:
        print("\tOops, couldn't connect on port %d. Is it busy?" % port)
        self.run(*args, **assoc(kwargs, 'port', port + 1))

def compute_down(expr, ec, profiler_output=None, compute_kwargs=None,
                 odo_kwargs=None, **kwargs):
    """Compute down for blaze clients.

    Parameters
    ----------
    expr : Expr
        The expression to send to the server.
    ec : Client
        The blaze client to compute against.
    namespace : dict[Symbol -> any], optional
        The namespace to compute the expression in. This will be amended to
        include that data for the server. By default this will just be the
        client mapping to the server's data.
    compute_kwargs : dict, optional
        Extra kwargs to pass to compute on the server.
    odo_kwargs : dict, optional
        Extra kwargs to pass to odo on the server.
    profile : bool, optional
        Should blaze server run cProfile over the computation of the
        expression and the serialization of the response.
    profiler_output : file-like object, optional
        A file like object to hold the profiling output from the server.
        If this is not passed then the server will write the data to the
        server's filesystem
    """
    from .server import to_tree

    kwargs = keymap(u8, kwargs)

    tree = to_tree(expr)
    serial = ec.serial
    if profiler_output is not None:
        kwargs[u'profile'] = True
        kwargs[u'profiler_output'] = ':response'

    kwargs[u'compute_kwargs'] = keymap(u8, compute_kwargs or {})
    kwargs[u'odo_kwargs'] = keymap(u8, odo_kwargs or {})

    r = post(
        ec,
        '/compute',
        data=serial.dumps(assoc(kwargs, u'expr', tree)),
        auth=ec.auth,
        headers=mimetype(serial),
    )

    if not ok(r):
        raise ValueError("Bad response: %s" % reason(r))
    response = serial.loads(content(r))
    if profiler_output is not None:
        profiler_output.write(response[u'profiler_output'])
    return serial.data_loads(response[u'data'])

def json_expand(json_op, key_name='json'):
    """ Convert a string json object to Python dict in an op. """
    if type(json_op) == dict and key_name in json_op and json_op[key_name]:
        try:
            return update_in(json_op, [key_name], json.loads)
        except JSONDecodeError:
            return assoc(json_op, key_name, {})

    return json_op

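# A minimal sketch of the behaviour above (not part of the original code): a
# JSON string under the 'json' key is parsed into a dict, and an unparsable
# string is replaced with an empty dict. It assumes the module imports json,
# JSONDecodeError (json.JSONDecodeError), and toolz's update_in/assoc, as the
# function does.
op = {'id': 1, 'json': '{"a": 1}'}
assert json_expand(op) == {'id': 1, 'json': {'a': 1}}

broken = {'id': 2, 'json': '{not valid json'}
assert json_expand(broken) == {'id': 2, 'json': {}}

# Ops without the key (or with an empty value) pass through untouched.
assert json_expand({'id': 3}) == {'id': 3}
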
def _ready_task(self, function=None, key=None, args=(), kwargs={}, task=None,
                who_has=None):
    who_has = who_has or {}
    diagnostics = {}
    data = {k: self.data[k] for k in who_has if k in self.data}
    who_has = {k: set(map(coerce_to_address, v)) for k, v in who_has.items()
               if k not in self.data}
    if who_has:
        try:
            logger.info("gather %d keys from peers: %s",
                        len(who_has), str(who_has))
            diagnostics['transfer-start'] = time()
            other = yield gather_from_workers(who_has)
            diagnostics['transfer-stop'] = time()
            self.data.update(other)
            yield self.center.add_keys(address=self.address, keys=list(other))
            data.update(other)
        except KeyError as e:
            logger.warn("Could not find data for %s", key)
            raise Return({'status': 'missing-data',
                          'keys': e.args,
                          'key': key})
    else:
        transfer_time = 0

    try:
        start = default_timer()
        if task is not None:
            task = loads(task)
        if function is not None:
            function = loads(function)
        if args:
            args = loads(args)
        if kwargs:
            kwargs = loads(kwargs)
        diagnostics['deserialization'] = default_timer() - start
    except Exception as e:
        logger.warn("Could not deserialize task", exc_info=True)
        raise Return(assoc(error_message(e), 'key', key))

    if task is not None:
        assert not function and not args and not kwargs
        function = execute_task
        args = (task,)

    # Fill args with data
    args2 = pack_data(args, data)
    kwargs2 = pack_data(kwargs, data)

    raise Return({'status': 'OK',
                  'function': function,
                  'args': args2,
                  'kwargs': kwargs2,
                  'diagnostics': diagnostics,
                  'key': key})

def run(self, *args, **kwargs):
    """Run the server"""
    port = kwargs.pop('port', DEFAULT_PORT)
    self.port = port
    try:
        self.app.run(*args, port=port, **kwargs)
    except socket.error:
        print("\tOops, couldn't connect on port %d. Is it busy?" % port)
        if kwargs.get('retry', True):
            # Attempt to start the server on a new port.
            self.run(*args, **assoc(kwargs, 'port', port + 1))

def _write_tool(step_dir, name, inputs, outputs):
    out_file = os.path.join(step_dir, "%s.cwl" % name)
    out = {"class": "CommandLineTool",
           "baseCommand": ["bcbio_nextgen.py", "runfn", name, "cwl"],
           "inputs": [],
           "outputs": []}
    for i, inp in enumerate(inputs):
        out["inputs"].append(
            tz.assoc(inp, "inputBinding",
                     {"prefix": "%s=" % inp["id"].replace("#", ""),
                      "separate": False,
                      "itemSeparator": ";;",
                      "position": i}))
    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False,
                       allow_unicode=False)
    return os.path.join("steps", os.path.basename(out_file))

def set_model(filepath, k, v):
    """ Save key k and value v to json file. """
    try:
        # Read the existing model; fall back to an empty dict if the file is
        # missing or does not contain valid JSON.
        with open(filepath, 'r') as f:
            data = json.loads(f.read())
    except (IOError, OSError, ValueError):
        data = dict()
    new_data = assoc(data, k, v)
    with open(filepath, 'w+') as g:
        g.write(json.dumps(new_data))

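# A quick usage sketch for set_model above (not part of the original code),
# writing to a temporary file so it is self-contained. assoc leaves the read
# model untouched and returns the updated copy that gets written back.
import json
import tempfile

with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as tmp:
    tmp.write('{"alpha": 1}')

set_model(tmp.name, 'beta', 2)
with open(tmp.name) as f:
    assert json.load(f) == {'alpha': 1, 'beta': 2}
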
def test_parameterized_term_default_value_with_not_specified(self):
    defaults = {'a': 'default for a', 'b': NotSpecified}

    class F(Factor):
        params = defaults
        inputs = (SomeDataSet.foo,)
        dtype = 'f8'
        window_length = 5

    pattern = r"F expected a keyword parameter 'b'\."
    with assert_raises_regex(TypeError, pattern):
        F()
    with assert_raises_regex(TypeError, pattern):
        F(a='new a')

    assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
    assert_equal(
        F(a='new a', b='new b').params,
        {'a': 'new a', 'b': 'new b'},
    )

def fill_kwargs(fn, **kwargs):
    """ Read a csv file and fill up kwargs

    This normalizes kwargs against a sample file. It does the following:

    1.  If given a globstring, just use one file
    2.  Get names from csv file if not given
    3.  Identify the presence of a header
    4.  Identify dtypes
    5.  Establish column names
    6.  Switch around dtypes and column names if parse_dates is active

    Normally ``pd.read_csv`` does this for us.  However for ``dd.read_csv`` we
    need to be consistent across multiple files and don't want to do these
    heuristics each time so we use the pandas solution once, record the
    results, and then send back a fully explicit kwargs dict to send to future
    calls to ``pd.read_csv``.

    Returns
    -------
    columns : Index
        Column names from the sampled file.
    kwargs : dict
        Keyword arguments to give to pd.read_csv.
    """
    if 'index_col' in kwargs:
        msg = """
The index column cannot be set at dataframe creation time. Instead use
the `set_index` method on the dataframe after it is created.
"""
        raise ValueError(msg)

    kwargs = merge(csv_defaults, kwargs)
    sample_nrows = kwargs.pop('sample_nrows', 1000)
    essentials = ['columns', 'names', 'header', 'parse_dates', 'dtype']
    if set(essentials).issubset(kwargs):
        return kwargs

    # Let pandas infer on the first 100 rows
    if '*' in fn:
        filenames = sorted(glob(fn))
        if not filenames:
            raise ValueError("No files found matching name %s" % fn)
        fn = filenames[0]

    if kwargs['compression'] == 'infer':
        kwargs['compression'] = infer_compression(fn)

    if 'names' not in kwargs:
        kwargs['names'] = csv_names(fn, **kwargs)
        if 'header' not in kwargs:
            kwargs['header'] = 0
    else:
        if 'header' not in kwargs:
            kwargs['header'] = None

    kwargs = clean_kwargs(kwargs)
    try:
        head = pd.read_csv(fn, **assoc(kwargs, 'nrows', sample_nrows))
    except StopIteration:
        head = pd.read_csv(fn, **kwargs)

    if 'parse_dates' not in kwargs:
        kwargs['parse_dates'] = [col for col in head.dtypes.index
                                 if np.issubdtype(head.dtypes[col], np.datetime64)]

    new_dtype = dict(head.dtypes)
    dtype = kwargs.get('dtype', dict())
    for k, v in dict(head.dtypes).items():
        if k not in dtype:
            dtype[k] = v

    if kwargs.get('parse_dates'):
        for col in kwargs['parse_dates']:
            del dtype[col]

    kwargs['dtype'] = dtype

    return (head.columns.map(lambda s: s.strip() if isinstance(s, str) else s),
            kwargs)

def _print_python(expr, leaves=None):
    child, scope = print_python(leaves, expr._child)
    funcname = next(funcnames)
    return ('%s(%s)' % (funcname, child),
            toolz.assoc(scope, funcname, expr.func))

def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE, **kwargs):
    self.handlers = assoc(handlers, 'identity', self.identity)
    self.id = uuid.uuid1()
    super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)

def reducer(state, action):
    if action['type'] == 'ADD_TODO':
        todos = state['todos'] + (action['text'],)
        return toolz.assoc(state, 'todos', todos)
    return state

def top_then_bottom_then_top_again_etc(expr, scope, **kwargs):
    """ Compute expression against scope

    Does the following interpreter strategy:

    1.  Try compute_down on the entire expression
    2.  Otherwise compute_up from the leaves until we experience a type change
        (e.g. data changes from dict -> pandas DataFrame)
    3.  Re-optimize expression and re-pre-compute data
    4.  Go to step 1

    Examples
    --------
    >>> import numpy as np

    >>> s = symbol('s', 'var * {name: string, amount: int}')
    >>> data = np.array([('Alice', 100), ('Bob', 200), ('Charlie', 300)],
    ...                 dtype=[('name', 'S7'), ('amount', 'i4')])

    >>> e = s.amount.sum() + 1
    >>> top_then_bottom_then_top_again_etc(e, {s: data})
    601

    See Also
    --------
    bottom_up_until_type_break  -- uses this for bottom-up traversal
    top_to_bottom -- older version
    bottom_up -- older version still
    """
    # 0. Base case: expression is in dict, return associated data
    if expr in scope:
        return scope[expr]

    if not hasattr(expr, '_leaves'):
        return expr

    leaf_exprs = list(expr._leaves())
    leaf_data = [scope.get(leaf) for leaf in leaf_exprs]

    # 1. See if we have a direct computation path with compute_down
    try:
        return compute_down(expr, *leaf_data, **kwargs)
    except NotImplementedError:
        pass

    # 2. Compute from the bottom until there is a data type change
    expr2, scope2 = bottom_up_until_type_break(expr, scope, **kwargs)

    # 3. Re-optimize data and expressions
    optimize_ = kwargs.get('optimize', optimize)
    pre_compute_ = kwargs.get('pre_compute', pre_compute)
    if pre_compute_:
        scope3 = dict((e, pre_compute_(e, datum, **assoc(kwargs, 'scope', scope2)))
                      for e, datum in scope2.items())
    else:
        scope3 = scope2
    if optimize_:
        try:
            expr3 = optimize_(expr2, *[scope3[leaf] for leaf in expr2._leaves()])
            _d = dict(zip(expr2._leaves(), expr3._leaves()))
            scope4 = dict((e._subs(_d), d) for e, d in scope3.items())
        except NotImplementedError:
            expr3 = expr2
            scope4 = scope3
    else:
        expr3 = expr2
        scope4 = scope3

    # 4. Repeat
    if expr.isidentical(expr3):
        raise NotImplementedError("Don't know how to compute:\n"
                                  "type(expr): %s\n"
                                  "expr: %s\n"
                                  "data: %s" % (type(expr3), expr3, scope4))
    else:
        return top_then_bottom_then_top_again_etc(expr3, scope4, **kwargs)

def into(a, b, **kwargs):
    # TODO: handle large CSV case
    return into(a, into(pd.DataFrame, b), **assoc(kwargs, 'dshape', b.dshape))

def batch_tcga_metadata_by_participant(fns):
    metadata = [re.match(TCGA_RE, fn).groupdict() for fn in fns]
    metadata = [tz.assoc(d, "fn", fn) for d, fn in zip(metadata, fns)]
    participant = tz.groupby(lambda x: x["participant"], metadata).values()
    return participant

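# An illustrative sketch of the pattern used above (not part of the original
# code): parse each filename with a regex, attach the filename via assoc, then
# group the records by participant. The regex and filenames below are made up;
# the real TCGA_RE is defined elsewhere in the original module.
import re
import toolz as tz

TOY_RE = r"(?P<participant>TCGA-\w{2}-\w{4})-(?P<sample_type>\d{2})\.bam"
fns = ["TCGA-AB-1234-01.bam", "TCGA-AB-1234-10.bam", "TCGA-CD-5678-01.bam"]

metadata = [re.match(TOY_RE, fn).groupdict() for fn in fns]
metadata = [tz.assoc(d, "fn", fn) for d, fn in zip(metadata, fns)]
groups = tz.groupby(lambda x: x["participant"], metadata)
# Two files share a participant, one stands alone.
assert sorted(len(v) for v in groups.values()) == [1, 2]
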
def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE, **kwargs):
    self.handlers = assoc(handlers, 'identity', self.identity)
    self.id = str(uuid.uuid1())
    self._port = None
    self._rpcs = dict()
    super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)