def fake_metadata(num_exps,
                  num_plates,
                  num_wells,
                  num_sites,
                  datasets=('train', 'test')):
    rows = []
    for dataset in datasets:
        for _ in range(num_exps):
            cell_type = np.random.choice(CELL_TYPES)
            experiment_number = np.random.choice([1, 2, 3, 4, 5, 6, 7, 8, 9])
            experiment = '{}-{:02d}'.format(cell_type, experiment_number)
            base_row = {
                'experiment': experiment,
                'cell_type': cell_type,
                'dataset': dataset
            }
            for p in range(1, num_plates + 1):
                base_row = t.assoc(base_row, 'plate', p)
                rand_wells = np.random.choice(
                    ALL_WELLS, num_wells, replace=False)
                for well in rand_wells:
                    well_row = t.merge(base_row, {
                        'well': well,
                        'well_type': 'treatment',
                        'sirna': 1.0
                    })
                    for site in range(1, num_sites + 1):
                        rows.append(t.assoc(well_row, 'site', site))

    df = pd.DataFrame(rows)
    return df
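
The row-building above leans on the fact that toolz's assoc and merge return new dicts instead of mutating their arguments. A minimal sketch of that behaviour (assuming t is the toolz module, with an illustrative cell type):

import toolz as t

base_row = {'experiment': 'HUVEC-01', 'cell_type': 'HUVEC', 'dataset': 'train'}

# assoc returns a copy with the extra key; base_row itself is left untouched
plated = t.assoc(base_row, 'plate', 1)
assert 'plate' not in base_row and plated['plate'] == 1

# merge likewise builds a new dict from the plate row plus the well fields
well_row = t.merge(plated, {'well': 'B02', 'well_type': 'treatment', 'sirna': 1.0})
assert set(well_row) == {'experiment', 'cell_type', 'dataset',
                         'plate', 'well', 'well_type', 'sirna'}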
Example #2
    def data_for_app_GET(self):
        """ List all configurationkeys and rangeconstraints of specific application.
            Returns application with configurationkeys, rangeconstraints and exclusionconstraints
        """
        app_id = self.request.swagger_data['id']
        app = Application.get(app_id)
        if app is None:
            print_log(datetime.datetime.now(), 'GET',
                      '/applications/' + str(app_id) + '/rangeconstraints',
                      'Get all things of one application', None)
            return self.createResponse(None, 400)
        if app.apikey is None:
            app = self.set_app_apikey(app, app_id)
        configurationkeys = app.configurationkeys
        ranges = list(
            concat(list(map(lambda _: _.rangeconstraints, configurationkeys))))
        exclusions = self.get_app_exclusionconstraints(app_id)

        app_data = app.as_dict()
        app_data = assoc(app_data, 'configurationkeys',
                         list(map(lambda _: _.as_dict(), configurationkeys)))
        app_data = assoc(app_data, 'rangeconstraints',
                         list(map(lambda _: _.as_dict(), ranges)))
        app_data = assoc(app_data, 'exclusionconstraints',
                         list(map(lambda _: _.as_dict(), exclusions)))

        return app_data
Example #3
    def __init__(self, center, workers):
        self.center = assoc(start_center(center), 'address', center)
        self.workers = [
            assoc(start_worker(center, worker), 'address', worker)
            for worker in workers
        ]
        sleep(1)
        self.report()
Example #4
File: todo.py  Project: ariddell/aioredux
def todo_app(state, action):
    if action['type'] == ActionTypes.ADD_TODO:
        todos = state['todos'] + (action['text'],)
        return toolz.assoc(state, 'todos', todos)
    elif action['type'] == ActionTypes.COMPLETE_TODO:
        todos = state['todos'][:action['index']] + state['todos'][action['index'] + 1:]
        return toolz.assoc(state, 'todos', todos)
    else:
        return state
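
A short usage sketch of the reducer above; the ActionTypes members are assumed to mirror aioredux's todo example:

import enum

class ActionTypes(enum.Enum):
    ADD_TODO = 'ADD_TODO'
    COMPLETE_TODO = 'COMPLETE_TODO'

state = {'todos': ()}
state = todo_app(state, {'type': ActionTypes.ADD_TODO, 'text': 'write docs'})
state = todo_app(state, {'type': ActionTypes.ADD_TODO, 'text': 'ship release'})
assert state['todos'] == ('write docs', 'ship release')

state = todo_app(state, {'type': ActionTypes.COMPLETE_TODO, 'index': 0})
assert state['todos'] == ('ship release',)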
Example #5
def todo_app(state, action):
    if action['type'] == ActionTypes.ADD_TODO:
        todos = state['todos'] + (action['text'],)
        return toolz.assoc(state, 'todos', todos)
    elif action['type'] == ActionTypes.COMPLETE_TODO:
        todos = state['todos'][:action['index']] + state['todos'][action['index'] + 1:]
        return toolz.assoc(state, 'todos', todos)
    else:
        return state
Example #6
def set_transaction_type_if_needed(transaction_dict: Dict[str, Any]) -> Dict[str, Any]:
    if 'type' not in transaction_dict:
        if 'gasPrice' in transaction_dict and 'accessList' in transaction_dict:
            # access list txn - type 1
            transaction_dict = assoc(transaction_dict, 'type', '0x1')
        elif 'maxFeePerGas' in transaction_dict and 'maxPriorityFeePerGas' in transaction_dict:
            # dynamic fee txn - type 2
            transaction_dict = assoc(transaction_dict, 'type', '0x2')
    return transaction_dict
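
A hedged usage sketch; the keys follow the EIP-2930 / EIP-1559 field names the function already checks for:

legacy = {'gasPrice': 10**9, 'nonce': 0}
assert 'type' not in set_transaction_type_if_needed(legacy)  # nothing to infer

access_list_txn = {'gasPrice': 10**9, 'accessList': [], 'nonce': 0}
assert set_transaction_type_if_needed(access_list_txn)['type'] == '0x1'

dynamic_fee_txn = {'maxFeePerGas': 2 * 10**9, 'maxPriorityFeePerGas': 10**9, 'nonce': 0}
assert set_transaction_type_if_needed(dynamic_fee_txn)['type'] == '0x2'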
Example #7
def rebatch_metadata_by_experiment(metadata):
    normal, normal_rest = prioritize_normals(metadata)
    batch = metadata[0]["participant"]
    tumor_batch = [tz.assoc(x, "batch", batch) for x in metadata if x["sample_type"] in PRIORITIZED_TUMOR_CODES.keys()]
    normal = [tz.assoc(normal, "batch", batch)] if normal else []
    # run each non priority normal as its own tumor sample with no control
    normal_rest = [tz.assoc(x, "batch", batch + "-" + x["sample_type"]) for x in normal_rest]
    normal_rest = [tz.assoc(x, "phenotype", "tumor") for x in normal_rest]
    all_batches = normal + normal_rest + tumor_batch
    return all_batches
Example #8
File: unify.py  Project: gintian/MonPLP
def unify(u, v, s):
    u = transitive_get(u, s)
    v = transitive_get(v, s)
    if u == v:
        return s
    if isvar(u):
        return assoc(s, u, v)  # return the new substitution map
    if isvar(v):
        return assoc(s, v, u)
    return _unify(u, v, s)
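
What matters for this collection of examples is how the substitution map s grows: each successful variable binding goes through assoc, so earlier substitution dicts are never mutated. A minimal sketch with plain toolz (no var/isvar machinery needed to see this):

from toolz import assoc

s0 = {}                    # empty substitution
s1 = assoc(s0, 'x', 2)     # bind x -> 2 in a new map
s2 = assoc(s1, 'y', 'x')   # bind y -> x in yet another map
assert s0 == {} and s1 == {'x': 2} and s2 == {'x': 2, 'y': 'x'}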
Example #9
def tls_cluster_context(worker_kwargs=None, scheduler_kwargs=None,
                        security=None, **kwargs):
    security = security or tls_only_security()
    worker_kwargs = assoc(worker_kwargs or {}, 'security', security)
    scheduler_kwargs = assoc(scheduler_kwargs or {}, 'security', security)

    with cluster(worker_kwargs=worker_kwargs,
                 scheduler_kwargs=scheduler_kwargs,
                 **kwargs) as (s, workers):
        yield s, workers
Example #10
def tls_cluster_context(worker_kwargs=None, scheduler_kwargs=None,
                        security=None, **kwargs):
    security = security or tls_only_security()
    worker_kwargs = assoc(worker_kwargs or {}, 'security', security)
    scheduler_kwargs = assoc(scheduler_kwargs or {}, 'security', security)

    with cluster(worker_kwargs=worker_kwargs,
                 scheduler_kwargs=scheduler_kwargs,
                 **kwargs) as (s, workers):
        yield s, workers
Example #11
    def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE,
                 connection_limit=512, **kwargs):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = str(uuid.uuid1())
        self._port = None
        self.rpc = ConnectionPool(limit=connection_limit)
        super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)
Example #12
File: core.py  Project: dask/distributed
    def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE,
            connection_limit=512, deserialize=True, **kwargs):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = str(uuid.uuid1())
        self._port = None
        self._listen_streams = dict()
        self.rpc = ConnectionPool(limit=connection_limit,
                                  deserialize=deserialize)
        self.deserialize = deserialize
        self.monitor = SystemMonitor()
        self.counters = None
        self.digests = None
        if hasattr(self, 'loop'):
            with ignoring(ImportError):
                from .counter import Digest
                self.digests = defaultdict(partial(Digest, loop=self.loop))

            from .counter import Counter
            self.counters = defaultdict(partial(Counter, loop=self.loop))

            pc = PeriodicCallback(self.monitor.update, 500, io_loop=self.loop)
            self.loop.add_callback(pc.start)
            if self.digests is not None:
                self._last_tick = time()
                self._tick_pc = PeriodicCallback(self._measure_tick, 20, io_loop=self.loop)
                self.loop.add_callback(self._tick_pc.start)


        self.__stopped = False

        super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)
Example #13
    async def test_unmonitored_threat(
        self,
        threat_monitor,
        threat_repo,
        occurrence_factory,
        threat_factory,
        report_threat_dto_factory,
    ):
        dto = report_threat_dto_factory()
        threat = threat_factory(
            name=dto.name,
            danger_level=dto.danger_level,
            location=dto.location,
            occurrences=[occurrence_factory(state="resolved").dict()],
        )
        new_threat = threat_factory(
            **assoc(
                threat.dict(),
                "occurrences",
                [occurrence_factory(state="pending").dict()],
            ), )

        threat_repo.upsert.return_value = threat
        threat_repo.create_pending_occurrence.return_value = new_threat
        threat_monitor.start_monitoring.return_value = new_threat

        result = await threat_service.report_threat(threat_monitor,
                                                    threat_repo, dto)

        threat_repo.upsert.assert_called_once_with(dto)
        threat_repo.create_pending_occurrence.assert_called_once_with(threat)
        threat_monitor.start_monitoring.assert_called_once_with(new_threat)

        assert result == new_threat
        assert result.is_being_monitored() is True
Example #14
File: wtg.py  Project: sarenehan/uwnet
def step_with_model(rhs, state, dt=.125, n=100):
    """Perform a number of time steps with a model"""
    for t in range(n):
        qt_dot = rhs(state)
        new_qt = state['QT'] + dt * qt_dot['QT']
        state = assoc(state, 'QT', new_qt)
        yield state
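
A usage sketch of the generator above with a toy rhs that simply damps QT (in the uwnet project the right-hand side would presumably be a trained model):

def toy_rhs(state):
    # tendency dict keyed like the state
    return {'QT': -0.1 * state['QT']}

state0 = {'QT': 1.0}
trajectory = list(step_with_model(toy_rhs, state0, dt=0.5, n=3))
assert len(trajectory) == 3
assert state0 == {'QT': 1.0}                       # the input state is never mutated
assert trajectory[0]['QT'] == 1.0 + 0.5 * (-0.1 * 1.0)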
Example #15
    def __init__(self, handlers, connection_limit=512, deserialize=True,
                 io_loop=None):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = str(uuid.uuid1())
        self._address = None
        self._listen_address = None
        self._port = None
        self._comms = {}
        self.rpc = ConnectionPool(limit=connection_limit,
                                  deserialize=deserialize)
        self.deserialize = deserialize
        self.monitor = SystemMonitor()
        self.counters = None
        self.digests = None

        self.listener = None
        self.io_loop = io_loop or IOLoop.current()

        if hasattr(self, 'loop'):
            # XXX?
            with ignoring(ImportError):
                from .counter import Digest
                self.digests = defaultdict(partial(Digest, loop=self.loop))

            from .counter import Counter
            self.counters = defaultdict(partial(Counter, loop=self.loop))

            pc = PeriodicCallback(self.monitor.update, 500, io_loop=self.loop)
            self.loop.add_callback(pc.start)
            if self.digests is not None:
                self._last_tick = time()
                self._tick_pc = PeriodicCallback(self._measure_tick, 20, io_loop=self.loop)
                self.loop.add_callback(self._tick_pc.start)

        self.__stopped = False
Example #16
File: __init__.py  Project: llllllllll/fz
def _normalize_arg(other, constants):
    """Get the name to use to build the string and turn the value into
    something that can go into the ast.

    If needed this will functionally update the constants.

    Parameters
    ----------
    other : any
        The object to normalize.
    constants : dict[str -> any]
        The constant namespace.

    Returns
    -------
    othername : str
        The name to use in the ``_name`` of the lambda.
    other : any
        The normalized value.
    constants : dict[str -> any]
        The potentially updated constants.
    """
    if not isinstance(other, placeholder):
        othername = repr(other)
        name = '_' + uuid4().hex
        constants = assoc(constants, name, other)
        other = ast.Name(id=name, ctx=ast.Load())
    elif other._tree is not other:
        othername = '(%s)' % other._name
    else:
        othername = other._name

    return othername, other, constants
Example #17
def groupby_many(keys: Callable[[Any], Iterable], reducer: Reducer, initial):
    """Given a `keys` function, that maps an element into multiple keys, transduces the collection into a dictionary of key to group of matching elements.

    >>> transducer.transduce(
        transducer.groupby_many(
            lambda x: ("even",) if x % 2 == 0 else ("odd",),
            lambda s, x: (*s, x),
            (),
        ),
        lambda s, _: s,
        {},
        [1, 2, 3, 4, 5],
    )
    {"even": (2, 4), "odd": (1, 3, 5)}
    """
    return functional_generic.compose(
        mapcat(
            functional_generic.compose_left(
                functional_generic.juxt(keys, functional.wrap_tuple),
                sync.star(itertools.product),
            ), ),
        lambda step: lambda s, x: step(
            toolz.assoc(s, x[0], reducer(s.get(x[0], initial), x[1])),
            x,
        ),
    )
Example #18
async def run_traffic_jam(nsends, nbytes):
    # This test eats `nsends * nbytes` bytes in RAM
    np = pytest.importorskip("numpy")
    from distributed.protocol import to_serialize

    data = bytes(np.random.randint(0, 255, size=(nbytes, )).astype("u1").data)
    async with EchoServer() as e:
        comm = await connect(e.address)

        b = BatchedSend(interval=0.01)
        b.start(comm)

        msg = {"x": to_serialize(data)}
        for i in range(nsends):
            b.send(assoc(msg, "i", i))
            if np.random.random() > 0.5:
                await asyncio.sleep(0.001)

        results = []
        count = 0
        while len(results) < nsends:
            # If this times out then I think it's a backpressure issue
            # Somehow we're able to flood the socket so that the receiving end
            # loses some of our messages
            L = await asyncio.wait_for(comm.read(), 5)
            count += 1
            results.extend(r["i"] for r in L)

        assert count == b.batch_count == e.count
        assert b.message_count == nsends

        assert results == list(range(nsends))

        await comm.close()  # external closing
        await b.close()
Example #19
File: core.py  Project: indera/distributed
    def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE,
                 connection_limit=512, **kwargs):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = str(uuid.uuid1())
        self._port = None
        self.rpc = ConnectionPool(limit=connection_limit)
        super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)
Example #20
def run_traffic_jam(nsends, nbytes):
    # This test eats `nsends * nbytes` bytes in RAM
    np = pytest.importorskip('numpy')
    from distributed.protocol import to_serialize
    data = bytes(np.random.randint(0, 255, size=(nbytes,)).astype('u1').data)
    with echo_server() as e:
        comm = yield connect(e.address)

        b = BatchedSend(interval=0.01)
        b.start(comm)

        msg = {'x': to_serialize(data)}
        for i in range(nsends):
            b.send(assoc(msg, 'i', i))
            if np.random.random() > 0.5:
                yield gen.sleep(0.001)

        results = []
        count = 0
        while len(results) < nsends:
            # If this times out then I think it's a backpressure issue
            # Somehow we're able to flood the socket so that the receiving end
            # loses some of our messages
            L = yield gen.with_timeout(timedelta(seconds=5), comm.read())
            count += 1
            results.extend(r['i'] for r in L)

        assert count == b.batch_count == e.count
        assert b.message_count == nsends

        assert results == list(range(nsends))

        comm.close()  # external closing
        yield b.close()
Example #21
def unify(u, v, s):  # no check at the moment
    """ Find substitution so that u == v while satisfying s

    >>> x = var('x')
    >>> unify((1, x), (1, 2), {})
    {~x: 2}
    """
    u = walk(u, s)
    v = walk(v, s)
    if u == v:
        return s
    if isvar(u):
        return assoc(s, u, v)
    if isvar(v):
        return assoc(s, v, u)
    return _unify(u, v, s)
Example #22
File: io.py  Project: jayhetee/dask
def fill_kwargs(fn, args, kwargs):
    """ Read a csv file and fill up kwargs

    This normalizes kwargs against a sample file.  It does the following:

    1.  If given a globstring, just use one file
    2.  Get names from csv file if not given
    3.  Identify the presence of a header
    4.  Identify dtypes
    5.  Establish column names
    6.  Switch around dtypes and column names if parse_dates is active

    Normally ``pd.read_csv`` does this for us.  However for ``dd.read_csv`` we
    need to be consistent across multiple files and don't want to do these
    heuristics each time so we use the pandas solution once, record the
    results, and then send back a fully explicit kwargs dict to send to future
    calls to ``pd.read_csv``.

    Returns
    -------

    kwargs: dict
        keyword arguments to give to pd.read_csv
    """
    kwargs = merge(csv_defaults, kwargs)
    sample_nrows = kwargs.pop('sample_nrows', 1000)
    essentials = ['columns', 'names', 'header', 'parse_dates', 'dtype']
    if set(essentials).issubset(kwargs):
        return kwargs

    # Let pandas infer on the first sample_nrows rows
    if '*' in fn:
        filenames = sorted(glob(fn))
        if not filenames:
            raise ValueError("No files found matching name %s" % fn)
        fn = filenames[0]

    if 'names' not in kwargs:
        kwargs['names'] = csv_names(fn, **kwargs)
    if 'header' not in kwargs:
        kwargs['header'] = infer_header(fn, **kwargs)
        if kwargs['header'] is True:
            kwargs['header'] = 0

    try:
        head = pd.read_csv(fn, *args, **assoc(kwargs, 'nrows', sample_nrows))
    except StopIteration:
        head = pd.read_csv(fn, *args, **kwargs)

    if 'parse_dates' not in kwargs:
        kwargs['parse_dates'] = [col for col in head.dtypes.index
                           if np.issubdtype(head.dtypes[col], np.datetime64)]
    if 'dtype' not in kwargs:
        kwargs['dtype'] = dict(head.dtypes)
        for col in kwargs['parse_dates']:
            del kwargs['dtype'][col]

    kwargs['columns'] = list(head.columns)

    return kwargs
Example #23
def fill_kwargs(fn, args, kwargs):
    """ Read a csv file and fill up kwargs

    This normalizes kwargs against a sample file.  It does the following:

    1.  If given a globstring, just use one file
    2.  Get names from csv file if not given
    3.  Identify the presence of a header
    4.  Identify dtypes
    5.  Establish column names
    6.  Switch around dtypes and column names if parse_dates is active

    Normally ``pd.read_csv`` does this for us.  However for ``dd.read_csv`` we
    need to be consistent across multiple files and don't want to do these
    heuristics each time so we use the pandas solution once, record the
    results, and then send back a fully explicit kwargs dict to send to future
    calls to ``pd.read_csv``.

    Returns
    -------

    kwargs: dict
        keyword arguments to give to pd.read_csv
    """
    kwargs = merge(csv_defaults, kwargs)
    sample_nrows = kwargs.pop('sample_nrows', 1000)
    essentials = ['columns', 'names', 'header', 'parse_dates', 'dtype']
    if set(essentials).issubset(kwargs):
        return kwargs

    # Let pandas infer on the first sample_nrows rows
    if '*' in fn:
        filenames = sorted(glob(fn))
        if not filenames:
            raise ValueError("No files found matching name %s" % fn)
        fn = filenames[0]

    if 'names' not in kwargs:
        kwargs['names'] = csv_names(fn, **kwargs)
    if 'header' not in kwargs:
        kwargs['header'] = infer_header(fn, **kwargs)
        if kwargs['header'] is True:
            kwargs['header'] = 0

    try:
        head = pd.read_csv(fn, *args, **assoc(kwargs, 'nrows', sample_nrows))
    except StopIteration:
        head = pd.read_csv(fn, *args, **kwargs)

    if 'parse_dates' not in kwargs:
        kwargs['parse_dates'] = [col for col in head.dtypes.index
                           if np.issubdtype(head.dtypes[col], np.datetime64)]
    if 'dtype' not in kwargs:
        kwargs['dtype'] = dict(head.dtypes)
        for col in kwargs['parse_dates']:
            del kwargs['dtype'][col]

    kwargs['columns'] = list(head.columns)

    return kwargs
Example #24
def rebatch_metadata_by_experiment(metadata):
    normal, normal_rest = prioritize_normals(metadata)
    batch = metadata[0]["participant"]
    tumor_batch = [
        tz.assoc(x, "batch", batch) for x in metadata
        if x["sample_type"] in PRIORITIZED_TUMOR_CODES.keys()
    ]
    normal = [tz.assoc(normal, "batch", batch)] if normal else []
    # run each non priority normal as its own tumor sample with no control
    normal_rest = [
        tz.assoc(x, "batch", batch + "-" + x["sample_type"])
        for x in normal_rest
    ]
    normal_rest = [tz.assoc(x, "phenotype", "tumor") for x in normal_rest]
    all_batches = normal + normal_rest + tumor_batch
    return all_batches
Example #25
    def process(msg):
        try:
            result = yield self.compute(report=False, **msg)
            bstream.send(result)
        except Exception as e:
            logger.exception(e)
            bstream.send(assoc(error_message(e), 'key', msg.get('key')))
Example #26
    def test_data_for_app_GET(self):
        from toolz import assoc, concat
        self.req.swagger_data = {'id': 1}
        httpApps = Applications(self.req)
        response = httpApps.data_for_app_GET()

        app = Application.get(1)
        configurationkeys = app.configurationkeys
        ranges = list(concat(list(map(lambda _: _.rangeconstraints, configurationkeys))))

        app_data = app.as_dict()
        app_data = assoc(app_data, 'configurationkeys', list(map(lambda _: _.as_dict(), configurationkeys)))
        app_data = assoc(app_data, 'rangeconstraints', list(map(lambda _: _.as_dict(), ranges)))
        app_data = assoc(app_data, 'exclusionconstraints', list(map(lambda _: _.as_dict(), httpApps.get_app_exclusionconstraints(1))))

        assert response == app_data
Example #27
    def _get_subnet_config_w_cidr(self, network_config):
        network_cidr_base = str(network_config.get('network_cidr_base', '172.16.0.0'))
        network_cidr_size = str(network_config.get('network_cidr_size', '20'))
        first_network_address_block = str(network_config.get('first_network_address_block', network_cidr_base))

        ret_val = {}
        base_cidr = network_cidr_base + '/' + network_cidr_size
        net = netaddr.IPNetwork(base_cidr)

        grouped_subnet = groupby('size', self._get_subnet_config_w_az(network_config))
        subnet_groups = sorted(grouped_subnet.items())
        available_cidrs = []

        for subnet_size, subnet_configs in subnet_groups:
            newcidrs = net.subnet(int(subnet_size))

            for subnet_config in subnet_configs:
                try:
                    cidr = newcidrs.next()
                except StopIteration as e:
                    net = chain(*reversed(available_cidrs)).next()
                    newcidrs = net.subnet(int(subnet_size))
                    cidr = newcidrs.next()

                new_config = assoc(subnet_config, 'cidr', str(cidr))
                yield new_config
            else:
                net = newcidrs.next()
                available_cidrs.append(newcidrs)
Example #28
    def process(msg):
        try:
            result = yield self.compute(report=False, **msg)
            bstream.send(result)
        except Exception as e:
            logger.exception(e)
            bstream.send(assoc(error_message(e), 'key', msg.get('key')))
Example #29
def run_traffic_jam(nsends, nbytes):
    # This test eats `nsends * nbytes` bytes in RAM
    np = pytest.importorskip('numpy')
    from distributed.protocol import to_serialize
    data = bytes(np.random.randint(0, 255, size=(nbytes, )).astype('u1').data)
    with echo_server() as e:
        comm = yield connect(e.address)

        b = BatchedSend(interval=0.01)
        b.start(comm)

        msg = {'x': to_serialize(data)}
        for i in range(nsends):
            b.send(assoc(msg, 'i', i))
            if np.random.random() > 0.5:
                yield gen.sleep(0.001)

        results = []
        count = 0
        while len(results) < nsends:
            # If this times out then I think it's a backpressure issue
            # Somehow we're able to flood the socket so that the receiving end
            # loses some of our messages
            L = yield gen.with_timeout(timedelta(seconds=5), comm.read())
            count += 1
            results.extend(r['i'] for r in L)

        assert count == b.batch_count == e.count
        assert b.message_count == nsends

        assert results == list(range(nsends))

        comm.close()  # external closing
        yield b.close()
Example #30
    def _get_subnet_config_w_cidr(self, network_config):
        network_cidr_base = str(
            network_config.get('network_cidr_base', '172.16.0.0'))
        network_cidr_size = str(network_config.get('network_cidr_size', '20'))
        first_network_address_block = str(
            network_config.get('first_network_address_block',
                               network_cidr_base))

        ret_val = {}
        base_cidr = network_cidr_base + '/' + network_cidr_size
        net = netaddr.IPNetwork(base_cidr)

        grouped_subnet = groupby('size',
                                 self._get_subnet_config_w_az(network_config))
        subnet_groups = sorted(grouped_subnet.items())
        available_cidrs = []

        for subnet_size, subnet_configs in subnet_groups:
            newcidrs = net.subnet(int(subnet_size))

            for subnet_config in subnet_configs:
                try:
                    cidr = newcidrs.next()
                except StopIteration as e:
                    net = chain(*reversed(available_cidrs)).next()
                    newcidrs = net.subnet(int(subnet_size))
                    cidr = newcidrs.next()

                new_config = assoc(subnet_config, 'cidr', str(cidr))
                yield new_config
            else:
                net = newcidrs.next()
                available_cidrs.append(newcidrs)
Example #31
def read_csv(fn, *args, **kwargs):
    chunkbytes = kwargs.pop('chunkbytes', 2**25)  # 50 MB
    categorize = kwargs.pop('categorize', None)
    index = kwargs.pop('index', None)
    if index and categorize == None:
        categorize = True

    kwargs = fill_kwargs(fn, args, kwargs)

    # Handle glob strings
    if '*' in fn:
        return concat([read_csv(f, *args, **kwargs) for f in sorted(glob(fn))])

    token = tokenize(os.path.getmtime(fn), args, kwargs)
    name = 'read-csv-%s-%s' % (fn, token)

    columns = kwargs.pop('columns')
    header = kwargs.pop('header')

    if 'nrows' in kwargs:  # Just create single partition
        dsk = {(name, 0): (apply, pd.read_csv, (fn,),
                                  assoc(kwargs, 'header', header))}
        result = DataFrame(dsk, name, columns, [None, None])

    else:
        # Chunk sizes and numbers
        total_bytes = file_size(fn, kwargs['compression'])
        nchunks = int(ceil(total_bytes / chunkbytes))
        divisions = [None] * (nchunks + 1)

        first_read_csv = partial(pd.read_csv, *args, header=header,
                               **dissoc(kwargs, 'compression'))
        rest_read_csv = partial(pd.read_csv, *args, header=None,
                              **dissoc(kwargs, 'compression'))

        # Create dask graph
        dsk = dict(((name, i), (rest_read_csv, (BytesIO,
                                   (textblock, fn,
                                       i*chunkbytes, (i+1) * chunkbytes,
                                       kwargs['compression']))))
                   for i in range(1, nchunks))
        dsk[(name, 0)] = (first_read_csv, (BytesIO,
                           (textblock, fn, 0, chunkbytes, kwargs['compression'])))

        result = DataFrame(dsk, name, columns, divisions)

    if categorize or index:
        categories, quantiles = categories_and_quantiles(fn, args, kwargs,
                                                         index, categorize,
                                                         chunkbytes=chunkbytes)

    if categorize:
        func = partial(categorize_block, categories=categories)
        result = result.map_partitions(func, columns=columns)

    if index:
        result = set_partition(result, index, quantiles)

    return result
Example #32
File: test_term.py  Project: zhygit/zipline
    def test_parameterized_term_default_value(self):
        defaults = {'a': 'default for a', 'b': 'default for b'}

        class F(Factor):
            params = defaults

            inputs = (SomeDataSet.foo,)
            dtype = 'f8'
            window_length = 5

        assert_equal(F().params, defaults)
        assert_equal(F(a='new a').params, assoc(defaults, 'a', 'new a'))
        assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
        assert_equal(
            F(a='new a', b='new b').params,
            {'a': 'new a', 'b': 'new b'},
        )
Example #33
    def decode(self, data: bytes) -> _DecodedMsgType:
        try:
            raw_decoded = cast(Dict[str, int], super().decode(data))
        except rlp.exceptions.ListDeserializationError:
            self.logger.warning("Malformed Disconnect message: %s", data)
            raise MalformedMessage(f"Malformed Disconnect message: {data}")
        return assoc(raw_decoded, "reason_name",
                     self.get_reason_name(raw_decoded["reason"]))
Example #34
File: server.py  Project: mindis/blaze
    def run(self, *args, **kwargs):
        port = kwargs.pop('port', DEFAULT_PORT)
        self.port = port
        try:
            self.app.run(*args, port=port, **kwargs)
        except socket.error:
            print("\tOops, couldn't connect on port %d.  Is it busy?" % port)
            self.run(*args, **assoc(kwargs, 'port', port + 1))
Example #35
    def experiments_GET(self):
        """ List all Application's Experiments including Experiments' status """
        app_id = self.request.swagger_data['appid']
        experiments = Experiment.query().join(Application)\
            .filter(Application.id == app_id).all()

        return list(map(lambda _: assoc(_.as_dict(), 'status',\
            _.get_status()), experiments))
Example #36
    def _get_subnet_config_w_az(self, network_config):
        az_count = int(network_config.get('az_count', 2))
        subnet_config = network_config.get('subnet_config', {})

        for subnet in subnet_config:
            for az in range(az_count):
                newsubnet = assoc(subnet, 'AZ', az)
                yield newsubnet
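
A small sketch of the fan-out the method above performs: every subnet entry is emitted once per availability zone, and assoc keeps the source dicts unchanged (written inline here rather than as a method):

from toolz import assoc

network_config = {
    'az_count': 2,
    'subnet_config': [{'name': 'public', 'size': '24'}],
}

subnets = [
    assoc(subnet, 'AZ', az)
    for subnet in network_config['subnet_config']
    for az in range(int(network_config['az_count']))
]
assert [s['AZ'] for s in subnets] == [0, 1]
assert 'AZ' not in network_config['subnet_config'][0]   # source dict unchanged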
Example #37
def fix_user_identities(event):
    field = 'user_identities'
    if field not in event:
        return None

    identities = event.pop(field)
    identities = (assoc(x, 'row_idx', event['row_idx']) for x in identities)
    return identities
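
A hedged usage sketch of fix_user_identities: the field is popped off the event (so the event dict is mutated), while each identity dict is copied via assoc with the event's row_idx attached:

event = {
    'row_idx': 7,
    'user_identities': [{'type': 'email'}, {'type': 'customer_id'}],
}
identities = list(fix_user_identities(event))

assert 'user_identities' not in event                 # popped from the event
assert all(i['row_idx'] == 7 for i in identities)     # each identity tagged with row_idx
assert fix_user_identities({'row_idx': 8}) is None    # nothing to do without the field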
Example #38
    def _get_subnet_config_w_az(self, network_config):
        az_count = int(network_config.get('az_count', 2))
        subnet_config = network_config.get('subnet_config', {})

        for subnet in subnet_config:
            for az in range(az_count):
                newsubnet = assoc(subnet, 'AZ', az)
                yield newsubnet
Example #39
def train(client, X, y, params, model_factory, sample_weight=None, **kwargs):
    data_parts = X.to_delayed()
    label_parts = y.to_delayed()
    if isinstance(data_parts, np.ndarray):
        assert data_parts.shape[1] == 1
        data_parts = data_parts.flatten().tolist()
    if isinstance(label_parts, np.ndarray):
        assert label_parts.ndim == 1 or label_parts.shape[1] == 1
        label_parts = label_parts.flatten().tolist()

    # Arrange parts into tuples.  This enforces co-locality
    if sample_weight is not None:
        sample_weight_parts = sample_weight.to_delayed()
        if isinstance(sample_weight_parts, np.ndarray):
            assert sample_weight_parts.ndim == 1 or sample_weight_parts.shape[
                1] == 1
            sample_weight_parts = sample_weight_parts.flatten().tolist()
        parts = list(
            map(delayed, zip(data_parts, label_parts, sample_weight_parts)))
    else:
        parts = list(map(delayed, zip(data_parts, label_parts)))

    parts = client.compute(parts)  # Start computation in the background
    wait(parts)
    for part in parts:
        if part.status == 'error':
            part  # trigger error locally
    key_to_part_dict = dict([(part.key, part) for part in parts])
    who_has = client.who_has(parts)
    worker_map = defaultdict(list)
    for key, workers in who_has.items():
        worker_map[first(workers)].append(key_to_part_dict[key])
    master_worker = first(worker_map)
    ncores = client.ncores()  # Number of cores per worker
    if "tree_learner" not in params or params['tree_learner'].lower() not in {
            "data", "feature", "voting"
    }:
        logger.warning(
            "Parameter tree_learner not set or set to incorrect value (%s), using 'data' as default",
            params.get("tree_learner", None))
        params['tree_learner'] = "data"
    # Tell each worker to init the booster on the chunks/parts that it has locally
    futures_classifiers = [
        client.submit(_fit_local,
                      model_factory=model_factory,
                      params=assoc(params, 'num_threads', ncores[worker]),
                      list_of_parts=list_of_parts,
                      worker_addresses=list(worker_map.keys()),
                      local_listen_port=params.get("local_listen_port", 12400),
                      listen_time_out=params.get("listen_time_out", 120),
                      return_model=worker == master_worker,
                      **kwargs)
        for worker, list_of_parts in worker_map.items()
    ]

    results = client.gather(futures_classifiers)
    results = [v for v in results if v]
    return results[0]
Example #40
File: core.py  Project: dma092/dask-xgboost
def _train(client, params, data, labels, dmatrix_kwargs={}, **kwargs):
    """
    Asynchronous version of train

    See Also
    --------
    train
    """
    # Break apart Dask.array/dataframe into chunks/parts
    data_parts = data.to_delayed()
    label_parts = labels.to_delayed()
    if isinstance(data_parts, np.ndarray):
        assert data_parts.shape[1] == 1
        data_parts = data_parts.flatten().tolist()
    if isinstance(label_parts, np.ndarray):
        assert label_parts.ndim == 1 or label_parts.shape[1] == 1
        label_parts = label_parts.flatten().tolist()

    # Arrange parts into pairs.  This enforces co-locality
    parts = list(map(delayed, zip(data_parts, label_parts)))
    parts = client.compute(parts)  # Start computation in the background
    yield wait(parts)

    for part in parts:
        if part.status == 'error':
            yield part  # trigger error locally

    # Because XGBoost-python doesn't yet allow iterative training, we need to
    # find the locations of all chunks and map them to particular Dask workers
    key_to_part_dict = dict([(part.key, part) for part in parts])
    who_has = yield client.scheduler.who_has(keys=[part.key for part in parts])
    worker_map = defaultdict(list)
    for key, workers in who_has.items():
        worker_map[first(workers)].append(key_to_part_dict[key])

    ncores = yield client.scheduler.ncores()  # Number of cores per worker

    # Start the XGBoost tracker on the Dask scheduler
    host, port = parse_host_port(client.scheduler.address)
    env = yield client._run_on_scheduler(start_tracker, host.strip('/:'),
                                         len(worker_map))

    # Tell each worker to train on the chunks/parts that it has locally
    futures = [
        client.submit(train_part,
                      env,
                      assoc(params, 'nthread', ncores[worker]),
                      list_of_parts,
                      workers=worker,
                      dmatrix_kwargs=dmatrix_kwargs,
                      **kwargs)
        for worker, list_of_parts in worker_map.items()
    ]

    # Get the results, only one will be non-None
    results = yield client._gather(futures)
    result = [v for v in results if v][0]
    raise gen.Return(result)
Example #41
    def run(self, *args, **kwargs):
        """Run the server"""
        port = kwargs.pop('port', DEFAULT_PORT)
        self.port = port
        try:
            self.app.run(*args, port=port, **kwargs)
        except socket.error:
            print("\tOops, couldn't connect on port %d.  Is it busy?" % port)
            self.run(*args, **assoc(kwargs, 'port', port + 1))
Example #42
File: client.py  Project: blaze/blaze
def compute_down(expr,
                 ec,
                 profiler_output=None,
                 compute_kwargs=None,
                 odo_kwargs=None,
                 **kwargs):
    """Compute down for blaze clients.

    Parameters
    ----------
    expr : Expr
        The expression to send to the server.
    ec : Client
        The blaze client to compute against.
    namespace : dict[Symbol -> any], optional
        The namespace to compute the expression in. This will be amended to
        include that data for the server. By default this will just be the
        client mapping to the server's data.
    compute_kwargs : dict, optional
        Extra kwargs to pass to compute on the server.
    odo_kwargs : dict, optional
        Extra kwargs to pass to odo on the server.
    profile : bool, optional
        Should blaze server run cProfile over the computation of the expression
        and the serialization of the response.
    profiler_output : file-like object, optional
        A file like object to hold the profiling output from the server.
        If this is not passed then the server will write the data to the
        server's filesystem
    """
    from .server import to_tree

    kwargs = keymap(u8, kwargs)

    tree = to_tree(expr)
    serial = ec.serial
    if profiler_output is not None:
        kwargs[u'profile'] = True
        kwargs[u'profiler_output'] = ':response'

    kwargs[u'compute_kwargs'] = keymap(u8, compute_kwargs or {})
    kwargs[u'odo_kwargs'] = keymap(u8, odo_kwargs or {})

    r = post(
        ec,
        '/compute',
        data=serial.dumps(assoc(kwargs, u'expr', tree)),
        auth=ec.auth,
        headers=mimetype(serial),
    )

    if not ok(r):
        raise ValueError("Bad response: %s" % reason(r))
    response = serial.loads(content(r))
    if profiler_output is not None:
        profiler_output.write(response[u'profiler_output'])
    return serial.data_loads(response[u'data'])
Example #43
def json_expand(json_op, key_name='json'):
    """ Convert a string json object to Python dict in an op. """
    if type(json_op) == dict and key_name in json_op and json_op[key_name]:
        try:
            return update_in(json_op, [key_name], json.loads)
        except JSONDecodeError:
            return assoc(json_op, key_name, {})

    return json_op
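
A usage sketch for json_expand (assuming the imports the snippet relies on: json, JSONDecodeError, and update_in/assoc from toolz):

op = {'id': 1, 'json': '{"a": 1}'}
assert json_expand(op) == {'id': 1, 'json': {'a': 1}}   # string payload decoded to a dict

broken = {'id': 2, 'json': 'not json'}
assert json_expand(broken) == {'id': 2, 'json': {}}     # undecodable payload replaced by {}

assert json_expand({'id': 3}) == {'id': 3}              # no json field: returned unchanged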
Example #44
def json_expand(json_op, key_name='json'):
    """ Convert a string json object to Python dict in an op. """
    if type(json_op) == dict and key_name in json_op and json_op[key_name]:
        try:
            return update_in(json_op, [key_name], json.loads)
        except JSONDecodeError:
            return assoc(json_op, key_name, {})

    return json_op
Example #45
    def _ready_task(self, function=None, key=None, args=(), kwargs={},
                    task=None, who_has=None):
        who_has = who_has or {}
        diagnostics = {}
        data = {k: self.data[k] for k in who_has if k in self.data}
        who_has = {k: set(map(coerce_to_address, v))
                   for k, v in who_has.items()
                   if k not in self.data}
        if who_has:
            try:
                logger.info("gather %d keys from peers: %s",
                            len(who_has), str(who_has))
                diagnostics['transfer-start'] = time()
                other = yield gather_from_workers(who_has)
                diagnostics['transfer-stop'] = time()
                self.data.update(other)
                yield self.center.add_keys(address=self.address,
                                           keys=list(other))
                data.update(other)
            except KeyError as e:
                logger.warn("Could not find data for %s", key)
                raise Return({'status': 'missing-data',
                              'keys': e.args,
                              'key': key})
        else:
            transfer_time = 0
        try:
            start = default_timer()
            if task is not None:
                task = loads(task)
            if function is not None:
                function = loads(function)
            if args:
                args = loads(args)
            if kwargs:
                kwargs = loads(kwargs)
            diagnostics['deserialization'] = default_timer() - start
        except Exception as e:
            logger.warn("Could not deserialize task", exc_info=True)
            raise Return(assoc(error_message(e), 'key', key))

        if task is not None:
            assert not function and not args and not kwargs
            function = execute_task
            args = (task,)

        # Fill args with data
        args2 = pack_data(args, data)
        kwargs2 = pack_data(kwargs, data)

        raise Return({'status': 'OK',
                      'function': function,
                      'args': args2,
                      'kwargs': kwargs2,
                      'diagnostics': diagnostics,
                      'key': key})
Example #46
File: server.py  Project: CaptainAL/Spyder
    def run(self, *args, **kwargs):
        """Run the server"""
        port = kwargs.pop('port', DEFAULT_PORT)
        self.port = port
        try:
            self.app.run(*args, port=port, **kwargs)
        except socket.error:
            print("\tOops, couldn't connect on port %d.  Is it busy?" % port)
            if kwargs.get('retry', True):
                # Attempt to start the server on a new port.
                self.run(*args, **assoc(kwargs, 'port', port + 1))
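
The retry idiom above can be isolated from the Flask server: pop the port, try to bind, and on socket.error recurse with assoc(kwargs, 'port', port + 1) to carry the bumped port without touching the original kwargs. A standalone sketch with a hypothetical try_bind callable:

import socket
from toolz import assoc

DEFAULT_PORT = 8000

def run_with_port_fallback(try_bind, **kwargs):
    # try_bind is a hypothetical callable that raises socket.error (OSError)
    # when the requested port is already taken
    port = kwargs.pop('port', DEFAULT_PORT)
    try:
        return try_bind(port=port, **kwargs)
    except socket.error:
        return run_with_port_fallback(try_bind, **assoc(kwargs, 'port', port + 1))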
Example #47
def _write_tool(step_dir, name, inputs, outputs):
    out_file = os.path.join(step_dir, "%s.cwl" % name)
    out = {"class": "CommandLineTool",
           "baseCommand": ["bcbio_nextgen.py", "runfn", name, "cwl"],
           "inputs": [],
           "outputs": []}
    for i, inp in enumerate(inputs):
        out["inputs"].append(tz.assoc(inp, "inputBinding",
                                      {"prefix": "%s=" % inp["id"].replace("#", ""), "separate": False,
                                       "itemSeparator": ";;", "position": i}))
    with open(out_file, "w") as out_handle:
        yaml.safe_dump(out, out_handle, default_flow_style=False, allow_unicode=False)
    return os.path.join("steps", os.path.basename(out_file))
Example #48
def set_model(filepath, k, v):
    """ Save key k and value v to a json file. """
    try:
        f = open(filepath, 'r')
        fstring = f.read()
        f.close()
        data = json.loads(fstring)
    except (IOError, ValueError):
        data = dict()
    g = open(filepath, 'w+')
    new_data = assoc(data, k, v)
    new_json_data = json.dumps(new_data)
    g.write(new_json_data)
    g.close()
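
A usage sketch for set_model above, written against a temporary file:

import json
import os
import tempfile

path = os.path.join(tempfile.mkdtemp(), 'model.json')
set_model(path, 'alpha', 0.5)    # file does not exist yet, so it starts from {}
set_model(path, 'beta', 2)       # re-reads the file and adds another key

with open(path) as fh:
    assert json.load(fh) == {'alpha': 0.5, 'beta': 2}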
Example #49
    def test_parameterized_term_default_value_with_not_specified(self):
        defaults = {'a': 'default for a', 'b': NotSpecified}

        class F(Factor):
            params = defaults

            inputs = (SomeDataSet.foo,)
            dtype = 'f8'
            window_length = 5

        pattern = r"F expected a keyword parameter 'b'\."
        with assert_raises_regex(TypeError, pattern):
            F()
        with assert_raises_regex(TypeError, pattern):
            F(a='new a')

        assert_equal(F(b='new b').params, assoc(defaults, 'b', 'new b'))
        assert_equal(
            F(a='new a', b='new b').params,
            {'a': 'new a', 'b': 'new b'},
        )
Example #50
File: io.py  Project: gochoam/dask
def fill_kwargs(fn, **kwargs):
    """ Read a csv file and fill up kwargs

    This normalizes kwargs against a sample file.  It does the following:

    1.  If given a globstring, just use one file
    2.  Get names from csv file if not given
    3.  Identify the presence of a header
    4.  Identify dtypes
    5.  Establish column names
    6.  Switch around dtypes and column names if parse_dates is active

    Normally ``pd.read_csv`` does this for us.  However for ``dd.read_csv`` we
    need to be consistent across multiple files and don't want to do these
    heuristics each time so we use the pandas solution once, record the
    results, and then send back a fully explicit kwargs dict to send to future
    calls to ``pd.read_csv``.

    Returns
    -------

    kwargs: dict
        keyword arguments to give to pd.read_csv
    """

    if 'index_col' in kwargs:
        msg = """
        The index column cannot be set at dataframe creation time. Instead use
        the `set_index` method on the dataframe after it is created.
        """
        raise ValueError(msg)

    kwargs = merge(csv_defaults, kwargs)
    sample_nrows = kwargs.pop('sample_nrows', 1000)
    essentials = ['columns', 'names', 'header', 'parse_dates', 'dtype']
    if set(essentials).issubset(kwargs):
        return kwargs

    # Let pandas infer on the first sample_nrows rows
    if '*' in fn:
        filenames = sorted(glob(fn))
        if not filenames:
            raise ValueError("No files found matching name %s" % fn)
        fn = filenames[0]

    if kwargs['compression'] == 'infer':
        kwargs['compression'] = infer_compression(fn)

    if 'names' not in kwargs:
        kwargs['names'] = csv_names(fn, **kwargs)
        if 'header' not in kwargs:
            kwargs['header'] = 0
    else:
        if 'header' not in kwargs:
            kwargs['header'] = None

    kwargs = clean_kwargs(kwargs)
    try:
        head = pd.read_csv(fn, **assoc(kwargs, 'nrows', sample_nrows))
    except StopIteration:
        head = pd.read_csv(fn, **kwargs)

    if 'parse_dates' not in kwargs:
        kwargs['parse_dates'] = [col for col in head.dtypes.index
                           if np.issubdtype(head.dtypes[col], np.datetime64)]

    new_dtype = dict(head.dtypes)
    dtype = kwargs.get('dtype', dict())
    for k, v in dict(head.dtypes).items():
        if k not in dtype:
            dtype[k] = v

    if kwargs.get('parse_dates'):
        for col in kwargs['parse_dates']:
            del dtype[col]

    kwargs['dtype'] = dtype

    return (head.columns.map(lambda s: s.strip() if isinstance(s, str) else s),
            kwargs)
Example #51
def _print_python(expr, leaves=None):
    child, scope = print_python(leaves, expr._child)
    funcname = next(funcnames)
    return ('%s(%s)' % (funcname, child),
            toolz.assoc(scope, funcname, expr.func))
Example #52
File: core.py  Project: aterrel/distributed
    def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE, **kwargs):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = uuid.uuid1()
        super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)
Example #53
    def reducer(state, action):
        if action['type'] == 'ADD_TODO':
            todos = state['todos'] + (action['text'],)
            return toolz.assoc(state, 'todos', todos)
        return state
Example #54
File: core.py  Project: necaris/blaze
def top_then_bottom_then_top_again_etc(expr, scope, **kwargs):
    """ Compute expression against scope

    Does the following interpreter strategy:

    1.  Try compute_down on the entire expression
    2.  Otherwise compute_up from the leaves until we experience a type change
        (e.g. data changes from dict -> pandas DataFrame)
    3.  Re-optimize expression and re-pre-compute data
    4.  Go to step 1

    Examples
    --------

    >>> import numpy as np

    >>> s = symbol('s', 'var * {name: string, amount: int}')
    >>> data = np.array([('Alice', 100), ('Bob', 200), ('Charlie', 300)],
    ...                 dtype=[('name', 'S7'), ('amount', 'i4')])

    >>> e = s.amount.sum() + 1
    >>> top_then_bottom_then_top_again_etc(e, {s: data})
    601

    See Also
    --------

    bottom_up_until_type_break  -- uses this for bottom-up traversal
    top_to_bottom -- older version
    bottom_up -- older version still
    """
    # 0. Base case: expression is in dict, return associated data
    if expr in scope:
        return scope[expr]

    if not hasattr(expr, '_leaves'):
        return expr

    leaf_exprs = list(expr._leaves())
    leaf_data = [scope.get(leaf) for leaf in leaf_exprs]

    # 1. See if we have a direct computation path with compute_down
    try:
        return compute_down(expr, *leaf_data, **kwargs)
    except NotImplementedError:
        pass

    # 2. Compute from the bottom until there is a data type change
    expr2, scope2 = bottom_up_until_type_break(expr, scope, **kwargs)

    # 3. Re-optimize data and expressions
    optimize_ = kwargs.get('optimize', optimize)
    pre_compute_ = kwargs.get('pre_compute', pre_compute)
    if pre_compute_:
        scope3 = dict((e, pre_compute_(e, datum,
                                       **assoc(kwargs, 'scope', scope2)))
                      for e, datum in scope2.items())
    else:
        scope3 = scope2
    if optimize_:
        try:
            expr3 = optimize_(expr2, *[scope3[leaf]
                                       for leaf in expr2._leaves()])
            _d = dict(zip(expr2._leaves(), expr3._leaves()))
            scope4 = dict((e._subs(_d), d) for e, d in scope3.items())
        except NotImplementedError:
            expr3 = expr2
            scope4 = scope3
    else:
        expr3 = expr2
        scope4 = scope3

    # 4. Repeat
    if expr.isidentical(expr3):
        raise NotImplementedError("Don't know how to compute:\n"
                                  "type(expr): %s\n"
                                  "expr: %s\n"
                                  "data: %s" % (type(expr3), expr3, scope4))
    else:
        return top_then_bottom_then_top_again_etc(expr3, scope4, **kwargs)
Example #55
File: into.py  Project: leolujuyi/blaze
def into(a, b, **kwargs):
    # TODO: handle large CSV case
    return into(a, into(pd.DataFrame, b), **assoc(kwargs, 'dshape', b.dshape))
Example #56
def batch_tcga_metadata_by_participant(fns):
    metadata = [re.match(TCGA_RE, fn).groupdict() for fn in fns]
    metadata = [tz.assoc(d, "fn", fn) for d, fn in zip(metadata, fns)]
    participant = tz.groupby(lambda x: x["participant"], metadata).values()
    return participant
Example #57
File: core.py  Project: sonlia/distributed
    def __init__(self, handlers, max_buffer_size=MAX_BUFFER_SIZE, **kwargs):
        self.handlers = assoc(handlers, 'identity', self.identity)
        self.id = str(uuid.uuid1())
        self._port = None
        self._rpcs = dict()
        super(Server, self).__init__(max_buffer_size=max_buffer_size, **kwargs)