def _add_doc_information(xml, result):
    doc_id = xml.xpath("//p:application-reference/p:document-id/p:doc-number",
                       namespaces=NAMESPACES)
    country = xml.xpath("//p:application-reference/p:document-id/p:country",
                        namespaces=NAMESPACES)
    date = xml.xpath("//p:application-reference/p:document-id/p:date",
                     namespaces=NAMESPACES)
    result[DOCUMENT_ID] = cytoolz.first(doc_id).text
    result[COUNTRY] = cytoolz.first(country).text
    result[DATE] = cytoolz.first(date).text
def _iter(self, usecols=None):
    # get the date column [(name, type)] pairs
    datecols = list(map(first, get_date_columns(self.schema)))

    # figure out which ones pandas needs to parse
    parse_dates = ordered_index(datecols, self.schema)
    if usecols is not None:
        parse_dates = [d for d in parse_dates if d in set(usecols)]

    reader = self.iterreader(parse_dates=parse_dates, usecols=usecols,
                             squeeze=True)

    # pop one off the iterator
    initial = next(iter(reader))

    # get our names and initial dtypes for later inference
    if isinstance(initial, pd.Series):
        names = [str(initial.name)]
        formats = [initial.dtype]
    else:
        if usecols is None:
            index = slice(None)
        else:
            index = initial.columns.get_indexer(usecols)
        names = list(map(str, initial.columns[index]))
        formats = initial.dtypes[index].tolist()

    initial_dtype = np.dtype({'names': names, 'formats': formats})

    # what dtype do we actually want to see when we read
    streaming_dtype = self.get_streaming_dtype(initial_dtype)

    # everything must ultimately be a list of tuples
    m = partial(bz.into, list)

    slicerf = lambda x: x.replace('', np.nan)

    if isinstance(initial, pd.Series):
        streaming_dtype = streaming_dtype[first(streaming_dtype.names)]

    if streaming_dtype != initial_dtype:
        # we don't have the desired type so jump through hoops with
        # to_records -> astype(desired dtype) -> listify
        def mapper(x, dtype=streaming_dtype):
            r = slicerf(x)
            try:
                r = r.to_records(index=False)
            except AttributeError:
                # We have a series
                r = r.values
            return m(r.astype(dtype))
    else:
        mapper = compose(m, slicerf)

    # convert our initial NDFrame to a list
    return it.chain(mapper(initial),
                    it.chain.from_iterable(map(mapper, reader)))
def test_registry():
    registry = chipmunk.registry(url=test.env.get('CHIPMUNK_URL'))
    assert len(registry) > 0
    assert isinstance(registry, (tuple, list))
    entry = first(registry)
    assert type(entry) is dict
    keys = ['ubid', 'info', 'tags', 'data_type', 'data_fill', 'data_shape']
    assert all([key in entry for entry in registry for key in keys])
def module2regulon(db: Type[RankingDatabase], module: Regulon,
                   motif_annotations: pd.DataFrame,
                   weighted_recovery=False,
                   return_recovery_curves=False,
                   module2features_func=module2features) -> Optional[Regulon]:
    # First calculating a dataframe and then deriving the regulons from it
    # introduces a performance penalty.
    df = module2df(db, module, motif_annotations,
                   weighted_recovery=weighted_recovery,
                   return_recovery_curves=return_recovery_curves,
                   module2features_func=module2features_func)
    if len(df) == 0:
        return None
    regulons = df2regulons(df)
    return first(regulons) if len(regulons) > 0 else None
def compute_up(t, seq, **kwargs):
    try:
        row = first(seq)
    except StopIteration:
        return ()
    seq = concat([[row], seq])  # re-add row to seq
    if isinstance(row, list):
        seq = map(tuple, seq)
    return unique(seq)
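# Hedged sketch (not part of the source): the peek-then-rechain idiom used in
# compute_up above, shown standalone with cytoolz. head_and_rest and rows are
# hypothetical names introduced for illustration.
from cytoolz import concat, first, unique

def head_and_rest(seq):
    seq = iter(seq)
    head = first(seq)                   # consumes one element
    return head, concat([[head], seq])  # splice it back so nothing is lost

rows = iter([(1, 'a'), (1, 'a'), (2, 'b')])
head, rows = head_and_rest(rows)
assert head == (1, 'a')
assert tuple(unique(rows)) == ((1, 'a'), (2, 'b'))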
def check(a, b):
    """Reducer for efficiently comparing two unordered sequences.

    Executes in linear (O(n)) time.

    Args:
        a: {k: [datestring1, datestring2, ...]}
        b: {k: [datestring2, datestring1, ...]}

    Returns:
        b if a == b, otherwise raises an Exception with details
    """
    if f.seqeq(second(a), second(b)):
        return b
    else:
        msg = ('asymmetric dates detected - {} != {}'
               .format(first(a), first(b)))
        msga = '{}{}'.format(first(a), second(a))
        msgb = '{}{}'.format(first(b), second(b))
        raise Exception('\n\n'.join([msg, msga, msgb]))
def insert_chip(self, detections):

    def chip(detection):
        return {'cx': detection['cx'],
                'cy': detection['cy'],
                'dates': detection['dates']}

    c = chip(first(detections))
    return self._put_json(self._chip_key(c['cx'], c['cy']), [c],
                          compress=True)
def __init__(
    self,
    *,
    DEBUG: bool = False,
    TESTING: bool = False,
    **kwargs: Union[ModelComponent, Endpoint],
):
    self.DEBUG = DEBUG
    self.TESTING = TESTING

    kwarg_comps, kwarg_endpoints = _parse_composition_kwargs(**kwargs)
    self._name_endpoints = kwarg_endpoints
    self._uid_comps = {v.uid: v for v in kwarg_comps.values()}
    self._uid_names_map = {v.uid: k for k, v in kwarg_comps.items()}

    self._connections = list(
        concat([c._flashserve_meta_.connections for c in kwarg_comps.values()]))

    if len(self._name_endpoints) == 0:
        comp = first(self.components.values())  # one element iterable
        ep_route = f"/{comp._flashserve_meta_.exposed.__name__}"
        ep_inputs = {
            k: f"{comp.uid}.inputs.{k}" for k in asdict(comp.inputs).keys()
        }
        ep_outputs = {
            k: f"{comp.uid}.outputs.{k}" for k in asdict(comp.outputs).keys()
        }
        ep = Endpoint(route=ep_route, inputs=ep_inputs, outputs=ep_outputs)
        self._name_endpoints[
            f"{comp._flashserve_meta_.exposed.__name__}_ENDPOINT"] = ep

    self._name_ep_protos = {}
    for ep_key, ep in self._name_endpoints.items():
        for ep_comp in itertools.chain(ep.inputs.values(), ep.outputs.values()):
            uid, argtype, name = ep_comp.split(".")
            if uid not in self.components:
                raise AttributeError(
                    f"{uid} not found. Expected one of {self.components.keys()}")
            try:
                _ = getattr(getattr(self.components[uid], f"{argtype}"), name)
            except AttributeError:
                raise AttributeError(
                    f"uid={uid}, argtype={argtype}, name={name}")

        self._name_ep_protos[ep_key] = EndpointProtocol(
            name=ep_key, endpoint=ep, components=self.components)
def load_model(ctx, cfg):
    with ceph.connect(cfg) as c:
        ctile = c.select_tile(ctx['tx'], ctx['ty'])
        model = bytes.fromhex(first(ctile)['model'])

        if model is None:
            raise Exception(
                "No model found for tx:{tx} and ty:{ty}".format(**ctx))
        else:
            return assoc(ctx, 'model_bytes', model)
def load_model(ctx, cfg):
    stmt = db.select_tile(cfg, ctx['tx'], ctx['ty'])
    fn = excepts(StopIteration,
                 lambda cfg, statement: bytes.fromhex(
                     first(db.execute_statement(cfg, statement)).model))
    model = fn(cfg, stmt)

    if model is None:
        raise Exception("No model found for tx:{tx} and ty:{ty}".format(**ctx))
    else:
        return assoc(ctx, 'model_bytes', model)
def create(x, y, acquired, cfg):
    """Create a timeseries.

    Args:
        x (int): x coordinate
        y (int): y coordinate
        acquired (string): iso8601 date range
        cfg (dict): A Merlin configuration

    Returns:
        tuple - Results of format_fn applied to results of chips_fn
    """
    x, y = get_in(['chip', 'proj-pt'], cfg['snap_fn'](x=x, y=y))

    # get specs
    specmap = cfg['specs_fn'](specs=cfg['registry_fn']())

    # get function that will return chipmap.
    # Don't create state with a realized variable to preserve memory
    chipmap = partial(chips.mapped,
                      x=x,
                      y=y,
                      acquired=acquired,
                      specmap=specmap,
                      chips_fn=cfg['chips_fn'])

    # calculate locations chip.  There's another function
    # here to be split out and organized.
    grid = first(filter(lambda x: x['name'] == 'chip', cfg['grid_fn']()))
    cw, ch = specs.refspec(specmap).get('data_shape')
    locations = partial(chips.locations,
                        x=x,
                        y=y,
                        cw=cw,
                        ch=ch,
                        rx=grid.get('rx'),
                        ry=grid.get('ry'),
                        sx=grid.get('sx'),
                        sy=grid.get('sy'))

    return cfg['format_fn'](x=x,
                            y=y,
                            locations=locations(),
                            dates_fn=cfg['dates_fn'],
                            specmap=specmap,
                            chipmap=chipmap())
def aux(ctx, cfg):
    '''Retrieve aux data'''
    data = merlin.create(
        x=ctx['cx'],
        y=ctx['cy'],
        acquired=ctx['acquired'],  # '1982/2018'
        cfg=merlin.cfg.get(profile='chipmunk-aux',
                           env={'CHIPMUNK_URL': cfg['aux_url']}))

    return assoc(ctx,
                 'aux',
                 {first(d): second(d) for d in merlin.functions.denumpify(data)})
def aux(ctx, cfg):
    '''Retrieve aux data'''
    logger.info("getting aux for cx:{} cy:{}".format(ctx['cx'], ctx['cy']))
    data = merlin.create(x=ctx['cx'],
                         y=ctx['cy'],
                         acquired=ctx['acquired'],
                         cfg=merlin.cfg.get(
                             profile='chipmunk-aux',
                             env={'CHIPMUNK_URL': cfg['aux_url']}))
    return assoc(ctx,
                 'aux',
                 {first(d): second(d) for d in merlin.functions.denumpify(data)})
def test_pyccd():
    c = cfg.get('chipmunk-ard', env=test.env)
    x, y = get_in(['chip', 'proj-pt'], c['snap_fn'](x=test.x, y=test.y))

    # get specs
    specmap = c['specs_fn'](specs=c['registry_fn']())

    # get function that will return chipmap.
    # Don't create state with a realized variable to preserve memory
    chipmap = partial(chips.mapped,
                      x=test.x,
                      y=test.y,
                      acquired=test.acquired,
                      specmap=specmap,
                      chips_fn=c['chips_fn'])

    # calculate locations chip.  There's another function
    # here to be split out and organized.
    grid = first(filter(lambda x: x['name'] == 'chip', c['grid_fn']()))
    cw, ch = specs.refspec(specmap).get('data_shape')
    locations = chips.locations(x=x,
                                y=y,
                                cw=cw,
                                ch=ch,
                                rx=grid.get('rx'),
                                ry=grid.get('ry'),
                                sx=grid.get('sx'),
                                sy=grid.get('sy'))

    data = c['format_fn'](x=x,
                          y=y,
                          locations=locations,
                          dates_fn=c['dates_fn'],
                          specmap=specmap,
                          chipmap=chipmap())

    # we are only testing the structure of the response here.
    # Full data validation is being done in the test for merlin.create()
    assert type(data) is tuple
    assert len(data) == 10000
    assert type(first(data)) is tuple
    assert type(first(first(data))) is tuple
    assert type(second(first(data))) is dict
    assert isinstance(second(second(first(data))), (tuple, list))
    assert len(second(second(first(data)))) > 0
def step(self) -> None:
    match = re.search(r"(F1SEQ1)|(SEQ2)", self.model.semester)
    if match is None:
        return

    self.sem_queue.appendleft(f"{self.model.semester}_MAJOR")
    prev_semester = (tlz.first(self.sem_queue)
                     if len(self.sem_queue) == 1
                     else self.sem_queue.pop())
    new_semester = f"{self.model.semester}_MAJOR"

    if new_semester != prev_semester:
        prev_major = tlz.last(self.majors)
        self._new_major = self.model.major_switcher.get_major(
            prev_semester, new_semester, prev_major)
def process_drives(content: str, headers: list) -> tuple:
    """Processes json lines for the Drive List sheet

    :param content: newline-delimited text, each line a cluster name followed by JSON entries
    :param headers: ordered headers used to arrange each row
    :return: (rows, bad_jsons) - sheet rows plus a count of lines that failed to parse
    """
    lines = content.split('\n')
    rows, bad_jsons = [], 0
    for line in lines:
        cluster, jsons = line.split()[0].strip(':'), ' '.join(line.split()[1:])
        try:
            for json_entry in json.loads(jsons):
                rows.append(
                    [cluster] +
                    first(ordered_jsons([flatten_dict(json_entry)], headers)))
        except json.JSONDecodeError:
            bad_jsons += 1
    return rows, bad_jsons
def parameters(r):
    '''Check HTTP request parameters'''
    tx = get('tx', r, None)
    ty = get('ty', r, None)
    acquired = get('acquired', r, None)
    chips = get('chips', r, None)
    date = get('date', r, None)

    if (tx is None or ty is None or acquired is None
            or chips is None or date is None):
        raise Exception('tx, ty, acquired, chips and date are required parameters')
    else:
        return {'tx': int(tx),
                'ty': int(ty),
                'acquired': acquired,
                'date': date,
                'chips': list(map(lambda chip: (int(first(chip)), int(second(chip))), chips)),
                'test_data_exception': get('test_data_exception', r, None),
                'test_training_exception': get('test_training_exception', r, None),
                'test_cassandra_exception': get('test_cassandra_exception', r, None)}
def average_reflectance_fn(segment):
    '''Add average reflectance values into dataset'''
    avgrefl = lambda intercept, slope, ordinal: add(intercept, mul(slope, ordinal))

    arfn = partial(avgrefl,
                   slope=first(get('slope', segment)),
                   ordinal=arrow.get(get('date', segment)).datetime.toordinal())

    ar = {'blar': arfn(get('blint', segment)),
          'grar': arfn(get('grint', segment)),
          'niar': arfn(get('niint', segment)),
          'rear': arfn(get('reint', segment)),
          's1ar': arfn(get('s1int', segment)),
          's2ar': arfn(get('s2int', segment)),
          'thar': arfn(get('thint', segment))}

    return merge(segment, ar)
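# Hedged illustration (not from the source): the quantity computed by arfn above
# is intercept + slope * ordinal_date; the numbers below are made up.
from operator import add, mul

intercept, slope, ordinal = 0.05, 1e-05, 736330  # hypothetical segment values
assert add(intercept, mul(slope, ordinal)) == intercept + slope * ordinal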
def test_create():
    c = cfg.get('chipmunk-ard', env=test.env)
    x, y = c.get('snap_fn')(x=test.x, y=test.y).get('chip').get('proj-pt')
    ubids = cfg.ubids.get('chipmunk-ard').get('reds')
    registry = c.get('registry_fn')()
    refspec = specs.refspec(specs.mapped(specs=registry, ubids={'reds': ubids}))
    # print("REFSPEC:{}".format(refspec))

    chipseq = c.get('chips_fn')(x=x, y=y, acquired=test.acquired, ubids=ubids)
    dateseq = dates.mapped(chipmap=dict(reds=chipseq)).get('reds')
    grid = {x['name']: x for x in c.get('grid_fn')()}.get('chip')

    locations = chips.locations(x=x,
                                y=y,
                                cw=first(refspec.get('data_shape')),
                                ch=second(refspec.get('data_shape')),
                                rx=grid.get('rx'),
                                ry=grid.get('ry'),
                                sx=grid.get('sx'),
                                sy=grid.get('sy'))

    _rods = rods.create(x=x,
                        y=y,
                        chipseq=chipseq,
                        dateseq=dateseq,
                        locations=locations,
                        spec_index=specs.index(registry))

    assert len(_rods) == 10000
    assert type(_rods) is dict
def pipeline(chip, tx, ty, date, acquired, cfg):
    ctx = {'tx': tx,
           'ty': ty,
           'cx': first(chip),
           'cy': second(chip),
           'date': date,
           'acquired': acquired,
           'cluster': _cluster(cfg)}

    return thread_first(ctx,
                        partial(segaux.segments, cfg=cfg),
                        segments_filter,
                        partial(segaux.aux, cfg=cfg),
                        segaux.aux_filter,
                        segaux.combine,
                        segaux.unload_segments,
                        segaux.unload_aux,
                        segaux.add_training_dates,
                        add_average_reflectance,
                        segaux.training_format,
                        segaux.log_chip,
                        segaux.exit_pipeline)
def root_changeset_id(self) -> uuid.UUID:
    """
    Returns the id of the root changeset
    """
    return first(self.journal_data.keys())
def get_test_name(filler: Dict[str, Any]) -> str:
    assert len(filler) == 1
    return first(filler)
def get_test_name(filler):
    assert len(filler) == 1
    return first(filler)
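# Note (hedged, standard toolz/cytoolz behaviour): first() on a dict iterates its
# keys, so get_test_name returns the single key of the one-entry filler mapping.
# The filler value below is made up.
from toolz import first
assert first({"my_test": {"env": {}, "pre": {}}}) == "my_test"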
def __init__(self, path, mode='rt', schema=None, columns=None, types=None,
             typehints=None, dialect=None, header=None, open=open,
             nrows_discovery=50, chunksize=1024,
             encoding=sys.getdefaultencoding(), **kwargs):
    if 'r' in mode and not os.path.isfile(path):
        raise ValueError('CSV file "%s" does not exist' % path)

    if schema is None and 'w' in mode:
        raise ValueError('Please specify schema for writable CSV file')

    self.path = path
    self.mode = mode
    self.open = {'gz': gzip.open, 'bz2': bz2.BZ2File}.get(ext(path), open)
    self.header = header
    self._abspath = os.path.abspath(path)
    self.chunksize = chunksize
    self.encoding = encoding

    sample = get_sample(self)
    self.dialect = dialect = discover_dialect(sample, dialect, **kwargs)

    if header is None:
        header = has_header(sample, encoding=encoding)
    elif isinstance(header, int):
        dialect['header'] = header
        header = True

    reader_dialect = keyfilter(read_csv_kwargs.__contains__, dialect)
    if not schema and 'w' not in mode:
        if not types:
            data = self._reader(skiprows=1 if header else 0,
                                nrows=nrows_discovery,
                                as_recarray=True,
                                index_col=False,
                                header=0 if header else None,
                                **reader_dialect).tolist()
            types = discover(data)
            rowtype = types.subshape[0]
            if isinstance(rowtype[0], Tuple):
                types = types.subshape[0][0].dshapes
                types = [unpack(t) for t in types]
                types = [string if t == null else t for t in types]
                types = [t if isinstance(t, Option) or t == string
                         else Option(t) for t in types]
            elif (isinstance(rowtype[0], Fixed) and
                  isinstance(rowtype[1], CType)):
                types = int(rowtype[0]) * [rowtype[1]]
            else:
                raise ValueError("Could not discover schema from data.\n"
                                 "Please specify schema.")
        if not columns:
            if header:
                columns = first(self._reader(skiprows=0,
                                             nrows=1,
                                             header=None,
                                             **reader_dialect
                                             ).itertuples(index=False))
            else:
                columns = ['_%d' % i for i in range(len(types))]
        if typehints:
            types = [typehints.get(c, t) for c, t in zip(columns, types)]

        schema = dshape(Record(list(zip(columns, types))))

    self._schema = schema
    self.header = header
def _persist_block_chain(
    cls,
    db: DatabaseAPI,
    blocks: Iterable[BaseBeaconBlock],
    block_class: Type[BaseBeaconBlock],
    fork_choice_scorings: Iterable[ForkChoiceScoringFn],
) -> Tuple[Tuple[BaseBeaconBlock, ...], Tuple[BaseBeaconBlock, ...]]:
    blocks_iterator = iter(blocks)
    scorings_iterator = iter(fork_choice_scorings)

    try:
        first_block = first(blocks_iterator)
        first_scoring = first(scorings_iterator)
    except StopIteration:
        return tuple(), tuple()

    try:
        previous_canonical_head = cls._get_canonical_head(
            db, block_class).signing_root
        head_score = cls._get_score(db, previous_canonical_head)
    except CanonicalHeadNotFound:
        no_canonical_head = True
    else:
        no_canonical_head = False

    is_genesis = first_block.is_genesis
    if not is_genesis and not cls._block_exists(db, first_block.parent_root):
        raise ParentNotFound(
            "Cannot persist block ({}) with unknown parent ({})".format(
                encode_hex(first_block.signing_root),
                encode_hex(first_block.parent_root),
            ))

    score = first_scoring(first_block)

    curr_block_head = first_block
    db.set(curr_block_head.signing_root, ssz.encode(curr_block_head))
    cls._add_block_root_to_slot_lookup(db, curr_block_head)
    cls._set_block_score_to_db(db, curr_block_head, score)
    cls._add_attestations_root_to_block_lookup(db, curr_block_head)

    orig_blocks_seq = concat([(first_block,), blocks_iterator])

    for parent, child in sliding_window(2, orig_blocks_seq):
        if parent.signing_root != child.parent_root:
            raise ValidationError(
                "Non-contiguous chain. Expected {} to have {} as parent but was {}"
                .format(
                    encode_hex(child.signing_root),
                    encode_hex(parent.signing_root),
                    encode_hex(child.parent_root),
                ))

        curr_block_head = child
        db.set(curr_block_head.signing_root, ssz.encode(curr_block_head))
        cls._add_block_root_to_slot_lookup(db, curr_block_head)
        cls._add_attestations_root_to_block_lookup(db, curr_block_head)

        # NOTE: len(scorings_iterator) should equal len(blocks_iterator)
        try:
            next_scoring = next(scorings_iterator)
        except StopIteration:
            raise MissingForkChoiceScoringFns

        score = next_scoring(curr_block_head)
        cls._set_block_score_to_db(db, curr_block_head, score)

    if no_canonical_head:
        return cls._set_as_canonical_chain_head(
            db, curr_block_head.signing_root, block_class)

    if score > head_score:
        return cls._set_as_canonical_chain_head(
            db, curr_block_head.signing_root, block_class)
    else:
        return tuple(), tuple()
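# Hedged sketch (not from the source): how sliding_window(2, ...) pairs each
# parent with its child in the persistence loop above; the chain values are made up.
from toolz import sliding_window

chain = ["genesis", "block_1", "block_2", "block_3"]
assert list(sliding_window(2, chain)) == [
    ("genesis", "block_1"),
    ("block_1", "block_2"),
    ("block_2", "block_3"),
]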
def first(self):
    return cytoolz.first(self)
def parse_file(filename=16) -> str:
    return first(U.read_file(filename))
def setup_http_app(composition: "Composition", debug: bool) -> "FastAPI":
    from flash import __version__

    app = FastAPI(
        debug=debug,
        version=__version__,
        title="FlashServe",
    )

    # Endpoint Route
    #   `/flashserve/alive`
    app.get(
        "/flashserve/alive",
        name="alive",
        description="If you can reach this endpoint, the server is running.",
        response_model=Alive,
    )(_build_alive_check())

    _no_optimization_dsk = build_composition(
        endpoint_protocol=first(composition.endpoint_protocols.values()),
        components=composition.components,
        connections=composition.connections,
    )
    pth = Path(__file__).parent.joinpath("templates")
    templates = Jinja2Templates(directory=str(pth.absolute()))

    # Endpoint Route
    #   `/flashserve/component_dags`
    app.get(
        "/flashserve/component_dags",
        name="component_dags",
        summary="HTML Rendering of Component DAGs",
        response_class=HTMLResponse,
    )(_build_visualization(dsk_composition=_no_optimization_dsk,
                           templates=templates,
                           no_optimization=True))

    # Endpoint Route
    #   `/flashserve/dag_json`
    app.get(
        "/flashserve/dag_json",
        name="components JSON DAG",
        summary="JSON representation of component DAG",
        response_model=ComponentJSON,
    )(_build_dag_json(
        components=composition.components,
        ep_proto=None,
        show_connected_components=False,
    ))

    for ep_name, ep_proto in composition.endpoint_protocols.items():
        dsk = build_composition(
            endpoint_protocol=ep_proto,
            components=composition.components,
            connections=composition.connections,
        )
        RequestModel = ep_proto.request_model  # skipcq: PYL-W0621
        ResponseModel = ep_proto.response_model  # skipcq: PYL-W0621

        # Endpoint Route
        #   `/{proto}`
        app.post(
            f"{ep_proto.route}",
            name=ep_name,
            tags=[ep_name],
            summary="Perform a Computation.",
            description="Computes results of DAG defined by these components & endpoint.",
            response_model=ResponseModel,
        )(_build_endpoint(RequestModel, dsk, ResponseModel))

        # Endpoint Route
        #   `/{proto}/meta`
        app.get(
            f"{ep_proto.route}/meta",
            name=f"{ep_name} meta schema",
            tags=[ep_name],
            summary="OpenAPI schema",
            description="OpenAPI schema for this endpoint's compute route.",
        )(_build_meta(RequestModel))

        # Endpoint Route
        #   `/{proto}/dag`
        app.get(
            f"{ep_proto.route}/dag",
            name=f"{ep_name} DAG Visualization",
            tags=[ep_name],
            summary="HTML Rendering of DAG",
            description=(
                "Displays an html image rendering the DAG of functions "
                "& components executed to reach the endpoint outputs."),
            response_class=HTMLResponse,
        )(_build_visualization(dsk, templates))

        # Endpoint Route
        #   `/{proto}/dag_json`
        app.get(
            f"{ep_proto.route}/dag_json",
            name=f"{ep_name} JSON DAG",
            tags=[ep_name],
            summary="JSON representation of DAG",
            response_model=MergedJSON,
        )(_build_dag_json(
            components=composition.components,
            ep_proto=ep_proto,
            show_connected_components=True,
        ))

    return app
def from_str(cls, s):
    return cls(first(s), int(s[1:]))
def render(self, model: KSUModel):
    gpa_vals = [student.gpa for student in model.schedule.agents]
    hist = tlz.first(np.histogram(gpa_vals, bins=self.bins))
    return [int(x) for x in hist]
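# Note (hedged): np.histogram returns a (counts, bin_edges) tuple, so tlz.first
# above selects the counts array before it is cast to a list of ints. The GPA
# values below are made up.
import numpy as np
import toolz as tlz

counts = tlz.first(np.histogram([2.0, 3.1, 3.5, 3.9], bins=4))
assert int(counts.sum()) == 4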
def test_create_index_unique(self, sql):
    create_index(sql, 'y', name='y_idx', unique=True)
    assert len(sql.indexes) == 1
    idx = first(sql.indexes)
    assert idx.unique
    assert idx.columns.y == sql.c.y
def wrapper(*args, **kwargs):
    if get(name, first(args), None) is not None:
        raise Exception(name)
    else:
        return fn(*args, **kwargs)
def wrapper(*args, **kwargs):
    v = get(name, first(args), None)

    if v is None or (hasattr(v, '__len__') and len(v) == 0):
        return first(args)
    else:
        return fn(*args, **kwargs)
def wrapper(*args, **kwargs):
    if get('exception', first(args), None) is None:
        return fn(*args, **kwargs)
    else:
        return first(args)
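# Hedged sketch (not from the source): the guard pattern shared by the wrappers
# above, written out as a complete decorator. skip_on_exception, add_one and the
# use of functools.wraps are my additions for illustration.
import functools
from cytoolz import first, get

def skip_on_exception(fn):
    """Return the context unchanged if it already carries an 'exception' key."""
    @functools.wraps(fn)
    def wrapper(*args, **kwargs):
        ctx = first(args)
        if get('exception', ctx, None) is None:
            return fn(*args, **kwargs)
        return ctx
    return wrapper

@skip_on_exception
def add_one(ctx):
    return {**ctx, 'value': ctx['value'] + 1}

assert add_one({'value': 1}) == {'value': 2}
assert add_one({'value': 1, 'exception': 'boom'}) == {'value': 1, 'exception': 'boom'}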