def exists_in(superset, subset):
    if issubset(subset, second(superset)):
        return True
    else:
        msg = '{} is missing data.'.format(first(superset))
        msg2 = '{} is not a subset of {}'.format(subset, second(superset))
        raise Exception('\n\n'.join([msg, msg2]))
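# Usage sketch (illustrative, not from the source): `superset` is a
# (key, values) pair, e.g. an item from a dict of date sets, and `issubset`
# is assumed to behave like set.issubset:
#
#   exists_in(('chip-1', {'2002-01-01', '2002-02-02'}), {'2002-01-01'})  # True
#   exists_in(('chip-1', {'2002-01-01'}), {'1999-09-09'})  # raises Exception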
def detect(timeseries):
    cx, cy, px, py = first(timeseries)
    return format(cx=cx, cy=cy, px=px, py=py,
                  dates=get('dates', second(timeseries)),
                  ccdresult=ccd.detect(**second(timeseries)))
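# Input-shape sketch (inferred from the access pattern above, not confirmed):
# timeseries is a ((cx, cy, px, py), kwargs) pair where the kwargs dict feeds
# ccd.detect() and carries a 'dates' entry:
#
#   ts = ((cx, cy, px, py), {'dates': [...], 'blues': [...], 'reds': [...]})
#   detect(ts)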
def save(acquired, bounds, products, product_dates, spark_context,
         clip=False, specs_fn=chip_specs.get, chips_fn=chips.get):
    """Saves requested products to iwds

    Args:
        acquired (str): / separated datestrings in iso8601 format. Used to
            determine the daterange of input data.
        bounds (str): sequence of points ((x1, y1), (x2, y2), ...). Bounds
            are minboxed and then corresponding chip ids are determined from
            the result.
        products (sequence): products to save
        product_dates (sequence): product dates to produce and save
        spark_context: a spark cluster connection
        clip (bool): If True, any points not falling within the minbox of
            bounds are filtered out.

    Returns:
        generator: {product: dataframe}
    """
    ss = sql.SparkSession(spark_context)
    queries = fb.chip_spec_queries(fb.SPECS_URL)
    spec = first(specs_fn(queries[first(queries)]))
    coordinates = chips.bounds_to_coordinates(tuple(bounds), spec)

    job, jobconf = init(acquired=acquired,
                        chip_ids=coordinates,
                        products=products,
                        product_dates=product_dates,
                        specs_fn=specs_fn,
                        refspec=spec,
                        chips_fn=chips_fn,
                        spark_context=spark_context,
                        clip_box=f.minbox(bounds) if clip else None)

    # first, save the jobconf used to generate the products
    md5, cfg = f.serialize({k: f.represent(v.value) for k, v in jobconf.items()})
    write(table='jobconf',
          dataframe=ss.createDataFrame([[md5, cfg]], jobconf_schema()))

    for p in products:
        df = ss.createDataFrame(
            job[p].map(lambda x: (float(x[0][0]), float(x[0][1]),
                                  float(x[0][2]), float(x[0][3]),
                                  str(x[0][5]),
                                  second(f.serialize(x[1])),
                                  second(f.serialize(x[2])),
                                  str(md5)))
                  .repartition(fb.STORAGE_PARTITION_COUNT),
            schema=result_schema())
        yield {p: write(table=f.cqlstr(job[p].name()), dataframe=df)}
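# Invocation sketch (hypothetical values): save() is a generator, so nothing
# is written to iwds until it is consumed:
#
#   for result in save(acquired='1982-01-01/2015-12-31',
#                      bounds=((x1, y1), (x2, y2)),
#                      products=['ccd'],
#                      product_dates=['2014-07-01'],
#                      spark_context=sc):
#       print(result)  # {product: dataframe}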
def test_pyccd():
    c = cfg.get('chipmunk-ard', env=test.env)
    x, y = get_in(['chip', 'proj-pt'], c['snap_fn'](x=test.x, y=test.y))

    # get specs
    specmap = c['specs_fn'](specs=c['registry_fn']())

    # get function that will return chipmap.
    # Don't create state with a realized variable to preserve memory
    chipmap = partial(chips.mapped,
                      x=test.x,
                      y=test.y,
                      acquired=test.acquired,
                      specmap=specmap,
                      chips_fn=c['chips_fn'])

    # calculate chip locations. There's another function here waiting to be
    # split out and organized.
    grid = first(filter(lambda x: x['name'] == 'chip', c['grid_fn']()))
    cw, ch = specs.refspec(specmap).get('data_shape')
    locations = chips.locations(x=x, y=y, cw=cw, ch=ch,
                                rx=grid.get('rx'), ry=grid.get('ry'),
                                sx=grid.get('sx'), sy=grid.get('sy'))

    data = c['format_fn'](x=x,
                          y=y,
                          locations=locations,
                          dates_fn=c['dates_fn'],
                          specmap=specmap,
                          chipmap=chipmap())

    # we are only testing the structure of the response here.
    # Full data validation is done in the test for merlin.create()
    assert type(data) is tuple
    assert len(data) == 10000
    assert type(first(data)) is tuple
    assert type(first(first(data))) is tuple
    assert type(second(first(data))) is dict
    # note: `type(...) is tuple or list` is always truthy due to operator
    # precedence; membership test expresses the intended check
    assert type(second(second(first(data)))) in (tuple, list)
    assert len(second(second(first(data)))) > 0
def pipeline(chip, tx, ty, date, acquired, cfg):
    ctx = {'tx': tx,
           'ty': ty,
           'cx': first(chip),
           'cy': second(chip),
           'date': date,
           'acquired': acquired}

    return thread_first(ctx,
                        partial(segments, cfg=cfg),
                        segments_filter,
                        partial(segaux.aux, cfg=cfg),
                        segaux.aux_filter,
                        segaux.combine,
                        segaux.unload_segments,
                        segaux.unload_aux,
                        segaux.add_training_dates,
                        add_average_reflectance,
                        segaux.training_format,
                        #segaux.log_chip,
                        segaux.exit_pipeline)
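# Usage sketch (hypothetical values): chip is an (x, y) pair and thread_first
# pipes ctx through each stage in order, each stage returning an updated ctx:
#
#   pipeline(chip=(1484415, 2114805), tx=150000, ty=2100000,
#            date='2001-07-01', acquired='1982/2018', cfg=cfg)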
def parameters(r):
    '''Check HTTP request parameters'''

    tx = get('tx', r, None)
    ty = get('ty', r, None)
    acquired = get('acquired', r, None)
    chips = get('chips', r, None)
    date = get('date', r, None)

    if (tx is None or ty is None or acquired is None
            or chips is None or date is None):
        raise Exception('tx, ty, acquired, chips and date are required parameters')
    else:
        return {'tx': int(tx),
                'ty': int(ty),
                'acquired': acquired,
                'date': date,
                'chips': list(map(lambda chip: (int(first(chip)), int(second(chip))), chips)),
                'test_data_exception': get('test_data_exception', r, None),
                'test_training_exception': get('test_training_exception', r, None),
                'test_save_exception': get('test_save_exception', r, None)}
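# Example payload (illustrative): all five required keys present; chip
# coordinates are coerced to integer (x, y) tuples:
#
#   parameters({'tx': '150000', 'ty': '2100000', 'acquired': '1982/2018',
#               'date': '2001-07-01', 'chips': [['1484415', '2114805']]})
#   # -> {'tx': 150000, ..., 'chips': [(1484415, 2114805)], ...}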
def aux_filter(ctx):
    return assoc(ctx,
                 'aux',
                 dict(list(filter(lambda d: first(get('nlcdtrn', second(d))) != 0,
                                  ctx['aux'].items()))))
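# Behavior sketch (aux layout inferred from the lambda above): each aux value
# is a dict whose 'nlcdtrn' entry is a sequence; entries whose first nlcdtrn
# value is 0 are dropped:
#
#   ctx = {'aux': {(0, 0): {'nlcdtrn': [82]}, (0, 1): {'nlcdtrn': [0]}}}
#   aux_filter(ctx)['aux']  # -> {(0, 0): {'nlcdtrn': [82]}}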
def __curse_second(self):
    """
    Help on cython_function_or_method in module cytoolz.itertoolz:

    The second element in a sequence

    >>> (1, 2, 3).second()
    2
    """
    return cytoolz.second(self)
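# Attachment sketch (assumed, not shown here): the __curse_* naming suggests
# the method is bound onto builtins with forbiddenfruit, e.g.:
#
#   from forbiddenfruit import curse
#   curse(tuple, 'second', __curse_second)
#   (1, 2, 3).second()  # -> 2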
def check(a, b):
    """Reducer for efficiently comparing two unordered sequences.
    Executes in linear (O(n)) time.

    Args:
        a: (k, [datestring1, datestring2...]) item
        b: (k, [datestring2, datestring1...]) item

    Returns:
        b if a == b, else raises Exception with details
    """
    if f.seqeq(second(a), second(b)):
        return b
    else:
        msg = ('asymmetric dates detected - {} != {}'
               .format(first(a), first(b)))
        msga = '{}{}'.format(first(a), second(a))
        msgb = '{}{}'.format(first(b), second(b))
        raise Exception('\n\n'.join([msg, msga, msgb]))
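# Usage sketch: check() is written as a reducer, so comparing every value in a
# datemap pairwise is a single functools.reduce call (see symmetric() below):
#
#   from functools import reduce
#   reduce(check, {'reds': [d1, d2], 'blues': [d2, d1]}.items())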
def aux(ctx, cfg):
    '''Retrieve aux data'''

    data = merlin.create(x=ctx['cx'],
                         y=ctx['cy'],
                         acquired=ctx['acquired'],  # '1982/2018'
                         cfg=merlin.cfg.get(profile='chipmunk-aux',
                                            env={'CHIPMUNK_URL': cfg['aux_url']}))
    return assoc(ctx,
                 'aux',
                 {first(d): second(d) for d in merlin.functions.denumpify(data)})
def aux(ctx, cfg):
    '''Retrieve aux data'''

    logger.info("getting aux for cx:{} cy:{}".format(ctx['cx'], ctx['cy']))

    data = merlin.create(x=ctx['cx'],
                         y=ctx['cy'],
                         acquired=ctx['acquired'],
                         cfg=merlin.cfg.get(profile='chipmunk-aux',
                                            env={'CHIPMUNK_URL': cfg['aux_url']}))
    return assoc(ctx,
                 'aux',
                 {first(d): second(d) for d in merlin.functions.denumpify(data)})
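# Result-shape sketch (inferred from test_pyccd above, not confirmed): each d
# is a ((cx, cy, px, py), values) pair, so ctx['aux'] becomes e.g.:
#
#   {(cx, cy, px, py): {'nlcdtrn': [...], ...}, ...}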
def symmetric(datemap):
    """Returns a sequence of dates that are common to all map values if all
    datemap values are represented, else Exception.

    Args:
        datemap: {key: [datestrings,]}

    Returns:
        Sequence of date strings or Exception

    Example:
        >>> symmetric({"reds": [ds3, ds1, ds2], "blues": [ds2, ds3, ds1]})
        [2, 3, 1]
        >>>
        >>> symmetric({"reds": [ds3, ds1], "blues": [ds2, ds3, ds1]})
        Exception: reds:[3, 1] does not match blues:[2, 3, 1]
    """

    def check(a, b):
        """Reducer for efficiently comparing two unordered sequences.
        Executes in linear (O(n)) time.

        Args:
            a: (k, [datestring1, datestring2...]) item
            b: (k, [datestring2, datestring1...]) item

        Returns:
            b if a == b, else raises Exception with details
        """
        if f.seqeq(second(a), second(b)):
            return b
        else:
            msg = ('asymmetric dates detected - {} != {}'
                   .format(first(a), first(b)))
            msga = '{}{}'.format(first(a), second(a))
            msgb = '{}{}'.format(first(b), second(b))
            raise Exception('\n\n'.join([msg, msga, msgb]))

    return second(reduce(check, datemap.items()))
def test_create():
    c = cfg.get('chipmunk-ard', env=test.env)
    x, y = c.get('snap_fn')(x=test.x, y=test.y).get('chip').get('proj-pt')
    ubids = cfg.ubids.get('chipmunk-ard').get('reds')
    registry = c.get('registry_fn')()
    refspec = specs.refspec(specs.mapped(specs=registry, ubids={'reds': ubids}))
    # print("REFSPEC:{}".format(refspec))
    chipseq = c.get('chips_fn')(x=x, y=y, acquired=test.acquired, ubids=ubids)
    dateseq = dates.mapped(chipmap=dict(reds=chipseq)).get('reds')
    grid = {x['name']: x for x in c.get('grid_fn')()}.get('chip')

    locations = chips.locations(x=x,
                                y=y,
                                cw=first(refspec.get('data_shape')),
                                ch=second(refspec.get('data_shape')),
                                rx=grid.get('rx'),
                                ry=grid.get('ry'),
                                sx=grid.get('sx'),
                                sy=grid.get('sy'))

    _rods = rods.create(x=x,
                        y=y,
                        chipseq=chipseq,
                        dateseq=dateseq,
                        locations=locations,
                        spec_index=specs.index(registry))

    assert len(_rods) == 10000
    assert type(_rods) is dict
def second(self):
    return cytoolz.second(self)
def serialize_deserialize(t):
    assert f.deserialize(second(f.serialize(t))) == t
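# Round-trip sketch (hypothetical payloads): f.serialize returns a
# (digest, payload) pair (see save() above), so second() extracts the payload
# before deserializing:
#
#   serialize_deserialize({'a': 1})
#   serialize_deserialize([1, 2, 3])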