def events_index():
    now = gefolge_web.util.now()
    future_events, past_events = more_itertools.partition(
        lambda event: event.end is not None and event.end < now,
        gefolge_web.event.model.Event)
    future_events, current_events = more_itertools.partition(
        lambda event: event.start is not None and event.start < now,
        future_events)
    return {
        'current_events': sorted(current_events),
        'future_events': sorted(future_events),
        'past_events': sorted(past_events, reverse=True)
    }
def more_itertools_partition():
    obj1 = [1, 2, 3, 4, 5, 10, 8]
    lst_a, lst_b = partition(lambda x: x % 2 == 0, obj1)
    print(list(lst_a))
    print(list(lst_b))
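# For reference: more_itertools.partition(pred, iterable) returns a pair of
# lazy iterators -- first the items for which pred is falsy, then the items
# for which it is truthy -- which is why the example above prints the odd
# numbers before the even ones. A minimal sketch (the variable names below are
# illustrative, not taken from the examples in this file):
from more_itertools import partition

odds, evens = partition(lambda x: x % 2 == 0, [1, 2, 3, 4, 5, 10, 8])
print(list(odds))   # -> [1, 3, 5]
print(list(evens))  # -> [2, 4, 10, 8]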
def sort_line_chars(
        chars: Sequence[PDFChar],
        interpreter: PDFPageInterpreter) -> Sequence[PDFChar]:
    chars = (normalize_char(char, interpreter) for char in chars)
    chars = sorted(chars, key=lambda char: char["x0"])
    main_chars, combining_chars = partition(
        lambda char: char["text"] and unicodedata.combining(char["text"]),
        chars)
    combining_chars_iter = peekable(iter(combining_chars))
    for main_char in main_chars:
        yield main_char
        while combining_chars_iter:
            combining_char = combining_chars_iter.peek()
            overlap = max(
                min(main_char["x1"], combining_char["x1"])
                - max(main_char["x0"], combining_char["x0"]),
                0)
            if overlap < main_char["width"] * Decimal("0.5"):
                break
            yield combining_char
            next(combining_chars_iter, None)
    assert (next(combining_chars_iter, None) is None)
    return
    yield
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    self.add_config_to_report()
    self.check_email_domain_missing()
    if not self.should_skip_this_run:
        # Initialize the checkpoints
        self._init_checkpoints()
        # Generate the workunits.
        access_events = self._get_snowflake_history()
        aggregated_info_items_raw, operation_aspect_work_units_raw = partition(
            lambda x: isinstance(x, MetadataWorkUnit),
            self._aggregate_access_events(access_events),
        )
        for wu in cast(Iterable[MetadataWorkUnit], operation_aspect_work_units_raw):
            self.report.report_workunit(wu)
            yield wu
        aggregated_info_items = list(aggregated_info_items_raw)
        assert len(aggregated_info_items) == 1
        for time_bucket in cast(
            AggregatedAccessEvents, aggregated_info_items[0]
        ).values():
            for aggregate in time_bucket.values():
                wu = self._make_usage_stat(aggregate)
                self.report.report_workunit(wu)
                yield wu
async def handle_addresses(self, org_units, filename):
    addresses = map(attrgetter("post_address"), org_units)
    addresses = set(filter(None.__ne__, addresses))
    if len(addresses) == 0:
        return

    address_lookups = await dar_helper.dar_datavask_multiple(addresses)

    # Split into two lists where lookup succeeded and failed
    success, failure = partition(lambda x: x[1] is None, address_lookups)

    success = dict(success)
    print(f"{len(success)} addresses found")
    self.dar_cache.update(success)

    failed_addresses = set(map(itemgetter(0), failure))
    print(f"{len(failed_addresses)} addresses could not be found")

    if failed_addresses:
        # Join `failed_addresses` with `org_units` on `post_address` to get
        # `org_uuid` for each failed address lookup.
        joined: List[FailedDARLookup] = [
            FailedDARLookup(
                org_uuid=org_unit.org_uuid,
                post_address=org_unit.post_address,
            )
            for org_unit in org_units
            if org_unit.post_address in failed_addresses
        ]
        self.write_failed_addresses(joined, filename)
def filter_units(units, filter_ids):
    """Splits units into two based on filter_ids.

    Partitions the units such that no unit with a parent-id in filter_ids
    exist in one list. Any unit filtered like that is put in the other list.

    Example:
        >>> units = [(1, None), (2, 1), (3, 1), (4, 2), (5, 2), (6, 3), (7, 5)]
        >>> tup_to_unit = lambda tup: {'@id': tup[0], 'parentOrgUnit': tup[1]}
        >>> units = list(map(tup_to_unit, units))
        >>> get_ids = lambda units: list(map(itemgetter('@id'), units))
        >>> a, b = filter_units(units, [1])
        >>> get_ids(a)
        [1, 2, 3, 4, 5, 6, 7]
        >>> get_ids(b)
        []
        >>> a, b = filter_units(units, [2])
        >>> get_ids(a)
        [2, 4, 5, 7]
        >>> get_ids(b)
        [1, 3, 6]
        >>> a, b = filter_units(units, [3])
        >>> get_ids(a)
        [3, 6]
        >>> get_ids(b)
        [1, 2, 4, 5, 7]
        >>> a, b = filter_units(units, [3, 5])
        >>> get_ids(a)
        [3, 5, 6, 7]
        >>> get_ids(b)
        [1, 2, 4]
        >>> a, b = filter_units(units, [3, 7])
        >>> get_ids(a)
        [3, 6, 7]
        >>> get_ids(b)
        [1, 2, 4, 5]

    Args:
        units: List of units
        filter_ids: List of unit IDs to filter parents on

    Returns:
        list: List of units, with some filtered out
    """
    def get_parent(parent_map, entry):
        """Build a list of parents."""
        parent = parent_map.get(entry, None)
        if parent is None:
            return [entry]
        return [entry] + get_parent(parent_map, parent)

    parent_map = dict(map(itemgetter("@id", "parentOrgUnit"), units))
    filter_set = set(filter_ids)

    def is_disjoint_from_filter_ids(unit):
        """Test for overlap between parents and filter_set."""
        parent_set = set(get_parent(parent_map, unit["@id"]))
        return parent_set.isdisjoint(filter_set)

    return partition(is_disjoint_from_filter_ids, units)
async def handle_edit(self, filename: str, filedate: datetime):
    """
    Handle changes to existing org units and details
    We are guaranteed to only have one row per org unit

    New details on an existing org unit will show up in this file, rather than
    the 'nye' file. So we have to potentially perform inserts of new data.

    As a row contains information about the org unit as well as its details,
    we do not know what has been changed. However, all information is managed
    by the external system so we can safely reimport the "same" data, as
    opposed to trying to compare the existing objects in OS2mo
    """
    org_units = los_files.read_csv(filename, OrgUnit)
    org_unit_payloads = self.create_unit_payloads(org_units)
    detail_payloads = await self.create_detail_payloads(org_units)

    orgfunk_uuids = set(await util.lookup_organisationfunktion())
    detail_creates, detail_edits = partition(
        lambda payload: payload["uuid"] in orgfunk_uuids, detail_payloads)
    converter = partial(mo_payloads.convert_create_to_edit,
                        from_date=filedate.date().isoformat())
    edits = map(converter, chain(org_unit_payloads, detail_edits))

    async with util.get_client_session() as session:
        await util.create_details(session, detail_creates)
        await util.edit_details(session, edits)
def get_workunits(self) -> Iterable[MetadataWorkUnit]:
    clients = self._make_bigquery_clients()
    bigquery_log_entries = self._get_bigquery_log_entries(clients)
    parsed_bigquery_log_events = self._parse_bigquery_log_entries(
        bigquery_log_entries)
    parsed_events_uncasted: Iterable[Union[ReadEvent, QueryEvent, MetadataWorkUnit]]
    last_updated_work_units_uncasted: Iterable[Union[ReadEvent, QueryEvent, MetadataWorkUnit]]
    parsed_events_uncasted, last_updated_work_units_uncasted = partition(
        lambda x: isinstance(x, MetadataWorkUnit), parsed_bigquery_log_events)
    parsed_events: Iterable[Union[ReadEvent, QueryEvent]] = cast(
        Iterable[Union[ReadEvent, QueryEvent]], parsed_events_uncasted)
    last_updated_work_units: Iterable[MetadataWorkUnit] = cast(
        Iterable[MetadataWorkUnit], last_updated_work_units_uncasted)

    if self.config.include_operational_stats:
        for wu in last_updated_work_units:
            self.report.report_workunit(wu)
            yield wu

    hydrated_read_events = self._join_events_by_job_id(parsed_events)
    aggregated_info = self._aggregate_enriched_read_events(hydrated_read_events)

    for time_bucket in aggregated_info.values():
        for aggregate in time_bucket.values():
            wu = self._make_usage_stat(aggregate)
            self.report.report_workunit(wu)
            yield wu
async def stats(self, ctx):
    """Shows some general statistics about the bot.

    Do not confuse this with `{prefix}about` which is just the general info.
    This is just numbers.
    """
    bot = self.bot
    command_map = itertools.starmap('{1} {0}'.format, bot.command_counter.most_common())
    command_stats = '\n'.join(command_map) or 'No stats yet.'
    extension_stats = '\n'.join(f'{len(set(getattr(bot, attr).values()))} {attr}'
                                for attr in ('cogs', 'extensions'))

    with self.process.oneshot():
        memory_usage_in_mb = self.process.memory_full_info().uss / 1024**2
        cpu_usage = self.process.cpu_percent() / psutil.cpu_count()

    uptime_seconds = bot.uptime.total_seconds()
    average_messages = bot.message_counter / uptime_seconds
    message_field = f'{bot.message_counter} messages\n({average_messages :.2f} messages/sec)'

    text, voice = partition(lambda c: isinstance(c, discord.TextChannel), bot.get_all_channels())
    presence = (f"{bot.guild_count} Servers\n{ilen(text)} Text Channels\n"
                f"{ilen(voice)} Voice Channels\n{bot.user_count} Users")

    chiaki_embed = (discord.Embed(description=bot.appinfo.description, colour=self.bot.colour)
                    .set_author(name=str(ctx.bot.user), icon_url=bot.user.avatar_url)
                    .add_field(name='Modules', value=extension_stats)
                    .add_field(name='CPU Usage', value=f'{cpu_usage}%\n{memory_usage_in_mb :.2f}MB')
                    .add_field(name='Messages', value=message_field)
                    .add_field(name='Presence', value=presence)
                    .add_field(name='Commands', value=command_stats)
                    .add_field(name='Uptime', value=self.bot.str_uptime.replace(', ', '\n'))
                    )
    await ctx.send(embed=chiaki_embed)
def _parse(self,
           args: GcsfsIngestArgs,
           contents_handle: GcsfsFileContentsHandle) -> IngestInfo:
    # Preprocess raw data.
    df = pd.read_csv(contents_handle.local_file_path, dtype=str).fillna('')
    df = df[df['Custody Status'] != 'Released']
    # People who are rearrested can have multiple bonds for the same charge;
    # the bond with the greatest ID is the most current one.
    df = df[df.groupby('Charge ID')['BondID'].transform('max') == df['BondID']]
    # Some people have booking number collisions, so make these unique
    # per person.
    df['Booking Number'] += ' (Individual ID: ' + df['Individual ID'] + ')'

    ingest_info = super()._parse(
        args, self.DataFrameContentsHandle(contents_handle.local_file_path, df))

    # Postprocess IngestInfo
    for charge in ingest_info.get_all_charges():
        if charge.name:
            charge_parts = charge.name.lstrip('*').split('/')
            name, notes = partition(self._is_charge_note, charge_parts)
            charge.name = '/'.join(name)
            charge.charge_notes = '/'.join(notes)
        if charge.degree:
            match = re.match(r'CLASS (.+) MISDEMEANOR', charge.degree, re.IGNORECASE)
            if match:
                charge.level = match.group(1)
                charge.degree = None
            elif 'STATE JAIL' in charge.degree.upper():
                charge.level = charge.degree
                charge.degree = None

    return ingest_info
def get_schema_graph(db: PartitionedDatabase) -> JsonDict:
    with db.transaction() as tx:
        structure = db.graph_schema_structure_tx(tx)
        models: List[Model] = structure.models
        model_ids: List[ModelId] = [m.id for m in models]
        property_counts: Dict[ModelId, int] = db.get_property_counts_tx(tx, model_ids)
        one_to_one, one_to_many = partition(
            lambda r: r.one_to_many, structure.relationships
        )
        legacy_models = [
            dict(type="concept", **to_concept_dict(m, property_counts[m.id]))
            for m in models
        ]
        legacy_model_relationships = [
            dict(type="schemaRelationship", **to_legacy_relationship(r))
            for r in one_to_many
        ]
        legacy_schema_linked_properties = [
            to_schema_linked_property(r) for r in one_to_one
        ]
        return (
            legacy_models
            + legacy_model_relationships
            + legacy_schema_linked_properties
        )
def partition_by_eligibility(charges: Tuple[Charge, ...]):
    ineligible_charges_generator, eligible_charges_generator = partition(
        lambda c: c.expungement_result.charge_eligibility.status == ChargeEligibilityStatus.ELIGIBLE_NOW
        if c.expungement_result.charge_eligibility
        else False,
        charges,
    )
    return list(eligible_charges_generator), list(ineligible_charges_generator)
def parse(self) -> list[Ast]:
    successes, fails = partition(pred=lambda result: result.item is None,
                                 iterable=self._parse())
    for fail in fails:
        logger.debug(fail.error)
    return [success.item for success in successes]  # type: ignore
def sort_conditions(self, *conditions, allow_having=True):
    having_list = []
    where_list = []
    not_comps, comps = partition(lambda c: isinstance(c, SQLComparison), conditions)
    where, having = partition(lambda c: c.aggregate, comps)
    if not self.allow_having and having:
        raise SchemaError("'HAVING' can't be in an UPDATE statement.")
    if not allow_having and having:
        raise SchemaError("'HAVING' can't be an 'OR' condition.")
    where_list.extend(where)
    having_list.extend(having)
    for c in not_comps:
        where_list.append(self.sort_conditions(*c)[0])
    if allow_having:
        return tuple(where_list), tuple(having_list)
    return tuple(where_list)
def run_scanning_analyzers(data,
                           analyzers: Sequence[Analyzer],
                           aggregate_with=None,
                           save_state_with=None) -> AnalyzerContext:

    others, shareable = partition(
        lambda a: isinstance(a, ScanShareableAnalyzer), analyzers)
    shareable_list: List[ScanShareableAnalyzer] = cast(
        List[ScanShareableAnalyzer], list(shareable))

    def merge_aggregations(aggregations_list: List[AggDefinition]):
        ma = defaultdict(set)  # type: ignore
        for ags in aggregations_list:
            for k in ags:
                ma[k] = ma[k] | ags[k]
        return dict(ma)

    # Compute aggregation functions of shareable analyzers in a single pass
    # over the data
    # On Pandas this does not make a lot of sense
    results = None
    metrics_by_analyzer: Dict[Analyzer, Metric] = {}
    if len(shareable_list):
        try:
            # aggregations =
            #     list(flatten(a._aggregation_functions() for a in shareable))
            # This is a dic with column -> aggregation lists
            merged_aggregations = merge_aggregations(
                [a._aggregation_functions() for a in shareable_list])
            # aggregations_names = list(flatten(list(merged_aggregations.values())))

            # Compute offsets so that the analyzers can correctly pick their
            # results from the row
            # FIXME: Note that this only works if the aggregation does not generates
            # from now on internally the analyzers will use the function name so the
            # offset is not used (at least for the pandas implementation)
            agg_functions = [0] + [
                len(a._aggregation_functions()) for a in shareable_list
            ]
            offsets = list(accumulate(agg_functions, lambda a, b: a + b))[:-1]

            results = data.agg(merged_aggregations)
            for an, offset in zip(shareable_list, offsets):
                metrics_by_analyzer[an] = _success_or_failure_metric_from(
                    an, results, offset)
        except Exception as e:
            metrics_by_analyzer = {
                a: a.to_failure_metric(e) for a in analyzers
            }

        analyzer_context = AnalyzerContext(metrics_by_analyzer)
    else:
        analyzer_context = AnalyzerContext()

    # TODO: Run not shareable analyzers
    return analyzer_context
def set_up_pools(npools, nsamples, nreplicates):
    pool_size = int(np.ceil(nsamples * nreplicates / npools))
    pool_cnt = [pool_size] * npools  # count free slots per pool
    pool_cnt_sum = pool_size * npools
    pool_idx = list(range(npools))  # [0, ..., npools-1]
    pool_probs = [0] * npools  # init list of length npools
    pool_log = [set() for _ in range(npools)]  # which sample in which pool?

    try:
        for sample in range(nsamples):
            # Weigh pools by number of free sample slots.
            # Obtain probs for each pool by dividing by total # of free slots.
            for pool, count in enumerate(pool_cnt):
                pool_probs[pool] = count / pool_cnt_sum

            # Choose nreplicates many distinct pool.
            address = np.random.choice(
                pool_idx, nreplicates, replace=False, p=pool_probs)

            # Reduce free slots for chosen pools and add samples to pool_log.
            for pool in address:
                pool_cnt[pool] -= 1  # once 0, it wont be sampled again
                pool_log[pool].add(sample)
            pool_cnt_sum -= nreplicates  # update sum of free slots
    except ValueError:
        # We had bad luck with sampling, there are less than nreplicates
        # many non-full pools (but we do have enough slots in the remaining
        # pools!).
        nonfull_pools, full_pools = [set(iter) for iter in partition(
            lambda i: pool_cnt[i] == 0, pool_idx)]

        # Process all remaining samples.
        for sample in range(sample, nsamples):
            nnon_full = len(nonfull_pools)
            # Take as many non-full pools as we have, and add as many full
            # pools as neccessary to get enough replicates.
            address = list(nonfull_pools) + list(np.random.choice(
                list(full_pools), nreplicates - nnon_full, replace=False))

            # Add samples to pool_log as above.
            for pool in address:
                pool_log[pool].add(sample)

            # Book-keeping: keep track of new full pools.
            new_full_pools = set()
            for pool in nonfull_pools:
                pool_cnt[pool] -= 1
                if pool_cnt[pool] == 0:
                    # pool is now full
                    new_full_pools.add(pool)
            full_pools.update(new_full_pools)
            nonfull_pools -= new_full_pools

    return pool_log
def fvalidate_tickets(rules: dict, nearby: List[str]) -> Tuple:
    # So much more verbose to not do this imperatively
    validator = partial(validate_ticket, rules)
    validated = lmap(validator, nearby)
    invalid, valid = lmap(list, partition(lambda x: x[0], validated))
    valid_tickets = [_[1] for _ in valid]
    invalid_tickets = [_[1] + _[2] for _ in invalid]
    invalid_nums = list(concat([_[2] for _ in invalid]))
    valid_nums = list(concat([_[1] for _ in valid]))
    return valid_tickets, invalid_tickets, valid_nums, invalid_nums
def _params_by_weight_decay(self, model, **kwargs):
    no_decay_kwargs = deepcopy(kwargs)
    no_decay_kwargs['weight_decay'] = 0.0

    named_weights, named_biases = partition(
        _is_bias_namedparam, named_trainable_parameters(model))
    yield {'params': map(lambda x: x[-1], named_weights), **kwargs}
    if named_biases:
        yield {
            'params': map(lambda x: x[-1], named_biases),
            **no_decay_kwargs
        }
def name_values():
    for thing in map(
            list,
            partition(lambda e: isinstance(e, discord.TextChannel), entries)):
        if not thing:
            continue
        name = f'{_get_class_name(thing[0])}{"s" * (len(thing) != 1)}'
        value = truncate(', '.join(map(str, thing)), 1024, '...')
        yield name, value
def two_columns_table(stats, split_by):
    # type: (Dict, Callable[Tuple[Any,Any], bool]) -> str
    """
    Split the items by a filter function "split_by" and merge

    Example:
        two_columns_table({1: "a", 2: "b", 3: "c", 4: "d"}, is_odd)
        [ 2 , b , 1 , a ]
        [ 3 , d , 3 , c ]
    """
    splatted = partition(split_by, stats.items())
    flatten_merged_rows = map(collapse, zip_longest(*splatted))
    return safe_tabulate(flatten_merged_rows, headers=())
def pytest_collection_modifyitems(items, config) -> None:
    """Deselect any items marked "full" unless the --full flag is set."""
    normal, interactive = partition(
        lambda item: bool(item.get_closest_marker("interactive")), items)
    select, deselect = (interactive, normal) if config.option.interactive else (normal, interactive)
    config.hook.pytest_deselected(items=deselect)
    items[:] = select
def configure_whitenoise(config):
    not_security_middleware, security_middleware = partition(
        lambda item: "security" in item.lower(), config['MIDDLEWARE'])
    config['MIDDLEWARE'] = list(security_middleware) + \
        ['whitenoise.middleware.WhiteNoiseMiddleware'] + \
        list(not_security_middleware)

    # Configuration for whitenoise
    config['STATIC_ROOT'] = config['STATIC_PATH']
    config['STATICFILES_STORAGE'] = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
def build_func_parameter_values(task_definition, task_args, task_kwargs):
    # type: (TaskDefinition, List[Any], Dict[str, Any]) -> List[ParameterValue]
    """
    In tracking task we need to build params without definitions.
    Those params value need no calculations and therefore are very easy to construct
    """
    callable_spec = (
        task_definition.task_decorator.get_callable_spec()
    )  # type: CallableSpec
    values = []

    # convert any arg to kwarg if possible
    args, task_kwargs = args_to_kwargs(callable_spec.args, task_args, task_kwargs)

    # the parameter of the * argument
    if callable_spec.varargs:
        # build the parameter value for the varargs
        vargs_param = build_user_parameter_value(
            callable_spec.varargs,
            tuple(args),
            source=task_definition.full_task_family_short,
        )
        values.append(vargs_param)

    # distinguish between the parameter we expect to create vs those we don't
    unknown_kwargs, known_kwargs = more_itertools.partition(
        lambda kwarg: kwarg[0] in callable_spec.args,
        six.iteritems(task_kwargs))

    # the parameter of the ** argument
    if callable_spec.varkw:
        # build the parameter value for the varkw
        varkw_param = build_user_parameter_value(
            callable_spec.varkw,
            dict(unknown_kwargs),
            source=task_definition.full_task_family_short,
        )
        values.append(varkw_param)

    # Exclude the self param just like it's excluded in DecoratedCallableParamBuilder
    excluded_params = {"self"}

    for name, value in known_kwargs:
        if name in excluded_params:
            continue
        # build the parameters for the expected parameters
        param_value = build_user_parameter_value(
            name, value, source=task_definition.full_task_family_short)
        values.append(param_value)
    return values
def split_employees_leaves(employees: List[Dict]) -> Tuple[Iterable, Iterable]:
    """Split list of employees into two iterables, with either active employees
    or terminated employees

    >>> e = [{'@action': "test"}, {'@action': "leave"}]
    >>> e1, e2 = split_employees_leaves(e)
    >>> list(e1)
    [{'@action': 'test'}]
    >>> list(e2)
    [{'@action': 'leave'}]
    """
    is_leave = lambda empl: empl.get("@action") == "leave"
    return partition(is_leave, employees)
def evolve(tiles):
    new_tiles = set()
    white_tiles = Counter()
    for tile in tiles:
        white_neighbours, black_neighbours = mit.partition(
            tiles.__contains__, neighbours(tile))
        white_tiles.update(white_neighbours)
        if 0 < mit.ilen(black_neighbours) <= 2:
            new_tiles.add(tile)
    new_tiles.update(t for t in white_tiles if white_tiles[t] == 2)
    return new_tiles
def next(self):
    new = GridPart1()
    inactive_seen = set()
    for node in self.active:
        inactive, active = mit.partition(self.active.__contains__,
                                         GridPart1.neighbours(node))
        if mit.ilen(active) in (2, 3):
            new.active.add(node)
        for neighbour in set(inactive) - self.active - inactive_seen:
            inactive_seen.add(neighbour)
            if len(set(GridPart1.neighbours(neighbour)) & self.active) == 3:
                new.active.add(neighbour)
    return new
def main():
    with open("input") as lines:
        lines = [line.strip() for line in lines]

    incomplete, corrupted = partition(
        lambda value: isinstance(value, Corrupted),
        map(analyse, lines),
    )

    syntax_error_score = sum(mismatch.score for mismatch in corrupted)
    print(syntax_error_score)

    autocomplete_score = median(autocompletion.score for autocompletion in incomplete)
    print(autocomplete_score)
def pick_toggled(values):
    bases, toggles = partition(
        lambda x: isinstance(x, Toggle),
        values,
    )

    toggle = first(toggles, Toggle(False))
    try:
        base = first(bases)
    except ValueError:
        raise NoValueFound("can't find base value to toggle", values.log)

    return toggle.value != base
def expand_primer_extension_products(self, only_one_required=False, lim_size=True):
    primers = self.get_alignments_by_types(Constants.PRIMER)

    rev, fwd = partition(lambda p: p.subject_region.direction == 1, primers)
    fwd, fwd_keys = sort_with_keys(fwd, key=lambda p: p.query_region.b)
    rev, rev_keys = sort_with_keys(rev, key=lambda p: p.query_region.a)

    pairs = []
    for f in fwd:
        rev_bind_region = f.query_region[:-Config.PRIMER_MIN_BIND]
        rev_bind = self.filter_alignments_by_span(
            rev, rev_bind_region, key=lambda p: p.query_region.a)
        rev_bind, rkeys = sort_with_keys(rev_bind, key=lambda p: p.query_region.a)

        for r in rev_bind:
            if r.query_region.b in f.query_region:
                if r.query_region.b == f.query_region.b:
                    pass
                else:
                    continue
            pairs += self._create_primer_extension_alignment(
                f,
                r,
                Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
                lim_size=lim_size,
            )

    if only_one_required:
        for f in fwd:
            # existing fwd primer
            pairs += self._create_primer_extension_alignment(
                f,
                None,
                Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
                lim_size=lim_size,
            )

        for r in rev:
            # existing fwd primer
            pairs += self._create_primer_extension_alignment(
                None,
                r,
                Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
                lim_size=lim_size,
            )
    return pairs
def traverse_linop(op: Callable[[Expr], Expr],
                   ccheck: Callable[[Expr], bool],
                   expr: Expr) -> Expr:
    """
    Traverses an expression tree applying op with the linear operator rules
    """
    rself: Callable[[Expr], Expr] = partial(traverse_linop, op, ccheck)

    # sub1 + sub2
    if expr.func == Add:
        return Add(*map(rself, expr.args))

    # const * sub1 * sub2
    if expr.func == Mul:
        variables, [*constants] = partition(ccheck, expr.args)
        if len(constants) > 0:
            return Mul(*constants, rself(Mul(*variables)))

    return op(expr)
def test_bool(self):
    """Test when pred() returns a boolean"""
    lesser, greater = mi.partition(lambda x: x > 5, range(10))
    self.assertEqual(list(lesser), [0, 1, 2, 3, 4, 5])
    self.assertEqual(list(greater), [6, 7, 8, 9])
def test_arbitrary(self):
    """Test when pred() returns an integer"""
    divisibles, remainders = mi.partition(lambda x: x % 3, range(10))
    self.assertEqual(list(divisibles), [0, 3, 6, 9])
    self.assertEqual(list(remainders), [1, 2, 4, 5, 7, 8])
def resolve_args(namespaces):
    from pathlib import Path

    # TODO: make so that we can run without science frames
    args, head_info, names = namespaces

    # Positional argument and -s / --sci argument mean the same thing
    if args.files_or_directory and not args.sci:
        args.sci = args.files_or_directory

    if args.outdir:
        # output directory given explicitly
        args.outdir = iocheck(args.outdir, os.path.exists, 1)
    # else:
    #     infer output directory from images provided

    # If input is a directory, process all files in tree!
    # If outdir given, rebuild the tree for reduced files there. Otherwise
    # maintain current tree for reduced files.
    from pySHOC import treeops

    root = Path(args.sci[0])
    if root.is_dir():  # this is a directory try process entire tree!
        _infer_indir = False
        # first check if still has day-by-day folders
        # if next(args.sci[0].glob('[01][0-9][0-3][0-9]'), None):
        #     # try to partition
        #     treeops.partition_by_source(args.sci[0])

        # get file tree
        tree = treeops.get_tree(root, '.fits')
        flats = tree.pop('flats', tree.pop('flat', None))
        bias = tree.pop('bias', None)
        if not args.flats:
            args.flats = flats
        if not args.bias:
            args.bias = bias
        # flatten the tree into list of files
        args.sci = list(mit.flatten(tree.values()))
    else:
        _infer_indir = True

    # Resolve inputs and get the input folder form resolved file list for
    # sci / flats / bias
    _infer_outdir = not bool(args.outdir)
    work_dir = ''
    for name in ('sci', 'flats', 'bias'):  # args.dark # 'sci',
        images = getattr(args, name)
        if images:
            # Resolve the input images
            images = parse.to_list(images, os.path.exists, include='*.fits',
                                   path=work_dir, abspaths=True, raise_error=1)
            # put resolved list in arg namespace
            setattr(args, name, images)
            if _infer_indir:
                work_dir = Path(images[0]).parent
                _infer_indir = False
            if _infer_outdir:
                args.outdir = os.path.split(images[0])[0]
                _infer_outdir = False

    # All inputs should now be resolved to lists of file names
    if args.sci:
        # Initialize Run
        args.sci = shocSciRun.load(args.sci, label='science')
        # TODO: use kind and set that as label default?
        # for cube in args.sci:  # DO YOU NEED TO DO THIS IN A LOOP?
        #     cube._needs_flip = not cube.cross_check(args.sci[0], 'flip_state')
        # self-consistency check for flip state of science cubes
        # NOTE: THIS MAY BE INEFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A
        # DIFFERENT FLIP STATE...

    # ===========================================================================
    if args.gps:
        args.timing = True  # Do timing if gps info given

        if len(args.gps) == 1:
            # triggers give either as single trigger time string or filename of
            # trigger list
            valid_gps = iocheck(args.gps[0], validity.RA, raise_error=-1)
            # if valid single time this will return that same str else None
            if not valid_gps:
                args.gps = parse.to_list(args.gps, validity.RA, path=work_dir,
                                         abspath=0, sort=0, raise_error=1)

        # at ths point args.gps is list of explicit time strings.
        # Check if they are valid representations of time
        args.gps = [iocheck(g, validity.RA, raise_error=1, convert=convert.RA)
                    for g in args.gps]

        # Convert and set as cube attribute
        args.sci.that_need_triggers().set_gps_triggers(args.gps)

        # if any cubes are GPS triggered on each individual frame
        grun = args.sci.that_need_kct()
        if len(args.kct) == 1 and len(grun) != 1:
            warn('A single GPS KCT provided for multiple externally triggered runs. '
                 'Assuming this applies for all these files: %s' % grun)
            args.kct *= len(grun)  # expand by repeating
        elif len(grun) != len(args.kct):
            l = str(len(args.kct)) or 'No'
            s = ': %s' % str(args.kct) if len(args.kct) else ''
            raise ValueError('%s GPS KCT values provided%s for %i file(s): %s'
                             '' % (l, s, len(grun), grun))
            # "Please specify KCT (Exposure time + Dead time):")
            # args.kct = InputCallbackLoop.str(msg, 0.04, check=validity.float, what='KCT')

        for cube, kct in zip(grun, args.kct):
            cube.timing.kct = kct

    # ===========================================================================
    if args.flats or args.bias:
        args.combine = list(map(str.lower, args.combine))
        hows = 'day', 'daily', 'week', 'weekly'
        methods = 'sigma clipped',
        funcs = 'mean', 'median'
        vocab = hows + methods + funcs
        transmap = dict(mit.grouper(hows, 2))
        understood, misunderstood = map(list, mit.partition(vocab.__contains__,
                                                            args.combine))
        if any(misunderstood):
            raise ValueError('Argument(s) {} for combine not understood.'
                             ''.format(misunderstood))
        else:
            understood = [transmap.get(u, u) for u in understood]
            how = next(filter(hows.__contains__, understood))
            func = next(filter(funcs.__contains__, understood))
            meth = next(filter(methods.__contains__, understood), '')
            args.combine = how
            args.fcombine = getattr(np, func)
            print('\nBias/Flat combination will be done by {}.'.format(
                ' '.join([how, meth, func])))
            # TODO: sigma clipping ... even though it sucks

    # ===========================================================================
    if args.flats:
        # TODO full matching here ...
        # args.flats = parse.to_list(args.flats, imaccess, path=work_dir, raise_error=1)
        args.flats = shocFlatFieldRun.load(args.flats, label='flat')

        # isolate the flat fields that match the science frames.
        # only these will be processed
        match = args.flats.cross_check(args.sci, 'binning', 1)
        args.flats = args.flats[match]

        # check which are master flats
        # for flat in args.flats:
        #     flat._needs_flip = not flat.cross_check(args.sci[0], 'flip_state')

        # flag the flats that need to be subframed, based on the science frames
        # which are subframed
        args.flats.flag_sub(args.sci)

        args.flats.print_instrumental_setup()

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]
    # else:
    #     print('WARNING: No flat fielding will be done!')

    # ===========================================================================
    if args.bias:
        # args.bias = parse.to_list(args.bias, imaccess, path=work_dir, raise_error=1)
        args.bias = shocBiasRun.load(args.bias, label='bias')

        # match the biases for the science run
        match4sci = args.bias.cross_check(args.sci, ['binning', 'mode'], 0)
        # for bias in args.bias:
        #     bias._needs_flip = bias.cross_check(args.sci[0], 'flip_state')
        # NOTE: THIS MAY BE INEFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A
        # DIFFERENT FLIP STATE...
        # args.bias[match4sci].flag_sub(args.sci) ?
        args.bias.flag_sub(args.sci)
        args.bias[match4sci].print_instrumental_setup(
            description='(for science frames)')

        # match the biases for the flat run
        if args.flats:
            match4flats = args.bias.cross_check(args.flats, ['binning', 'mode'], -1)
            # args.bias4flats = args.bias[match4flats]
            # for bias in args.bias4flats:
            #     bias._needs_flip = bias.cross_check(args.flats[0], 'flip_state')

            # print table of bias frames
            args.bias[match4flats].print_instrumental_setup(
                description='(for flat fields)')
            match = match4sci & match4flats
        else:
            match = match4sci

        args.bias = args.bias[match]

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]
    # else:
    #     warn( 'No de-biasing will be done!' )

    # ===========================================================================
    if args.split:
        if args.outdir[0]:  # if an output directory is given
            args.outdir = os.path.abspath(args.outdir[0])
            if not os.path.exists(args.outdir):  # if it doesn't exist create it
                print('Creating reduced data directory {}.\n'.format(args.outdir))
                os.mkdir(args.outdir)

    # ===========================================================================
    # Handle header updating here

    # NOTE: somehow, this attribute gets set even though we can never read it
    # due to a syntax error
    delattr(head_info, 'update-headers')

    hi = head_info
    hi.coords = None
    # join arguments since they are read as lists
    hi.object = ' '.join(hi.object)
    hi.ra = ' '.join(hi.ra)
    hi.dec = ' '.join(hi.dec)
    hi.date = ' '.join(hi.date)

    if args.update_headers:
        if hi.ra and hi.dec:
            iocheck(hi.ra, validity.RA, 1)
            iocheck(hi.dec, validity.DEC, 1)
            hi.coords = SkyCoord(ra=hi.ra, dec=hi.dec, unit=('h', 'deg'))  # , system='icrs'
        else:
            from pySHOC.utils import retrieve_coords_ra_dec
            hi.coords, hi.ra, hi.dec = retrieve_coords_ra_dec(hi.object)

        # TODO: maybe subclass SkyCoords to calculate this?
        def is_close(cooA, cooB, threshold=1e-3):
            return np.less([(cooA.ra - cooB.ra).value,
                            (cooA.dec - cooB.dec).value], threshold).all()

        for cube in args.sci:  # TODO: select instead of loop
            if cube.has_coords and hi.coords and not is_close(cube.coords, hi.coords):
                fmt = dict(style='hmsdms', precision=2, sep=' ', pad=1)
                warn('Supplied coordinates {} will supersede header coordinates {} in {}'
                     ''.format(hi.coords.to_string(**fmt),
                               cube.coords.to_string(**fmt),
                               cube.filename()))
                cube.coords = hi.coords

        if not hi.date:
            # hi.date = args.sci[0].date  # [c.date for c in args.sci]
            warn('Dates will be assumed from file creation dates.')

        # if not hi.filter:
        #     warn('Filter assumed as Empty')
        #     hi.filter = 'Empty'

        # if hi.epoch:
        #     iocheck(hi.epoch, validity.epoch, 1)
        # else:
        #     warn('Assuming epoch J2000')
        #     hi.epoch = 2000

        # if not hi.obs:
        #     note('Assuming location is SAAO Sutherland observatory.')
        #     hi.obs = 'SAAO'

        # if not hi.tel:
        #     note('Assuming telescope is SAAO 1.9m\n')  # FIXME: Don't have to assume for new data
        #     hi.tel = '1.9m'

    elif args.timing or args.split:
        # Need target coordinates for Barycentrization! Check the headers
        for cube in args.sci:  # TODO: select instead of loop
            if cube.coords is None:
                warn('Object coordinates not found in header for {}!\n'
                     'Barycentrization cannot be done without knowing target '
                     'coordinates!'.format(cube.filename()))

        # iocheck( hi.date, validity.DATE, 1 )
    # else:
    #     warn( 'Headers will not be updated!' )

    # ===========================================================================
    # if args.timing and not hi.coords:
    #     # Target coordinates not provided / inferred from
    #     warn( 'Barycentrization cannot be done without knowing target coordinates!' )

    if args.names:
        shocFlatFieldRun.nameFormat = names.flats
        shocBiasRun.nameFormat = names.bias
        shocSciRun.nameFormat = names.sci

    # ANIMATE

    return args, head_info, names