Example #1
 def events_index():
     now = gefolge_web.util.now()
     future_events, past_events = more_itertools.partition(
         lambda event: event.end is not None and event.end < now,
         gefolge_web.event.model.Event)
     future_events, current_events = more_itertools.partition(
         lambda event: event.start is not None and event.start < now,
         future_events)
     return {
         'current_events': sorted(current_events),
         'future_events': sorted(future_events),
         'past_events': sorted(past_events, reverse=True)
     }
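The example above chains two partitions but leans on the gefolge_web Event model. A minimal, self-contained sketch of the same double-partition idiom, using hypothetical (start, end) datetime tuples in place of Event objects:

import datetime
from more_itertools import partition

def split_events(events, now):
    # partition() yields (false_items, true_items): events that have not ended
    # yet come first, events that have already ended come second.
    future, past = partition(lambda ev: ev[1] is not None and ev[1] < now, events)
    # Split the remaining events again: anything already started is "current".
    future, current = partition(lambda ev: ev[0] is not None and ev[0] < now, future)
    return list(current), list(future), list(past)

now = datetime.datetime(2024, 6, 1)
events = [
    (datetime.datetime(2024, 5, 1), datetime.datetime(2024, 5, 3)),   # already over
    (datetime.datetime(2024, 5, 31), None),                           # running now
    (datetime.datetime(2024, 7, 1), datetime.datetime(2024, 7, 3)),   # upcoming
]
current, future, past = split_events(events, now)  # 1 current, 1 future, 1 past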
Example #2
def more_itertools_partition():

    obj1 = [1, 2, 3, 4, 5, 10, 8]

    lst_a, lst_b = partition(lambda x: x % 2 == 0, obj1)
    print(list(lst_a))
    print(list(lst_b))
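For reference, more_itertools.partition returns two lazy iterators as (false_items, true_items), so with the predicate above the odd numbers land in lst_a and the even numbers in lst_b. A short sketch showing the expected output of that call:

from more_itertools import partition

lst_a, lst_b = partition(lambda x: x % 2 == 0, [1, 2, 3, 4, 5, 10, 8])
print(list(lst_a))  # [1, 3, 5]      -> items where the predicate was False
print(list(lst_b))  # [2, 4, 10, 8]  -> items where the predicate was True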
Example #3
def sort_line_chars(
        chars: Sequence[PDFChar],
        interpreter: PDFPageInterpreter) -> Sequence[PDFChar]:
    chars = (normalize_char(char, interpreter)
             for char in chars)
    chars = sorted(chars, key=lambda char: char["x0"])
    main_chars, combining_chars = partition(
        lambda char: char["text"] and unicodedata.combining(
            char["text"]), chars)
    combining_chars_iter = peekable(iter(combining_chars))
    for main_char in main_chars:
        yield main_char

        while combining_chars_iter:
            combining_char = combining_chars_iter.peek()

            overlap = max(
                min(main_char["x1"], combining_char["x1"]) -
                max(main_char["x0"], combining_char["x0"]), 0)
            if overlap < main_char["width"] * Decimal("0.5"):
                break

            yield combining_char
            next(combining_chars_iter, None)

    assert (next(combining_chars_iter, None) is None)

    return
    yield
Example #4
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        self.add_config_to_report()
        self.check_email_domain_missing()
        if not self.should_skip_this_run:
            # Initialize the checkpoints
            self._init_checkpoints()
            # Generate the workunits.
            access_events = self._get_snowflake_history()
            aggregated_info_items_raw, operation_aspect_work_units_raw = partition(
                lambda x: isinstance(x, MetadataWorkUnit),
                self._aggregate_access_events(access_events),
            )
            for wu in cast(Iterable[MetadataWorkUnit], operation_aspect_work_units_raw):
                self.report.report_workunit(wu)
                yield wu
            aggregated_info_items = list(aggregated_info_items_raw)
            assert len(aggregated_info_items) == 1

            for time_bucket in cast(
                AggregatedAccessEvents, aggregated_info_items[0]
            ).values():
                for aggregate in time_bucket.values():
                    wu = self._make_usage_stat(aggregate)
                    self.report.report_workunit(wu)
                    yield wu
Example #5
    async def handle_addresses(self, org_units, filename):
        addresses = map(attrgetter("post_address"), org_units)
        addresses = set(filter(None.__ne__, addresses))

        if len(addresses) == 0:
            return

        address_lookups = await dar_helper.dar_datavask_multiple(addresses)

        # Split into two lists where lookup succeeded and failed
        success, failure = partition(lambda x: x[1] is None, address_lookups)

        success = dict(success)
        print(f"{len(success)} addresses found")
        self.dar_cache.update(success)

        failed_addresses = set(map(itemgetter(0), failure))
        print(f"{len(failed_addresses)} addresses could not be found")
        if failed_addresses:
            # Join `failed_addresses` with `org_units` on `post_address` to get
            # `org_uuid` for each failed address lookup.
            joined: List[FailedDARLookup] = [
                FailedDARLookup(
                    org_uuid=org_unit.org_uuid,
                    post_address=org_unit.post_address,
                ) for org_unit in org_units
                if org_unit.post_address in failed_addresses
            ]
            self.write_failed_addresses(joined, filename)
Example #6
def filter_units(units, filter_ids):
    """Splits units into two based on filter_ids.

    Partitions the units such that no unit with a parent-id in filter_ids exist in one list.
    Any unit filtered like that is put in the other list.

    Example:
        >>> units = [(1, None), (2, 1), (3, 1), (4, 2), (5, 2), (6, 3), (7, 5)]
        >>> tup_to_unit = lambda tup: {'@id': tup[0], 'parentOrgUnit': tup[1]}
        >>> units = list(map(tup_to_unit, units))
        >>> get_ids = lambda units: list(map(itemgetter('@id'), units))
        >>> a, b = filter_units(units, [1])
        >>> get_ids(a)
        [1, 2, 3, 4, 5, 6, 7]
        >>> get_ids(b)
        []
        >>> a, b = filter_units(units, [2])
        >>> get_ids(a)
        [2, 4, 5, 7]
        >>> get_ids(b)
        [1, 3, 6]
        >>> a, b = filter_units(units, [3])
        >>> get_ids(a)
        [3, 6]
        >>> get_ids(b)
        [1, 2, 4, 5, 7]
        >>> a, b = filter_units(units, [3, 5])
        >>> get_ids(a)
        [3, 5, 6, 7]
        >>> get_ids(b)
        [1, 2, 4]
        >>> a, b = filter_units(units, [3, 7])
        >>> get_ids(a)
        [3, 6, 7]
        >>> get_ids(b)
        [1, 2, 4, 5]

    Args:
        units: List of units
        filter_ids: List of unit IDs to filter parents on

    Returns:
        tuple: Two iterables of units: those whose ID or ancestor ID is in
            filter_ids, and those unaffected by the filter
    """
    def get_parent(parent_map, entry):
        """Build a list of parents."""
        parent = parent_map.get(entry, None)
        if parent is None:
            return [entry]
        return [entry] + get_parent(parent_map, parent)

    parent_map = dict(map(itemgetter("@id", "parentOrgUnit"), units))
    filter_set = set(filter_ids)

    def is_disjoint_from_filter_ids(unit):
        """Test for overlap between parents and filter_set."""
        parent_set = set(get_parent(parent_map, unit["@id"]))
        return parent_set.isdisjoint(filter_set)

    return partition(is_disjoint_from_filter_ids, units)
Example #7
    async def handle_edit(self, filename: str, filedate: datetime):
        """
        Handle changes to existing org units and details
        We are guaranteed to only have one row per org unit

        New details on an existing org unit will show up in this file, rather than the
        'nye' file. So we have to potentially perform inserts of new data.

        As a row contains information about the org unit as well as its details,
        we do not know what has been changed. However, all information is managed
        by the external system so we can safely reimport the "same" data, as opposed to
        trying to compare the existing objects in OS2mo
        """
        org_units = los_files.read_csv(filename, OrgUnit)
        org_unit_payloads = self.create_unit_payloads(org_units)
        detail_payloads = await self.create_detail_payloads(org_units)

        orgfunk_uuids = set(await util.lookup_organisationfunktion())
        detail_creates, detail_edits = partition(
            lambda payload: payload["uuid"] in orgfunk_uuids, detail_payloads)
        converter = partial(mo_payloads.convert_create_to_edit,
                            from_date=filedate.date().isoformat())
        edits = map(converter, chain(org_unit_payloads, detail_edits))

        async with util.get_client_session() as session:
            await util.create_details(session, detail_creates)
            await util.edit_details(session, edits)
Example #8
    def get_workunits(self) -> Iterable[MetadataWorkUnit]:
        clients = self._make_bigquery_clients()
        bigquery_log_entries = self._get_bigquery_log_entries(clients)
        parsed_bigquery_log_events = self._parse_bigquery_log_entries(
            bigquery_log_entries)
        parsed_events_uncasted: Iterable[Union[ReadEvent, QueryEvent,
                                               MetadataWorkUnit]]
        last_updated_work_units_uncasted: Iterable[Union[ReadEvent, QueryEvent,
                                                         MetadataWorkUnit]]
        parsed_events_uncasted, last_updated_work_units_uncasted = partition(
            lambda x: isinstance(x, MetadataWorkUnit),
            parsed_bigquery_log_events)
        parsed_events: Iterable[Union[ReadEvent, QueryEvent]] = cast(
            Iterable[Union[ReadEvent, QueryEvent]], parsed_events_uncasted)
        last_updated_work_units: Iterable[MetadataWorkUnit] = cast(
            Iterable[MetadataWorkUnit], last_updated_work_units_uncasted)
        if self.config.include_operational_stats:
            for wu in last_updated_work_units:
                self.report.report_workunit(wu)
                yield wu
        hydrated_read_events = self._join_events_by_job_id(parsed_events)
        aggregated_info = self._aggregate_enriched_read_events(
            hydrated_read_events)

        for time_bucket in aggregated_info.values():
            for aggregate in time_bucket.values():
                wu = self._make_usage_stat(aggregate)
                self.report.report_workunit(wu)
                yield wu
Example #9
    async def stats(self, ctx):
        """Shows some general statistics about the bot.

        Do not confuse this with `{prefix}about` which is just the
        general info. This is just numbers.
        """

        bot = self.bot
        command_map = itertools.starmap('{1} {0}'.format, bot.command_counter.most_common())
        command_stats = '\n'.join(command_map) or 'No stats yet.'
        extension_stats = '\n'.join(f'{len(set(getattr(bot, attr).values()))} {attr}'
                                    for attr in ('cogs', 'extensions'))

        with self.process.oneshot():
            memory_usage_in_mb = self.process.memory_full_info().uss / 1024**2
            cpu_usage = self.process.cpu_percent() / psutil.cpu_count()

        uptime_seconds = bot.uptime.total_seconds()
        average_messages = bot.message_counter / uptime_seconds
        message_field = f'{bot.message_counter} messages\n({average_messages :.2f} messages/sec)'

        # partition() yields (false_items, true_items), so the text channels come second.
        voice, text = partition(lambda c: isinstance(c, discord.TextChannel), bot.get_all_channels())
        presence = (f"{bot.guild_count} Servers\n{ilen(text)} Text Channels\n"
                    f"{ilen(voice)} Voice Channels\n{bot.user_count} Users")

        chiaki_embed = (discord.Embed(description=bot.appinfo.description, colour=self.bot.colour)
                        .set_author(name=str(ctx.bot.user), icon_url=bot.user.avatar_url)
                        .add_field(name='Modules', value=extension_stats)
                        .add_field(name='CPU Usage', value=f'{cpu_usage}%\n{memory_usage_in_mb :.2f}MB')
                        .add_field(name='Messages', value=message_field)
                        .add_field(name='Presence', value=presence)
                        .add_field(name='Commands', value=command_stats)
                        .add_field(name='Uptime', value=self.bot.str_uptime.replace(', ', '\n'))
                        )
        await ctx.send(embed=chiaki_embed)
Example #10
    def _parse(self, args: GcsfsIngestArgs,
               contents_handle: GcsfsFileContentsHandle) -> IngestInfo:
        # Preprocess raw data.
        df = pd.read_csv(contents_handle.local_file_path, dtype=str).fillna('')
        df = df[df['Custody Status'] != 'Released']
        # People who are rearrested can have multiple bonds for the same charge;
        # the bond with the greatest ID is the most current one.
        df = df[df.groupby('Charge ID')['BondID'].transform('max') ==
                df['BondID']]
        # Some people have booking number collisions, so make these unique
        # per person.
        df['Booking Number'] += ' (Individual ID: ' + df['Individual ID'] + ')'

        ingest_info = super()._parse(
            args,
            self.DataFrameContentsHandle(contents_handle.local_file_path, df))

        # Postprocess IngestInfo
        for charge in ingest_info.get_all_charges():
            if charge.name:
                charge_parts = charge.name.lstrip('*').split('/')
                name, notes = partition(self._is_charge_note, charge_parts)
                charge.name = '/'.join(name)
                charge.charge_notes = '/'.join(notes)

            if charge.degree:
                match = re.match(r'CLASS (.+) MISDEMEANOR', charge.degree,
                                 re.IGNORECASE)
                if match:
                    charge.level = match.group(1)
                    charge.degree = None
                elif 'STATE JAIL' in charge.degree.upper():
                    charge.level = charge.degree
                    charge.degree = None
        return ingest_info
Example #11
def get_schema_graph(db: PartitionedDatabase) -> JsonDict:
    with db.transaction() as tx:
        structure = db.graph_schema_structure_tx(tx)
        models: List[Model] = structure.models
        model_ids: List[ModelId] = [m.id for m in models]
        property_counts: Dict[ModelId, int] = db.get_property_counts_tx(tx, model_ids)
        one_to_one, one_to_many = partition(
            lambda r: r.one_to_many, structure.relationships
        )

        legacy_models = [
            dict(type="concept", **to_concept_dict(m, property_counts[m.id]))
            for m in models
        ]
        legacy_model_relationships = [
            dict(type="schemaRelationship", **to_legacy_relationship(r))
            for r in one_to_many
        ]
        legacy_schema_linked_properties = [
            to_schema_linked_property(r) for r in one_to_one
        ]

        return (
            legacy_models + legacy_model_relationships + legacy_schema_linked_properties
        )
Example #12
 def partition_by_eligibility(charges: Tuple[Charge, ...]):
     ineligible_charges_generator, eligible_charges_generator = partition(
         lambda c: c.expungement_result.charge_eligibility.status == ChargeEligibilityStatus.ELIGIBLE_NOW
         if c.expungement_result.charge_eligibility
         else False,
         charges,
     )
     return list(eligible_charges_generator), list(ineligible_charges_generator)
Example #13
    def parse(self) -> list[Ast]:
        successes, fails = partition(pred=lambda result: result.item is None,
                                     iterable=self._parse())

        for fail in fails:
            logger.debug(fail.error)

        return [success.item for success in successes]  # type: ignore
Example #14
 def sort_conditions(self, *conditions, allow_having=True):
     having_list = []
     where_list = []
     # Materialize as lists: partition() returns lazy iterators, which would
     # make the truthiness checks on `having` below always pass.
     not_comps, comps = map(list, partition(lambda c: isinstance(c, SQLComparison),
                                            conditions))
     where, having = map(list, partition(lambda c: c.aggregate, comps))
     if not self.allow_having and having:
         raise SchemaError("'HAVING' can't be in an UPDATE statement.")
     if not allow_having and having:
         raise SchemaError("'HAVING' can't be an 'OR' condition.")
     where_list.extend(where)
     having_list.extend(having)
     for c in not_comps:
         where_list.append(self.sort_conditions(*c)[0])
     if allow_having:
         return tuple(where_list), tuple(having_list)
     return tuple(where_list)
Example #15
def run_scanning_analyzers(data,
                           analyzers: Sequence[Analyzer],
                           aggregate_with=None,
                           save_state_with=None) -> AnalyzerContext:

    others, shareable = partition(
        lambda a: isinstance(a, ScanShareableAnalyzer), analyzers)
    shareable_list: List[ScanShareableAnalyzer] = cast(
        List[ScanShareableAnalyzer], list(shareable))

    def merge_aggregations(aggregations_list: List[AggDefinition]):
        ma = defaultdict(set)  # type: ignore
        for ags in aggregations_list:
            for k in ags:
                ma[k] = ma[k] | ags[k]
        return dict(ma)

    # Compute aggregation functions of shareable analyzers in a single pass over
    # the data
    # On Pandas this does not make a lot of sense
    results = None
    metrics_by_analyzer: Dict[Analyzer, Metric] = {}
    if len(shareable_list):
        try:
            # aggregations =
            # list(flatten(a._aggregation_functions() for a in shareable))
            # This is a dic with column -> aggregation lists
            merged_aggregations = merge_aggregations(
                [a._aggregation_functions() for a in shareable_list])
            # aggregations_names = list(flatten(list(merged_aggregations.values())))

            # Compute offsets so that the analyzers can correctly pick their results
            # from the row
            # FIXME: Note that this only works if the aggregation does not generates
            # from now on internally the analyzers will use the function name so the
            # offset is not used (at least for the pandas implementation)
            agg_functions = [0] + [
                len(a._aggregation_functions()) for a in shareable_list
            ]
            offsets = list(accumulate(agg_functions, lambda a, b: a + b))[:-1]
            results = data.agg(merged_aggregations)
            for an, offset in zip(shareable_list, offsets):
                metrics_by_analyzer[an] = _success_or_failure_metric_from(
                    an, results, offset)

        except Exception as e:
            metrics_by_analyzer = {
                a: a.to_failure_metric(e)
                for a in analyzers
            }

        analyzer_context = AnalyzerContext(metrics_by_analyzer)
    else:
        analyzer_context = AnalyzerContext()

    # TODO: Run not shareable analyzers

    return analyzer_context
Example #16
def set_up_pools(npools, nsamples, nreplicates):
    pool_size    = int(np.ceil(nsamples * nreplicates / npools))
    pool_cnt     = [pool_size] * npools           # count free slots per pool
    pool_cnt_sum =  pool_size  * npools
    pool_idx     = list(range(npools))            # [0, ..., npools-1]
    pool_probs   = [0] * npools                   # init list of length npools
    pool_log     = [set() for _ in range(npools)] # which sample in which pool?

    try:
        for sample in range(nsamples):
            # Weigh pools by number of free sample slots.
            # Obtain probs for each pool by dividing by total # of free slots.
            for pool, count in enumerate(pool_cnt):
                pool_probs[pool] = count / pool_cnt_sum

            # Choose nreplicates distinct pools.
            address = np.random.choice(
                pool_idx, nreplicates, replace=False, p=pool_probs)

            # Reduce free slots for chosen pools and add samples to pool_log.
            for pool in address:
                pool_cnt[pool] -= 1         # once 0, it won't be sampled again
                pool_log[pool].add(sample)

            pool_cnt_sum -= nreplicates     # update sum of free slots

    except ValueError:
        # We had bad luck with sampling: there are fewer than nreplicates
        # non-full pools left (but we do have enough slots in the remaining
        # pools!).
        nonfull_pools, full_pools = [set(iter) for iter in partition(
                                     lambda i: pool_cnt[i]==0, pool_idx)]

        # Process all remaining samples.
        for sample in range(sample, nsamples):
            nnon_full = len(nonfull_pools)

            # Take as many non-full pools as we have, and add as many full
            # pools as necessary to get enough replicates.
            address = list(nonfull_pools) + list(np.random.choice(
                    list(full_pools), nreplicates-nnon_full, replace=False))

            # Add samples to pool_log as above.
            for pool in address:
                pool_log[pool].add(sample)

            # Book-keeping: keep track of new full pools.
            new_full_pools = set()
            for pool in nonfull_pools:
                pool_cnt[pool] -= 1
                if pool_cnt[pool] == 0:         # pool is now full
                    new_full_pools.add(pool)
            full_pools.update(new_full_pools)
            nonfull_pools -= new_full_pools

    return pool_log
Example #17
def fvalidate_tickets(rules: dict, nearby: List[str]) -> Tuple:
    # So much more verbose to not do this imperatively
    validator = partial(validate_ticket, rules)
    validated = lmap(validator, nearby)
    invalid, valid = lmap(list, partition(lambda x: x[0], validated))
    valid_tickets = [_[1] for _ in valid]
    invalid_tickets = [_[1] + _[2] for _ in invalid]
    invalid_nums = list(concat([_[2] for _ in invalid]))
    valid_nums = list(concat([_[1] for _ in valid]))
    return valid_tickets, invalid_tickets, valid_nums, invalid_nums
Example #18
 def _params_by_weight_decay(self, model, **kwargs):
     no_decay_kwargs = deepcopy(kwargs)
     no_decay_kwargs['weight_decay'] = 0.0
     # Materialize as lists: partition() returns lazy iterators, which would
     # make the `if named_biases:` check below always truthy.
     named_weights, named_biases = map(list, partition(
         _is_bias_namedparam, named_trainable_parameters(model)))
     yield {'params': map(lambda x: x[-1], named_weights), **kwargs}
     if named_biases:
         yield {
             'params': map(lambda x: x[-1], named_biases),
             **no_decay_kwargs
         }
Example #19
        def name_values():
            for thing in map(
                    list,
                    partition(lambda e: isinstance(e, discord.TextChannel),
                              entries)):
                if not thing:
                    continue

                name = f'{_get_class_name(thing[0])}{"s" * (len(thing) != 1)}'
                value = truncate(', '.join(map(str, thing)), 1024, '...')
                yield name, value
Example #20
def two_columns_table(stats, split_by):
    # type: (Dict, Callable[[Tuple[Any, Any]], bool]) -> str
    """
    Split the items by a filter function "split_by" and merge them into two columns.
    Example:
        two_columns_table({1: "a", 2: "b", 3: "c", 4: "d"}, is_odd)
        [ 2 , b , 1 , a ]
        [ 4 , d , 3 , c ]
    """
    splatted = partition(split_by, stats.items())
    flatten_merged_rows = map(collapse, zip_longest(*splatted))
    return safe_tabulate(flatten_merged_rows, headers=())
Example #21
def pytest_collection_modifyitems(items, config) -> None:
    """Deselect any items marked "full" unless the --full flag is set."""

    normal, interactive = partition(
        lambda item: bool(item.get_closest_marker("interactive")), items)

    select, deselect = (interactive,
                        normal) if config.option.interactive else (normal,
                                                                   interactive)

    config.hook.pytest_deselected(items=deselect)
    items[:] = select
Example #22
def configure_whitenoise(config):
    not_security_middleware, security_middleware = partition(
        lambda item: "security" in item.lower(), config['MIDDLEWARE'])

    config['MIDDLEWARE'] = list(security_middleware) + \
        ['whitenoise.middleware.WhiteNoiseMiddleware'] + \
        list(not_security_middleware)

    # Configuration for whitenoise
    config['STATIC_ROOT'] = config['STATIC_PATH']
    config[
        'STATICFILES_STORAGE'] = 'whitenoise.storage.CompressedManifestStaticFilesStorage'
Example #23
def build_func_parameter_values(task_definition, task_args, task_kwargs):
    # type: (TaskDefinition, List[Any], Dict[str, Any]) -> List[ParameterValue]
    """
    In tracking task we need to build params without definitions.
    Those params value need no calculations and therefore are very easy to construct
    """
    callable_spec = (task_definition.task_decorator.get_callable_spec()
                     )  # type: CallableSpec
    values = []

    # convert any arg to kwarg if possible
    args, task_kwargs = args_to_kwargs(callable_spec.args, task_args,
                                       task_kwargs)

    # the parameter of the * argument
    if callable_spec.varargs:
        # build the parameter value for the varargs
        vargs_param = build_user_parameter_value(
            callable_spec.varargs,
            tuple(args),
            source=task_definition.full_task_family_short,
        )
        values.append(vargs_param)

    # distinguish between the parameter we expect to create vs those we don't
    unknown_kwargs, known_kwargs = more_itertools.partition(
        lambda kwarg: kwarg[0] in callable_spec.args,
        six.iteritems(task_kwargs))

    # the parameter of the ** argument
    if callable_spec.varkw:
        # build the parameter value for the varkw
        varkw_param = build_user_parameter_value(
            callable_spec.varkw,
            dict(unknown_kwargs),
            source=task_definition.full_task_family_short,
        )
        values.append(varkw_param)

    # Exclude the self param just like it's excluded in DecoratedCallableParamBuilder
    excluded_params = {"self"}

    for name, value in known_kwargs:
        if name in excluded_params:
            continue

        # build the parameters for the expected parameters
        param_value = build_user_parameter_value(
            name, value, source=task_definition.full_task_family_short)
        values.append(param_value)

    return values
Example #24
def split_employees_leaves(employees: List[Dict]) -> Tuple[Iterable, Iterable]:
    """Split list of employees into two iterables, with either active employees or terminated employees

    >>> e = [{'@action': "test"}, {'@action': "leave"}]
    >>> e1, e2 = split_employees_leaves(e)
    >>> list(e1)
    [{'@action': 'test'}]

    >>> list(e2)
    [{'@action': 'leave'}]
    """
    is_leave = lambda empl: empl.get("@action") == "leave"
    return partition(is_leave, employees)
Example #25
def evolve(tiles):
    new_tiles = set()
    white_tiles = Counter()

    for tile in tiles:
        white_neighbours, black_neighbours = mit.partition(
            tiles.__contains__, neighbours(tile))
        white_tiles.update(white_neighbours)

        if 0 < mit.ilen(black_neighbours) <= 2:
            new_tiles.add(tile)

    new_tiles.update(t for t in white_tiles if white_tiles[t] == 2)
    return new_tiles
Example #26
 def next(self):
     new = GridPart1()
     inactive_seen = set()
     for node in self.active:
         inactive, active = mit.partition(self.active.__contains__,
                                          GridPart1.neighbours(node))
         if mit.ilen(active) in (2, 3):
             new.active.add(node)
         for neighbour in set(inactive) - self.active - inactive_seen:
             inactive_seen.add(neighbour)
             if len(set(GridPart1.neighbours(neighbour))
                    & self.active) == 3:
                 new.active.add(neighbour)
     return new
Example #27
def main():
    with open("input") as lines:
        lines = [line.strip() for line in lines]

    incomplete, corrupted = partition(
        lambda value: isinstance(value, Corrupted),
        map(analyse, lines),
    )

    syntax_error_score = sum(mismatch.score for mismatch in corrupted)
    print(syntax_error_score)
    autocomplete_score = median(autocompletion.score
                                for autocompletion in incomplete)
    print(autocomplete_score)
Example #28
def pick_toggled(values):
    bases, toggles = partition(
        lambda x: isinstance(x, Toggle),
        values,
    )

    toggle = first(toggles, Toggle(False))

    try:
        base = first(bases)
    except ValueError:
        raise NoValueFound("can't find base value to toggle", values.log)

    return toggle.value != base
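pick_toggled splits the inputs into Toggle markers and plain base values, then XORs the first base value with the first toggle (defaulting to Toggle(False)). A minimal sketch of that composition, with a hypothetical Toggle stand-in; the real Toggle type and the values.log attribute from the source are not reproduced here:

from dataclasses import dataclass
from more_itertools import first, partition

@dataclass
class Toggle:  # hypothetical stand-in for the real Toggle type
    value: bool

values = [False, Toggle(True)]
bases, toggles = partition(lambda x: isinstance(x, Toggle), values)
toggle = first(toggles, Toggle(False))  # Toggle(True)
base = first(bases)                     # False
print(toggle.value != base)             # True -- the base value, toggled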
Example #29
    def expand_primer_extension_products(self,
                                         only_one_required=False,
                                         lim_size=True):
        primers = self.get_alignments_by_types(Constants.PRIMER)

        rev, fwd = partition(lambda p: p.subject_region.direction == 1,
                             primers)
        fwd, fwd_keys = sort_with_keys(fwd, key=lambda p: p.query_region.b)
        rev, rev_keys = sort_with_keys(rev, key=lambda p: p.query_region.a)
        pairs = []
        for f in fwd:
            rev_bind_region = f.query_region[:-Config.PRIMER_MIN_BIND]
            rev_bind = self.filter_alignments_by_span(
                rev, rev_bind_region, key=lambda p: p.query_region.a)
            rev_bind, rkeys = sort_with_keys(rev_bind,
                                             key=lambda p: p.query_region.a)

            for r in rev_bind:
                if r.query_region.b in f.query_region:
                    if r.query_region.b == f.query_region.b:
                        pass
                    else:
                        continue
                pairs += self._create_primer_extension_alignment(
                    f,
                    r,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_PRIMERS,
                    lim_size=lim_size,
                )

        if only_one_required:
            for f in fwd:
                # existing fwd primer
                pairs += self._create_primer_extension_alignment(
                    f,
                    None,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_LEFT_PRIMER,
                    lim_size=lim_size,
                )

            for r in rev:
                # existing rev primer
                pairs += self._create_primer_extension_alignment(
                    None,
                    r,
                    Constants.PRIMER_EXTENSION_PRODUCT_WITH_RIGHT_PRIMER,
                    lim_size=lim_size,
                )
        return pairs
Example #30
def traverse_linop(op: Callable[[Expr], Expr], ccheck: Callable[[Expr], bool],
                   expr: Expr) -> Expr:
    """
    Traverses an expression tree applying op with the linear operator rules
    """
    rself: Callable[[Expr], Expr] = partial(traverse_linop, op, ccheck)
    # sub1 + sub2
    if expr.func == Add:
        return Add(*map(rself, expr.args))
    # const * sub1 * sub2
    if expr.func == Mul:
        variables, [*constants] = partition(ccheck, expr.args)
        if len(constants) > 0:
            return Mul(*constants, rself(Mul(*variables)))
    return op(expr)
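A brief usage sketch for traverse_linop, assuming the function above is in scope together with its sympy (Expr, Add, Mul), functools.partial and more_itertools.partition imports; the undefined function L and the is_number constant check are illustrative choices, not part of the original code:

from sympy import Function, symbols

x = symbols("x")
L = Function("L")  # stands in for an arbitrary linear operator

# Constants are pulled out of each product and op is applied to what remains:
# 3*x + 5*x**2  ->  3*L(x) + 5*L(x**2)
result = traverse_linop(lambda e: L(e), lambda e: e.is_number, 3*x + 5*x**2)
print(result)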
Example #31
 def test_bool(self):
     """Test when pred() returns a boolean"""
     lesser, greater = mi.partition(lambda x: x > 5, range(10))
     self.assertEqual(list(lesser), [0, 1, 2, 3, 4, 5])
     self.assertEqual(list(greater), [6, 7, 8, 9])
Example #32
 def test_arbitrary(self):
     """Test when pred() returns an integer"""
     divisibles, remainders = mi.partition(lambda x: x % 3, range(10))
     self.assertEqual(list(divisibles), [0, 3, 6, 9])
     self.assertEqual(list(remainders), [1, 2, 4, 5, 7, 8])
Example #33
def resolve_args(namespaces):
    from pathlib import Path

    # TODO: make so that we can run without science frames
    args, head_info, names = namespaces

    # Positional argument and -s / --sci argument mean the same thing
    if args.files_or_directory and not args.sci:
        args.sci = args.files_or_directory

    if args.outdir:
        # output directory given explicitly
        args.outdir = iocheck(args.outdir, os.path.exists, 1)
    # else:
    # infer output directory from images provided

    # If input is a directory, process all files in tree!
    # If outdir given, rebuild the tree for reduced files there.  Otherwise
    # maintain current tree for reduced files.
    from pySHOC import treeops

    root = Path(args.sci[0])
    if root.is_dir():  # this is a directory; try to process the entire tree!
        _infer_indir = False
        # first check if still has day-by-day folders
        # if next(args.sci[0].glob('[01][0-9][0-3][0-9]'), None):
        #     # try to partition
        #     treeops.partition_by_source(args.sci[0])

        # get file tree
        tree = treeops.get_tree(root, '.fits')
        flats = tree.pop('flats', tree.pop('flat', None))
        bias = tree.pop('bias', None)
        if not args.flats:
            args.flats = flats
        if not args.bias:
            args.bias = bias

        # flatten the tree into list of files
        args.sci = list(mit.flatten(tree.values()))

    else:
        _infer_indir = True

    # Resolve inputs and get the input folder from the resolved file list for
    # sci / flats / bias
    _infer_outdir = not bool(args.outdir)
    work_dir = ''
    for name in ('sci', 'flats', 'bias'):  # args.dark   # 'sci',
        images = getattr(args, name)
        if images:
            # Resolve the input images
            images = parse.to_list(images,
                                   os.path.exists,
                                   include='*.fits',
                                   path=work_dir,
                                   abspaths=True,
                                   raise_error=1)
            # put resolved list in arg namespace
            setattr(args, name, images)
            if _infer_indir:
                work_dir = Path(images[0]).parent
                _infer_indir = False

            if _infer_outdir:
                args.outdir = os.path.split(images[0])[0]
                _infer_outdir = False

    # All inputs should now be resolved to lists of file names
    if args.sci:
        # Initialize Run
        args.sci = shocSciRun.load(args.sci, label='science')
        # TODO: use kind and set that as label default?

        # for cube in args.sci:  # DO YOU NEED TO DO THIS IN A LOOP?
        #     cube._needs_flip = not cube.cross_check(args.sci[0], 'flip_state')
        # self-consistency check for flip state of science cubes
        # #NOTE: THIS MAY BE INEFFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A DIFFERENT FLIP STATE...

    # ===========================================================================
    if args.gps:
        args.timing = True  # Do timing if gps info given

        if len(args.gps) == 1:
            # triggers are given either as a single trigger time string or as the filename of a trigger list
            valid_gps = iocheck(args.gps[0], validity.RA,
                                raise_error=-1)  # if valid single time this will return that same str else None
            if not valid_gps:
                args.gps = parse.to_list(args.gps, validity.RA,
                                         path=work_dir,
                                         abspath=0,
                                         sort=0,
                                         raise_error=1)

        # at this point args.gps is a list of explicit time strings.
        # Check if they are valid representations of time
        args.gps = [iocheck(g, validity.RA, raise_error=1, convert=convert.RA)
                    for g in args.gps]

        # Convert and set as cube attribute
        args.sci.that_need_triggers().set_gps_triggers(args.gps)

        # if any cubes are GPS triggered on each individual frame
        grun = args.sci.that_need_kct()
        if len(args.kct) == 1 and len(grun) != 1:
            warn(
                    'A single GPS KCT provided for multiple externally triggered runs. '
                    'Assuming this applies for all these files: %s' % grun)
            args.kct *= len(grun)  # expand by repeating

        elif len(grun) != len(args.kct):
            l = str(len(args.kct)) or 'No'
            s = ': %s' % str(args.kct) if len(args.kct) else ''
            raise ValueError('%s GPS KCT values provided%s for %i file(s): %s'
                             '' % (l, s, len(grun), grun))

        # "Please specify KCT (Exposure time + Dead time):")
        # args.kct = InputCallbackLoop.str(msg, 0.04, check=validity.float, what='KCT')

        for cube, kct in zip(grun, args.kct):
            cube.timing.kct = kct

    # ===========================================================================
    if args.flats or args.bias:

        args.combine = list(map(str.lower, args.combine))
        hows = 'day', 'daily', 'week', 'weekly'
        methods = 'sigma clipped',
        funcs = 'mean', 'median'
        vocab = hows + methods + funcs
        transmap = dict(mit.grouper(hows, 2))
        understood, misunderstood = map(list, mit.partition(vocab.__contains__,
                                                            args.combine))
        if any(misunderstood):
            raise ValueError('Argument(s) {} for combine not understood.'
                             ''.format(misunderstood))
        else:
            understood = [transmap.get(u, u) for u in understood]

            how = next(filter(hows.__contains__, understood))
            func = next(filter(funcs.__contains__, understood))
            meth = next(filter(methods.__contains__, understood), '')

            args.combine = how
            args.fcombine = getattr(np, func)
            print('\nBias/Flat combination will be done by {}.'.format(
                    ' '.join([how, meth, func])))

            # TODO: sigma clipping ... even though it sucks

    # ===========================================================================
    if args.flats:
        # TODO full matching here ...

        # args.flats = parse.to_list(args.flats, imaccess, path=work_dir, raise_error=1)
        args.flats = shocFlatFieldRun.load(args.flats, label='flat')

        # isolate the flat fields that match the science frames. only these will be processed
        match = args.flats.cross_check(args.sci, 'binning', 1)
        args.flats = args.flats[match]

        # check which are master flats

        # for flat in args.flats:
        #     flat._needs_flip = not flat.cross_check(args.sci[0], 'flip_state')

        # flag the flats that need to be subframed, based on the science frames which are subframed
        args.flats.flag_sub(args.sci)

        args.flats.print_instrumental_setup()

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]

        # else:
        # print('WARNING: No flat fielding will be done!')

    # ===========================================================================
    if args.bias:
        # args.bias = parse.to_list(args.bias, imaccess, path=work_dir, raise_error=1)
        args.bias = shocBiasRun.load(args.bias, label='bias')

        # match the biases for the science run
        match4sci = args.bias.cross_check(args.sci, ['binning', 'mode'], 0)
        # for bias in args.bias:
        #     bias._needs_flip = bias.cross_check(args.sci[0], 'flip_state')
        # NOTE: THIS MAY BE INEFFICIENT IF THE FIRST CUBE IS THE ONLY ONE WITH A DIFFERENT FLIP STATE...
        # args.bias[match4sci].flag_sub(args.sci) ?
        args.bias.flag_sub(args.sci)
        args.bias[match4sci].print_instrumental_setup(
                description='(for science frames)')

        # match the biases for the flat run
        if args.flats:
            match4flats = args.bias.cross_check(args.flats, ['binning', 'mode'],
                                                -1)
            # args.bias4flats = args.bias[match4flats]
            # for bias in args.bias4flats:
            #     bias._needs_flip = bias.cross_check(args.flats[0], 'flip_state')

            # print table of bias frames
            args.bias[match4flats].print_instrumental_setup(
                    description='(for flat fields)')
            match = match4sci & match4flats
        else:
            match = match4sci

        args.bias = args.bias[match]

        # check which of the given flats are potentially master
        # is_master = [f.ndims == 2 for f in args.flats]

    # else:
    # warn( 'No de-biasing will be done!' )

    # ===========================================================================
    if args.split:
        if args.outdir[0]:  # if an output directory is given
            args.outdir = os.path.abspath(args.outdir[0])
            if not os.path.exists(args.outdir):  # if it doesn't exist create it
                print(
                        'Creating reduced data directory {}.\n'.format(
                            args.outdir))
                os.mkdir(args.outdir)

    # ===========================================================================
    # Handle header updating here

    # NOTE: somehow, this attribute gets set even though we can never read it due to a syntax error
    delattr(head_info, 'update-headers')

    hi = head_info
    hi.coords = None
    # join arguments since they are read as lists
    hi.object = ' '.join(hi.object)
    hi.ra = ' '.join(hi.ra)
    hi.dec = ' '.join(hi.dec)
    hi.date = ' '.join(hi.date)

    if args.update_headers:
        if hi.ra and hi.dec:
            iocheck(hi.ra, validity.RA, 1)
            iocheck(hi.dec, validity.DEC, 1)
            hi.coords = SkyCoord(ra=hi.ra, dec=hi.dec,
                                 unit=('h', 'deg'))  # , system='icrs'
        else:
            from pySHOC.utils import retrieve_coords_ra_dec
            hi.coords, hi.ra, hi.dec = retrieve_coords_ra_dec(hi.object)

        # TODO: maybe subclass SkyCoords to calculate this?
        def is_close(cooA, cooB, threshold=1e-3):
            return np.less([(cooA.ra - cooB.ra).value,
                            (cooA.dec - cooB.dec).value], threshold).all()

        for cube in args.sci:  # TODO: select instead of loop
            if cube.has_coords and hi.coords and not is_close(cube.coords,
                                                              hi.coords):
                fmt = dict(style='hmsdms', precision=2, sep=' ', pad=1)
                warn(
                        'Supplied coordinates {} will supersede header coordinates {} in {}'
                        ''.format(hi.coords.to_string(**fmt),
                                  cube.coords.to_string(**fmt),
                                  cube.filename()))
                cube.coords = hi.coords

        if not hi.date:
            # hi.date = args.sci[0].date#[c.date for c in args.sci]
            warn('Dates will be assumed from file creation dates.')

            # if not hi.filter:
            #     warn('Filter assumed as Empty')
            #     hi.filter = 'Empty'

            # if hi.epoch:
            #     iocheck(hi.epoch, validity.epoch, 1)
            # else:
            # warn('Assuming epoch J2000')
            # hi.epoch = 2000

            # if not hi.obs:
            # note('Assuming location is SAAO Sutherland observatory.')
            # hi.obs = 'SAAO'

            # if not hi.tel:
            #     note('Assuming telescope is SAAO 1.9m\n')   #FIXME: Don't have to assume for new data
            #     hi.tel = '1.9m'

    elif args.timing or args.split:
        # Need target coordinates for Barycentrization! Check the headers
        for cube in args.sci:  # TODO: select instead of loop
            if cube.coords is None:
                warn('Object coordinates not found in header for {}!\n'
                     'Barycentrization cannot be done without knowing target '
                     'coordinates!'.format(cube.filename()))

                # iocheck( hi.date, validity.DATE, 1 )
                # else:
                # warn( 'Headers will not be updated!' )

                # ===========================================================================
                # if args.timing and not hi.coords:
                # Target coordinates not provided / inferred from
                # warn( 'Barycentrization cannot be done without knowing target coordinates!' )

    if args.names:
        shocFlatFieldRun.nameFormat = names.flats
        shocBiasRun.nameFormat = names.bias
        shocSciRun.nameFormat = names.sci

    # ANIMATE

    return args, head_info, names