Example #1
def spc_national_map_previews(pkg):
    def first_cesium_view(res):
        # List the views of the resource and keep the first Cesium one, if any.
        views = toolkit.get_action('resource_view_list')(
            {'user': toolkit.c.user}, {'id': res['id']})
        return F.first(F.filter(_is_cesium_view, views))

    # Pair each resource with its first Cesium view; drop pairs without one.
    return F.filter(F.first,
                    [(first_cesium_view(res), res) for res in pkg['resources']])
Example #2
    def _load_call_info(self, dirname):
        path = os.path.join(self._path, dirname)
        files = os.listdir(path)

        arg_files = sorted(filter(r'^a', files))
        args = tuple(map(self._read_data, (os.path.join(path, f) for f in arg_files)))

        kwarg_files = filter(r'^k', files)
        kwarg_files = {filename[1:]: os.path.join(path, filename) for filename in kwarg_files}
        kwargs = walk_values(self._read_data, kwarg_files)

        return args, kwargs
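The regex-string predicates above rely on filter being funcy's filter, which converts strings into regex tests and sets into membership tests. A minimal sketch of that behavior (hypothetical file names):

from funcy import lfilter  # list-returning variant of funcy's filter

files = ["a0", "a1", "kname", "readme"]
lfilter(r'^a', files)       # regex string as predicate -> ['a0', 'a1']
lfilter({"kname"}, files)   # set as predicate (membership test) -> ['kname']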
Example #3
def get_minimum_metric_set_keys_from_mapping_file(name):
    """
    Returns the list of metric keys that should be collected for the container.
    These metrics are defined in a mapping file for each image name.
    """
    path = os.path.join('mappings', name + '.yml')
    mappings = yaml.safe_load(util.get_asset_file_contents(path))

    # funcy.get_in takes a list of keys as the path; a metric is mandatory
    # unless it is explicitly marked "optional" in the mapping file.
    is_mandatory_metric = lambda x: not funcy.get_in(x, ['optional'], False)

    return list(
        map(lambda x: x['key'], funcy.filter(is_mandatory_metric, mappings)))
Example #4
def _map_gdl_to_publication(data_dict, obj):
    dataset = {
        "id": str(uuid.uuid3(uuid.NAMESPACE_DNS, str(data_dict['id']))),
        "type": "publications",
        "title": data_dict['title'],
        "creator": [a['name'] for a in data_dict['authors']],
        # "subject": data_dict,
        "notes": data_dict['description'],
        "publisher": data_dict.get('relatedOrganisation'),
        # "contributor": [a['name'] for a in data_dict['authors']],
        "date": data_dict.get('created'),
        "metadata_modified": data_dict.get('created'),
        # "publication_type": data_dict,
        # "format": data_dict,
        "identifier": data_dict['identifier'],
        "source": data_dict.get('source'),
        # "language": data_dict,
        # "relation": data_dict,
        # "spatial": data_dict,
        # "rights": data_dict,
        "license_id": 'notspecified',
        "member_countries": 'other',  # relatedCountry, optional
        "harvest_source": 'GDL'
    }
    thematic_area = data_dict.get('thematicArea', {}).get('area')
    if thematic_area:
        dataset["thematic_area_string"] = thematic_area_mapping.get(
            thematic_area)
    related_country = data_dict.get('relatedCountry')
    if related_country:
        schema = sh.scheming_get_dataset_schema('publications')
        choices = sh.scheming_field_by_name(schema['dataset_fields'],
                                            'member_countries')['choices']
        member_country = F.first(
            F.filter(
                F.compose(F.rpartial(contains, related_country),
                          itemgetter('label')), choices))
        if member_country:
            dataset['member_countries'] = member_country['value']
            spatial = get_extent_for_country(member_country['label'])
            if spatial:
                dataset['spatial'] = spatial['value']
    if data_dict['file']:
        res_url = _gl_url(obj.source.url, 'download') + '?id=' + str(
            data_dict['id'])
        res = {'name': data_dict['file'], 'url': res_url}
        res['format'] = splitext(res['name'])[1].lstrip('.')
        dataset['resources'] = [res]

    return dataset
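The member-country lookup above composes itemgetter('label') with a reversed-argument contains, so a choice matches when its label contains the related country. A minimal sketch with hypothetical choices:

from operator import contains, itemgetter
import funcy as F

choices = [{"label": "Fiji", "value": "fj"}, {"label": "Tonga", "value": "to"}]
pred = F.compose(F.rpartial(contains, "Fiji"), itemgetter("label"))  # label -> "Fiji" in label
F.first(F.filter(pred, choices))  # -> {'label': 'Fiji', 'value': 'fj'}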
Example #5
File: Zte.py Project: sjava/weihu
def get_onus(ip):
    mark, ports = get_pon_ports(ip)[:-1]
    if mark == 'fail':
        return ('fail', None, ip)
    try:
        child = telnet(ip)
        gpo = partial(get_port_onus, child)
        rslt = lmap(gpo, ports)
        child.sendline('exit')
        child.close()
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    rslt1 = filter(lambda x: bool(x[1]), rslt)
    return ('success', rslt1, ip)
Example #7
File: S85.py Project: sjava/weihu
def get_infs(ip):
    def _inf(record):
        name = re_find(r'interface\s+(X?Gigabit\S+)', record)
        desc = re_find(r'description\s+(\S+ *\S*)', record)
        group = re_find(r'link-aggregation\s+(group\s+\d+)', record)
        return dict(name=name, desc=desc, group=group)

    try:
        child = telnet(ip)
        rslt = do_some(child, 'disp cu interface')
        close(child)
    except (pexpect.EOF, pexpect.TIMEOUT) as e:
        return ('fail', None, ip)
    # funcy's filter with a regex predicate: keep only the GigabitEthernet sections.
    rslt1 = filter(r'X?GigabitEthernet', rslt.split('#'))
    rslt2 = map(_inf, rslt1)
    return ('success', rslt2, ip)
Example #9
    def get_data(self):
        all_transactions = filter(
            lambda t: t["type"] in ("request-changes", "accept"),
            cat(pluck("transactions", self.raw_data)),
        )
        accept_transactions, reject_transactions = split(
            lambda t: t["type"] == "accept", all_transactions)
        most_accepting_author, most_accepting_count = Counter(
            count_by(itemgetter("authorPHID"),
                     accept_transactions)).most_common(1)[0]
        most_rejecting_author, most_rejecting_count = Counter(
            count_by(itemgetter("authorPHID"),
                     reject_transactions)).most_common(1)[0]

        return (
            {
                "author": self.users_mapping[most_accepting_author],
                "count": most_accepting_count,
            },
            {
                "author": self.users_mapping[most_rejecting_author],
                "count": most_rejecting_count,
            },
        )
Example #10
def tfilter(f, *seq):
    # Eagerly filter into a tuple; with no sequence, return a curried version.
    return tuple(F.filter(f, *seq)) if seq \
        else lambda *xs: tuple(F.filter(f, *xs))
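A quick usage sketch of this wrapper (hypothetical values), showing the direct and the curried form:

tfilter(lambda x: x % 2 == 0, range(10))   # -> (0, 2, 4, 6, 8)
keep_even = tfilter(lambda x: x % 2 == 0)  # no sequence given, so a function is returned
keep_even(range(10))                       # -> (0, 2, 4, 6, 8)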
Example #11
def get_extent_for_country(country):
    spatial = F.first(
        F.filter(F.compose(F.partial(eq, country), itemgetter('text')),
                 get_eez_options()))
    return spatial
Example #12
def scan_etl_events(etl_id,
                    selected_columns: Optional[Iterable[str]] = None) -> None:
    """
    Scan for all events belonging to a specific ETL.

    If a list of columns is provided, then the output is limited to those columns.
    But note that the target (schema.table) and the event are always present.
    """
    ddb = DynamoDBStorage.factory()
    table = ddb.get_table(create_if_not_exists=False)
    available_columns = [
        "target", "step", "event", "timestamp", "elapsed", "rowcount"
    ]
    if selected_columns is None:
        selected_columns = available_columns
    # We will always select "target" and "event" to have a meaningful output.
    # funcy's filter also accepts a set as the predicate (membership test), so this
    # keeps the selected columns in the order they appear in available_columns.
    columns = list(
        fy.filter(
            frozenset(selected_columns).union(["target", "event"]),
            available_columns))
    keys = [
        "extra.rowcount" if column == "rowcount" else column
        for column in columns
    ]

    # We need to scan here since the events are stored by "target" and not by "etl_id".
    # TODO Try to find all the "known" relations and query on them with a filter on the etl_id.
    client = boto3.client("dynamodb")
    paginator = client.get_paginator("scan")
    response_iterator = paginator.paginate(
        TableName=table.name,
        ConsistentRead=False,
        ExpressionAttributeNames={"#timestamp": "timestamp"},
        ExpressionAttributeValues={
            ":etl_id": {
                "S": etl_id
            },
            ":marker": {
                "S": _DUMMY_TARGET
            },
            ":start_event": {
                "S": STEP_START
            },
        },
        FilterExpression=
        "etl_id = :etl_id and target <> :marker and event <> :start_event",
        ProjectionExpression=
        "target, step, event, #timestamp, elapsed, extra.rowcount",
        ReturnConsumedCapacity="TOTAL",
        # PaginationConfig={
        #     "PageSize": 100
        # }
    )
    logger.info("Scanning events table '%s' for elapsed times", table.name)
    consumed_capacity = 0.0
    scanned_count = 0
    rows: List[List[str]] = []
    deserialize = TypeDeserializer().deserialize

    for response in response_iterator:
        consumed_capacity += response["ConsumedCapacity"]["CapacityUnits"]
        scanned_count += response["ScannedCount"]
        # We need to turn something like "'event': {'S': 'finish'}" into "'event': 'finish'".
        deserialized = [{
            key: deserialize(value)
            for key, value in item.items()
        } for item in response["Items"]]
        # Lookup "elapsed" or "extra.rowcount" (the latter as ["extra", "rowcount"]).
        items = [{key: fy.get_in(item, key.split("."))
                  for key in keys} for item in deserialized]
        # Scope down to selected keys and format the columns.
        rows.extend([_format_output_column(key, item[key]) for key in keys]
                    for item in items)

    logger.info("Scan result: scanned count = %d, consumed capacity = %f",
                scanned_count, consumed_capacity)
    if "timestamp" in keys:
        rows.sort(key=itemgetter(keys.index("timestamp")))
    else:
        rows.sort(key=itemgetter(keys.index("target")))
    print(etl.text.format_lines(rows, header_row=columns))
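The dotted-key lookup above works because funcy's get_in walks a path of keys. A minimal sketch with a hypothetical deserialized item:

import funcy as fy

item = {"target": "www.orders", "event": "finish", "extra": {"rowcount": 42}}
keys = ["target", "event", "extra.rowcount"]
{key: fy.get_in(item, key.split(".")) for key in keys}
# -> {'target': 'www.orders', 'event': 'finish', 'extra.rowcount': 42}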
Example #13
def nfilter(pred, nodes):
    return NodeSet.from_seq(filter(pred, nodes.as_node_set()))
Example #15
def select_in_execution_order(
    relations: Sequence[RelationDescription],
    selector: TableSelector,
    include_dependents=False,
    include_immediate_views=False,
    continue_from: Optional[str] = None,
) -> List[RelationDescription]:
    """
    Return list of relations that were selected, optionally adding dependents or skipping forward.

    The values supported for skipping forward are:
      - '*' to start from the beginning
      - ':transformations' to only run transformations of selected relations
      - a specific relation to continue from that one in the original execution order
      - a specific schema to include all relations in that source schema as well as
          any originally selected transformation

    Note that these operate on the list of relations selected by the selector patterns.
    The option of '*' exists so we can have a default value in our pipeline definitions.
    The last option of specifying a schema is most useful with a source schema when you want
    to restart the load step followed by all transformations.

    No error is raised when the selector does not select any relations.
    An error is raised when the "continue from" condition does not resolve to a list of relations.
    """
    logger.info("Pondering execution order of %d relation(s)", len(relations))
    execution_order = order_by_dependencies(relations)

    selected = find_matches(execution_order, selector)
    if not selected:
        logger.warning("Found no relations matching: %s", selector)
        return []

    if include_dependents:
        dependents = find_dependents(execution_order, selected)
        combined = frozenset(selected).union(dependents)
        selected = [
            relation for relation in execution_order if relation in combined
        ]
    elif include_immediate_views:
        immediate_views = find_immediate_dependencies(execution_order,
                                                      selector)
        combined = frozenset(selected).union(immediate_views)
        selected = [
            relation for relation in execution_order if relation in combined
        ]

    if continue_from is None or continue_from == "*":
        return selected

    transformations = [
        relation for relation in selected if relation.is_transformation
    ]
    if continue_from in (":transformations", ":transformation"):
        if transformations:
            logger.info(
                "Continuing with %d transformation(s) in selected relations",
                len(transformations))
            return transformations
        raise InvalidArgumentError("found no transformations to continue from")

    logger.info("Trying to fast forward to '%s' within %d relation(s)",
                continue_from, len(selected))
    starting_from_match = list(
        fy.dropwhile(lambda relation: relation.identifier != continue_from,
                     selected))
    if starting_from_match:
        logger.info(
            "Continuing with %d relation(s) after skipping %d",
            len(starting_from_match),
            len(selected) - len(starting_from_match),
        )
        return starting_from_match

    single_schema = frozenset(
        fy.filter(lambda relation: relation.source_name == continue_from,
                  selected))
    if single_schema.intersection(transformations):
        raise InvalidArgumentError(
            f"schema '{continue_from}' contains transformations")
    if single_schema:
        combined = single_schema.union(transformations)
        logger.info(
            "Continuing with %d relation(s) in '%s' and %d transformation(s)",
            len(single_schema),
            continue_from,
            len(combined) - len(single_schema),
        )
        return [
            relation for relation in execution_order if relation in combined
        ]

    raise InvalidArgumentError("found no matching relations to continue from")
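The fast-forward branch above simply drops relations until the first identifier match. A minimal sketch with hypothetical relation identifiers:

import funcy as fy

identifiers = ["src.users", "src.orders", "dw.fact_orders", "dw.report"]
list(fy.dropwhile(lambda name: name != "dw.fact_orders", identifiers))
# -> ['dw.fact_orders', 'dw.report']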
Example #16
def filter(f, *seq):
    # Thin wrapper over funcy's filter; with no sequence, return a curried version.
    return F.filter(f, *seq) if seq \
        else lambda *xs: F.filter(f, *xs)
Example #17
    def sync(self):
        key_properties = self.catalog.get('key_properties')
        table = self.TABLE

        singer.write_schema(self.catalog.get('stream'),
                            self.catalog.get('schema'),
                            key_properties=key_properties)

        start = self.get_start_date(table)
        end = start
        interval = timedelta(hours=1)

        LOGGER.info('Syncing outbound activities.')

        while end < datetime.now(pytz.utc):
            self.login()
            start = end
            end = start + interval
            LOGGER.info("Fetching activities from {} to {}".format(start, end))

            _filter = self.make_filter(start, end)
            field_selector = get_field_selector(self.catalog.get('schema'))

            hasMore = True

            while hasMore:
                try:
                    results = \
                        self.client.service.readRecentOutboundActivities(
                            _filter)
                except suds.WebFault as e:
                    if '116' in e.fault.faultstring:
                        hasMore = False
                        break
                    else:
                        raise

                result_dicts = [
                    suds.sudsobject.asdict(result) for result in results
                ]

                parsed_results = [
                    field_selector(result) for result in result_dicts
                ]

                for result in parsed_results:
                    ids = [
                        'createdDate', 'activityType', 'contactId', 'listId',
                        'segmentId', 'keywordId', 'messageId'
                    ]

                    id_values = filter(identity, project(result, ids).values())
                    result['id'] = hashlib.md5(
                        '|'.join(id_values).encode('utf-8')).hexdigest()

                singer.write_records(table, parsed_results)

                LOGGER.info('... {} results'.format(len(results)))

                _filter.readDirection = 'NEXT'

                if len(results) == 0:
                    hasMore = False

            self.state = incorporate(self.state, table, 'createdDate',
                                     start.replace(microsecond=0).isoformat())

            save_state(self.state)

        LOGGER.info('Done syncing outbound activities.')
Example #18
    def filter_activities(self, pred):
        return filter(pred, deepcopy(self.activities))