def test_get_matching_items():
    """Check `_get_matching_items` with and without an explicit `items` filter."""
    session_template = 'session_{}'
    ten_sessions = [session_template.format(idx) for idx in range(10)]
    five_sessions = [session_template.format(idx) for idx in range(5)]
    five_subjects = ['subject_{}'.format(idx) for idx in range(5)]

    # Session names and subject names share no element.
    disjoint = list(_get_matching_items(ten_sessions, five_subjects))
    assert disjoint == []

    # Without `items`, the common elements are expected back.
    common = list(_get_matching_items(ten_sessions, five_sessions))
    assert common == five_sessions

    # A single requested item that is present in both lists.
    single = list(_get_matching_items(ten_sessions, five_sessions, items=['session_1']))
    assert single == ['session_1']

    # Requesting every element of the shorter list.
    everything = list(_get_matching_items(ten_sessions, five_sessions, items=five_sessions))
    assert everything == five_sessions

    # A requested item that is in neither list yields nothing.
    unknown = list(_get_matching_items(ten_sessions, five_sessions, items=['hansel']))
    assert unknown == []
def test_get_matching_items():
    """Run `_get_matching_items` over a table of inputs and expected matches."""
    long_sessions = ["session_{}".format(num) for num in range(10)]
    short_sessions = ["session_{}".format(num) for num in range(5)]
    short_subjects = ["subject_{}".format(num) for num in range(5)]

    # Each case: (second list, extra keyword arguments, expected result).
    # The first list is always `long_sessions`.
    cases = (
        (short_subjects, {}, []),
        (short_sessions, {}, short_sessions),
        (short_sessions, {"items": ["session_1"]}, ["session_1"]),
        (short_sessions, {"items": short_sessions}, short_sessions),
        (short_sessions, {"items": ["hansel"]}, []),
    )

    for other, extra, expected in cases:
        found = list(_get_matching_items(long_sessions, other, **extra))
        assert found == expected
def df_to_valuesmap(
    df: 'pandas.DataFrame',
    crumb_arg_names: Iterator[str],
    arg_names: Iterator[str] = None
) -> CrumbArgsSequences:
    """Return a values_map from data in `df` and the matching column and
    arguments names from `df`, `crumb_arg_names` and `arg_names`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb_arg_names: sequence of str
        The crumb argument names to be matched against the columns of `df`.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs. Both must match, or use
        _pandas_rename_cols to rename the columns.
        If None, will look for all the arguments that match in both
        `df` and `crumb_arg_names`.
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        One list of (column name, value) pairs per row of `df`, restricted
        to the matched columns.
    """
    # Materialise the matches before indexing: the other callers of
    # `_get_matching_items` treat its result as a possibly lazy iterable, and
    # selecting several DataFrame columns needs a concrete list of labels.
    crumb_names = list(_get_matching_items(df.columns, crumb_arg_names, arg_names))

    # get the columns of df that have been matched
    records = df[crumb_names].to_dict(orient='records')
    return (list(rec.items()) for rec in records)
def df_to_valuesmap(df, crumb_arg_names, arg_names=None):
    """Return a values_map from data in `df` and the matching column and
    arguments names from `df`, `crumb_arg_names` and `arg_names`.

    Parameters
    ----------
    df: pandas.DataFrame

    crumb_arg_names: sequence of str
        The crumb argument names to be matched against the columns of `df`.

    arg_names: sequence of str
        A list of the crumb arguments and DataFrame columns to extract
        the info to fill the crumbs. Both must match, or use
        _pandas_rename_cols to rename the columns.
        If None, will look for all the arguments that match in both
        `df` and `crumb_arg_names`.
        Example: ['subject_id']

    Returns
    -------
    values_map: generator of lists of 2-tuple
        One list of (column name, value) pairs per row of `df`, restricted
        to the matched columns.
    """
    # Materialise the matches before indexing: the other callers of
    # `_get_matching_items` treat its result as a possibly lazy iterable, and
    # selecting several DataFrame columns needs a concrete list of labels.
    crumb_names = list(_get_matching_items(df.columns, crumb_arg_names, arg_names))

    # get the columns of df that have been matched
    records = df[crumb_names].to_dict(orient='records')
    return (list(rec.items()) for rec in records)
def intersection(crumb1: hansel.Crumb, crumb2: hansel.Crumb, on: Iterator[str] = None) -> List[str]:
    """Return an 'inner join' of both given Crumbs, i.e., the values that
    both crumbs have in common for their common arguments.

    If `on` is None, all the argument names common to both crumbs are used.
    Otherwise only the elements of `on` are used; a single string is treated
    as a one-element list.

    Parameters
    ----------
    crumb1: hansel.Crumb

    crumb2: hansel.Crumb

    on: str or list of str
        Crumb argument names common to both input crumbs.

    Raises
    ------
    KeyError:
        If no matching argument names are found between both crumbs
        (limited by `on`, when given).

    Returns
    -------
    inner_join: list
        The sorted items that appear in the `joint_value_map` results of both
        crumbs for the matched argument names.

    Notes
    -----
    Use with care, ideally the argument matches should be in the same order
    in both crumbs.

    Both crumbs must have at least one matching identifier argument and one
    of those must be the one in `on`.
    """
    selected = [on] if isinstance(on, str) else on

    first_args = list(crumb1.all_args())
    second_args = list(crumb2.all_args())
    arg_names = list(_get_matching_items(first_args, second_args, items=selected))

    if not arg_names:
        message = "Could not find matching arguments between {} and {} limited by {}."
        raise KeyError(
            message.format(list(crumb1.all_args()), list(crumb2.all_args()), selected)
        )

    # Value maps are requested with check_exists=True for both crumbs.
    first_maps = joint_value_map(crumb1, arg_names, check_exists=True)
    second_maps = joint_value_map(crumb2, arg_names, check_exists=True)

    shared = set(first_maps) & set(second_maps)
    return sorted(shared)