def clean_row(self, row):
    """Normalize a trace row into a fixed six-field record.

    A falsy ``row`` yields a list of six empty strings (placeholder
    record); otherwise the six fields are extracted positionally and
    returned as a tuple: (Index, filepath, int(line_number),
    stripped event_kind, stripped first call_data element, og_index).
    """
    if not row:
        return [""] * 6
    return (
        row.Index,
        row.filepath,
        int(row.line_number),
        row.event_kind.strip(),
        row.call_data[0].strip(),
        row.og_index,
    )
def predictions_most_frequent(label, entry):
    """Rank the predictions of *entry* by how often each data-key tuple occurs.

    Groups ``entry['predictions']`` by the tuple of values at
    ``label['data_keys']``, counts each group, sorts by count via the
    module-level ``_ordered_dict_sorted_by_value`` helper, and yields one
    dict per distinct key tuple mapping data key -> value.

    Returns a lazy ``map`` iterator, in the sorted order of the histogram.
    """
    data_keys = label['data_keys']
    # juxt over one itemgetter per key: data_fn(p) -> tuple of p's key values,
    # used as the (hashable) grouping key below.
    data_fn = juxt(map(op.itemgetter, data_keys))
    # predictions -> {key tuple: group} -> {key tuple: count} -> ordered dict
    # (sort order is whatever _ordered_dict_sorted_by_value produces).
    histogram = pipe(entry['predictions'],
                     partial(groupby, data_fn),
                     partial(valmap, len),
                     _ordered_dict_sorted_by_value)
    # Re-expand each key tuple into a {data_key: value} dict; counts are dropped.
    return map(lambda values: {k: v for k, v in zip(data_keys, values)},
               histogram.keys())
def on_clause(table_a, table_b, join_map, special_null_handling=False):
    """
    Given two analyze tables, and a map of join keys with the structure:

    [{'a_column': COLUMN_NAME, 'b_column: COLUMN_NAME}...]

    returns a sqlalchemy clause filtering on those join keys, suitable
    for passing to sqlalchemy.join

    If special_null_handling is set to True, it will generate a clause
    suitable for a WHERE clause, so that it can be used in an anti-join
    subquery for example. Specifically, it will have extra checks so that
    a pair of NULL columns also counts as a match.

    Example:
        join_query = select_query.select_from(sqlalchemy.join(
            table_a, table_b,
            on_clause(table_a, table_b, self.config['join_map'])
        ))
    """
    def _col(table, name):
        # Prefer a scan of table.columns by name; fall back to attribute
        # access on the columns collection.
        return next((c for c in table.columns if c.name == name),
                    getattr(table.columns, name))

    def _pair(jm):
        return _col(table_a, jm['a_column']), _col(table_b, jm['b_column'])

    def _equal(jm):
        a, b = _pair(jm)
        return a == b

    def _equal_or_both_null(jm):
        # NULL-tolerant variant: equal values, or NULL on both sides.
        a, b = _pair(jm)
        return sqlalchemy.or_(
            a == b,
            sqlalchemy.and_(a.is_(None), b.is_(None)),
        )

    predicate = _equal_or_both_null if special_null_handling else _equal
    return sqlalchemy.and_(*(predicate(jm) for jm in join_map))
def writeIdnTypeToFile(file, positions):
    """
    [String] output file name, [Iterator] positions => [String] output file name

    Side effect: write a csv file containing the id, idType for the positions

    A utility function, using which we can convert positions (Geneva or
    Bloomberg) to a file containing two columns (id, idType). The file will
    be used to load Bloomberg information for asset type processing.
    """
    def noNeedId(position):
        # Positions of these kinds never need a Bloomberg id lookup.
        checks = (isPrivateSecurity, isCash, isMoneyMarket, isRepo, isFxForward)
        return any(check(position) for check in checks)

    relevant = filterfalse(noNeedId, positions)
    idnTypes = set(map(getIdnType, relevant))
    return writeCsv(file, chain([('ID', 'ID_TYPE')], idnTypes))
def test_juxt_generator_input():
    """juxt must materialize a generator of functions so the result is reusable."""
    data = list(range(10))
    selector = juxt(itemgetter(2 * i) for i in range(5))
    expected = (0, 2, 4, 6, 8)
    # Calling twice proves the generator argument was consumed only once.
    for _ in range(2):
        assert tuple(selector(data)) == expected
def match(p):
    """True when *p* agrees with the enclosing ``elem`` on every data key."""
    keys = label['data_keys']

    def key_of(item):
        return tuple(op.itemgetter(k)(item) for k in keys)

    return key_of(elem) == key_of(p)
# Load the full prediction dataset, then evaluate it — either as a whole or
# grouped per feature value when --by-feature is given.
print(f'loading predictions...', file=sys.stderr)
dataset = ujson.load(args.input_predictions_file)
print(f'loaded {len(dataset)} predictions', file=sys.stderr)
# Truncate each entry's 'predictions' list to the first args.limit items.
limited_dataset = list(
    map(
        lambda elem: update_in(elem, ['predictions'],
                               compose(list, partial(take, args.limit))),
        dataset))
if args.by_feature is not None:
    strategy_meta = meta[args.strategy]
    data_keys = get_feature_data_keys(strategy_meta, args.by_feature)
    # Group entries by the '/'-joined tuple of their feature data-key values
    # (juxt over one itemgetter per key), then evaluate each group separately.
    grouped = groupby(
        compose(lambda x: '/'.join(x), juxt(map(op.itemgetter, data_keys))),
        limited_dataset)
    evaluation_fn = partial(evaluate, strategy_meta, args.predictions_limit)
    output = {
        value: evaluation_fn(section)
        for value, section in grouped.items()
    }
else:
    # No grouping: evaluate the whole (limited) dataset at once.
    output = evaluate(meta[args.strategy], args.predictions_limit,
                      limited_dataset)
# end='' so the JSON document is written without a trailing newline.
print(json.dumps(output, indent=4, separators=(',', ': ')), end='',
      file=args.output_evaluation_file)
def add_common_arguments(overrides=None, description=None, epilog=None):
    """Create an ``argparse.ArgumentParser`` pre-loaded with the common options.

    :param overrides: iterable of option keys ('i', 'o', 'if', 'of', 'd')
        whose registration should be skipped; defaults to none.
    :param description: forwarded to ``argparse.ArgumentParser``.
    :param epilog: forwarded to ``argparse.ArgumentParser``.
    :returns: the configured :class:`argparse.ArgumentParser`.
    """
    if not overrides:
        overrides = []
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Each entry pairs an override key with a deferred parser.add_argument
    # call (F defers the call); the surviving calls are fired at the end.
    args = (
        # Input
        ('i', F(parser.add_argument, '-i', '--input', metavar="FILE",
                nargs='+', type=str, default=None, dest='input_files',
                help='The CSV file to operate on. If omitted, will accept '
                     'input on STDIN.')),
        ('o', F(parser.add_argument, '-o', '--output', metavar="FILE",
                # NOTE(review): `file` is the Python 2 builtin — confirm
                # before porting this module to Python 3.
                type=file, default=None, dest='output_file',
                help='The file to write the results to. If omitted, this '
                     'defaults to STDOUT.')),
        ('if', F(parser.add_argument, '--if', '--input-format',
                 metavar="FORMAT", choices=['auto', 'tsv', 'csv', 'json'],
                 default='auto', required=False, type=str,
                 dest='input_format', help='XXX ..')),  # TODO: real help text
        ('of', F(parser.add_argument, '--of', '--output-format',
                 metavar="FORMAT",
                 choices=['auto', 'monkey', 'tsv', 'csv', 'json'],
                 default='auto', required=False, type=str,
                 dest='output_format', help='XXX ..')),  # TODO: real help text
        ('d', F(parser.add_argument, '-d', '--delimiter', dest='delimiter',
                default='\t',
                help='Delimiting character of the input CSV file.')),
    )
    # A large block of commented-out options (-b, -B, -c, -C, -e, -H, -l,
    # -p, -q, -S, -t, -u, -v, -z, --strip-comments, --zero) and draft
    # argument groups was removed as dead code; recover it from version
    # control if any of it is resurrected.
    # TODO: add parser sections for csv, tsv, json inputs and outputs, and
    # make sure overrides filter correctly across them.
    # Drop overridden options, then fire every remaining deferred
    # add_argument call; juxt invokes them all and the results are discarded.
    remaining = filter(lambda pair: pair[0] not in overrides, args)
    functions = map(lambda pair: pair[1], remaining)
    _ = tuple(juxt(functions)())
    return parser
def ssr_config(s: str):
    """Build the SSR config for *s*: required fields merged with extra params."""
    required = ssr_required_fields(s)
    extra = ssr_extra_params(s)
    # merge() gives later arguments precedence, so extra params win.
    return merge(required, extra)
map(lambda x: 2 * x + 1, range(0, 7))): print(x) """ We can use the 'juxt' function from the third party library to achieve a more consistent result, without worrying whether the arguments can be iterated once or not. The result is a list of tuples, as below: (1, 0) (4, 3) (7, 10) (10, 21) (13, 36) """ for x in map(juxt([add, mul]), range(5), map(lambda x: 2 * x + 1, range(0, 7))): print(x) """ If we want to two lists, one for outcome of each function, like below: (1, 4, 7, 10, 13) and (0, 3, 10, 21, 36) Then we can use zip() to transpose the list: [(1,0), (4,3), (7,10), (10,21), (13,36)] into the new list: [ (1, 4, 7, 10, 13)
# Predicate combinators: do all / any of the given tests pass for ``item``?
all_pred = lambda item, *tests: all(p(item) for p in tests)
any_pred = lambda item, *tests: any(p(item) for p in tests)

from functools import partial

is_lt100 = partial(operator.ge, 100)  # less than 100?
is_gt10 = partial(operator.le, 10)  # greater than 10?
# use partial() to add args
from nums import is_prime  # implemented elsewhere

all_pred(71, is_lt100, is_gt10, is_prime)
predicates = (is_lt100, is_gt10, is_prime)
all_pred(107, *predicates)

# check each func separately
>>> from toolz.functoolz import juxt
>>> juxt([is_lt100, is_gt10, is_prime])(71)
(True, True, True)
>>> all(juxt([is_lt100, is_gt10, is_prime])(71))
True
>>> juxt([is_lt100, is_gt10, is_prime])(107)
(False, True, True)

# Compare ad hoc lambda with `operator` function
sum1 = reduce(lambda a, b: a+b, iterable, 0)
sum2 = reduce(operator.add, iterable, 0)
sum3 = sum(iterable)  # The actual Pythonic way

# define some_func and other_func are equivalent
def __init__(self, user_agent=None, proxy=None, cargs=None, ckwargs=None,
             extensions=None, logger=None, factory=None, flags=None):
    """ Selenium compatible Remote Driver instance.

    Args:
        user_agent (str or Callable): overwrite browser's default
            user agent. If ``user_agent`` is a Callable then the result
            will be used as the user agent string for this browser
            instance.
        proxy (Proxy or SquidProxy): Proxy (or SquidProxy) instance
            that routes container traffic.
        cargs (list): container creation arguments.
        ckwargs (dict): container creation keyword arguments.
        extensions (list): list of file locations loaded as
            browser extensions.
        logger (:obj:`~logging.Logger`): logging module Logger instance.
        factory (:obj:`~selenium_docker.base.ContainerFactory`):
            abstract connection to a Docker Engine that does the primary
            interaction with starting and stopping containers.
        flags (:obj:`aenum.Flag`): bit flags used to turn advanced features
            on or off.

    Raises:
        ValueError: when ``proxy`` is an unknown/invalid value.
        Exception: when any problem occurs connecting the driver to its
            underlying container.
    """
    # Normalize optional collection arguments (avoids mutable defaults).
    args = cargs or []
    ckwargs = ckwargs or {}
    extensions = extensions or []

    # create the container
    self.factory = factory or ContainerFactory.get_default_factory()
    self.factory.load_image(self.CONTAINER, background=False)
    # Container name is generated unless the caller supplied one.
    self._name = ckwargs.setdefault('name', self.factory.gen_name())
    self.logger = logger or logging.getLogger(
        '%s.%s.%s' % (__name__, self.identity, self.name))
    self.container = self._make_container(**ckwargs)
    self._base_url = self.get_url()
    # user_agent can also be a callable function to randomly select one
    # at instantiation time
    user_agent = user_agent() if callable(user_agent) else user_agent
    self._perform_check_container_ready()

    # figure out if we're using a proxy
    self._proxy, self._proxy_container = None, None
    if isinstance(proxy, Proxy):
        # Selenium Proxy
        self._proxy_container = None
        self._proxy = proxy
    elif hasattr(proxy, 'selenium_proxy'):
        # Container for SquidProxy, extract Selenium portion
        self._proxy_container = proxy
        self._proxy = proxy.selenium_proxy
    elif proxy not in [None, False]:
        raise ValueError('invalid proxy type, %s' % type(proxy))

    # build our web driver capabilities
    self.flags = self.Flags.DISABLED if not flags else flags
    # juxt applies _capabilities and _profile to the same arguments and
    # returns both results as one tuple.
    fn = juxt(self._capabilities, self._profile)
    capabilities, profile = fn(args, extensions, self._proxy, user_agent)
    try:
        # build our web driver
        super(DockerDriverBase, self).__init__(
            self._base_url,
            desired_capabilities=capabilities,
            browser_profile=profile,
            keep_alive=False)
    except Exception as e:
        # Ensure the container is torn down before propagating the failure.
        self.logger.exception(e, exc_info=True)
        self.close_container()
        raise e

    # driver configuration
    self.implicitly_wait(self.IMPLICIT_WAIT_SECONDS)
    self._final(args, extensions, self._proxy, user_agent)
from toolz.itertoolz import groupby as groupbyToolz
from toolz.dicttoolz import valmap
from functools import partial, reduce
from itertools import filterfalse, chain, takewhile
from datetime import datetime
from os.path import join
import logging
logger = logging.getLogger(__name__)


"""
[Dictionary] position => [Bool] needs bloomberg data to determine asset type
"""
# NOTE(review): the string above is a free-standing expression used as a
# pseudo-docstring for the lambda below; it has no runtime effect.
# needBlpData: True unless the position is one of the kinds (private
# security, cash, money market, repo, FX forward) that never need a
# Bloomberg lookup.
needBlpData = lambda position: \
    not any(juxt(isPrivateSecurity, isCash, isMoneyMarket, isRepo,
                 isFxForward)(position))


def writeIdnTypeToFile(file, positions):
    """
    [String] output file name, [Iterator] positions => [String] output file name

    Side effect: write a csv file containing the id, idType for the positions

    A utility function, using which we can convert positions (Geneva or
    Bloomberg) to a file containing two columns (id, idType). The file will
    be used to load Bloomberg information for asset type processing.
def del_res():
    """Delete every resource in ``resources`` and log each deletion.

    Fix: the original wrapped the side effects in a ``map`` whose result
    was discarded — under Python 3 ``map`` is lazy, so neither the delete
    nor the print ever executed. A plain loop performs them eagerly, in
    the same order (delete, then print) for each resource.
    """
    for res in resources:
        rid = res['id']
        _resource_delete(rid)
        print('deleted resource {}'.format(rid))
any(map(lambda p: p(71), predicates)) # by comprehension [p(71) for p in predicates] #%% """ The library toolz has what might be a more general version of this called juxt() that creates a function that calls several functions with the same arguments and returns a tuple of results. We could use that, for example, to do: """ from toolz.functoolz import juxt juxt([is_lt100, is_gt10, is_odd])(71) all(juxt([is_lt100, is_gt10, is_odd])(71)) all(juxt([is_lt100, is_gt10, is_odd])(107)) any(juxt([is_lt100, is_gt10, is_odd])(107)) #%% the operator module import operator as op iterable = [1, 3, 2, 5, -10, 20] # using lambda ft.reduce(lambda a, b: a + b, iterable, 0) # using operator ft.reduce(op.add, iterable, 0)
def add_common_arguments(overrides=None, description=None, epilog=None):
    """Create an ``argparse.ArgumentParser`` pre-loaded with the common options.

    :param overrides: iterable of option keys ('i', 'o', 'if', 'of', 'd')
        whose registration should be skipped; defaults to none.
    :param description: forwarded to ``argparse.ArgumentParser``.
    :param epilog: forwarded to ``argparse.ArgumentParser``.
    :returns: the configured :class:`argparse.ArgumentParser`.
    """
    if not overrides:
        overrides = []
    parser = argparse.ArgumentParser(
        description=description,
        epilog=epilog,
        formatter_class=argparse.RawDescriptionHelpFormatter)
    # Each entry pairs an override key with a deferred parser.add_argument
    # call (F defers the call); the surviving calls are fired at the end.
    args = (
        # Input
        ('i', F(parser.add_argument, '-i', '--input', metavar="FILE",
                nargs='+', type=str, default=None, dest='input_files',
                help='The CSV file to operate on. If omitted, will accept '
                     'input on STDIN.')),
        ('o', F(parser.add_argument, '-o', '--output', metavar="FILE",
                # NOTE(review): `file` is the Python 2 builtin — confirm
                # before porting this module to Python 3.
                type=file, default=None, dest='output_file',
                help='The file to write the results to. If omitted, this '
                     'defaults to STDOUT.')),
        ('if', F(parser.add_argument, '--if', '--input-format',
                 metavar="FORMAT", choices=['auto', 'tsv', 'csv', 'json'],
                 default='auto', required=False, type=str,
                 dest='input_format', help='XXX ..')),  # TODO: real help text
        ('of', F(parser.add_argument, '--of', '--output-format',
                 metavar="FORMAT",
                 choices=['auto', 'monkey', 'tsv', 'csv', 'json'],
                 default='auto', required=False, type=str,
                 dest='output_format', help='XXX ..')),  # TODO: real help text
        ('d', F(parser.add_argument, '-d', '--delimiter', dest='delimiter',
                default='\t',
                help='Delimiting character of the input CSV file.')),
    )
    # A large block of commented-out options (-b, -B, -c, -C, -e, -H, -l,
    # -p, -q, -S, -t, -u, -v, -z, --strip-comments, --zero) and draft
    # argument groups was removed as dead code; recover it from version
    # control if any of it is resurrected.
    # TODO: add parser sections for csv, tsv, json inputs and outputs, and
    # make sure overrides filter correctly across them.
    # Drop overridden options, then fire every remaining deferred
    # add_argument call; juxt invokes them all and the results are discarded.
    remaining = filter(lambda pair: pair[0] not in overrides, args)
    functions = map(lambda pair: pair[1], remaining)
    _ = tuple(juxt(functions)())
    return parser