def compare_streams(db_engine, date_range, stream_names,
                    allowed_parts_of_speech, max_num_words):
    """Compare tokens from each stream in the stream_names list"""
    ## Create token count dictionaries for each stream name
    count_dicts_dict = {}
    for stream_name in stream_names:
        count_dicts_dict[stream_name] = tz.pipe(
            get_content(db_engine, stream_name, date_range),
            parse_content_into_count(max_num_words, allowed_parts_of_speech))

    ## Create cross-stream count dictionary
    all_streams_count_dict = reduce(
        lambda x, y: tz.merge_with(sum, x, y),
        count_dicts_dict.values())

    ## Calculate posterior probabilities of the tokens
    posterior_probs = {}
    for stream_name in stream_names:
        posterior_probs[stream_name] = tz.pipe(
            get_posterior_probs_freq(
                500,  # limited to the 500 most frequent words in this stream, at this time
                all_streams_count_dict,
                count_dicts_dict[stream_name]),
            tz.map(lambda x: tz.merge({"stream": stream_name}, x)),
            tz.take(max_num_words),
            list,
        )
    return posterior_probs
def send_message_to(self, recipient: ServerInfo, msg: Message):
    # asyncio.sleep suspends the function and allows the event loop to continue
    # processing the next scheduled coroutine in the queue, until this one
    # finishes its sleep
    yield from asyncio.sleep(3)
    reader, writer = yield from asyncio.open_connection(
        recipient.ip, recipient.port, loop=loop)
    pipe(msg, pickle.dumps, writer.write)
    writer.close()
def save_as_html(distinct_words, file_name):
    """Generate and save an html display of the distinct words"""

    ## Wrangle data for presentation
    # Convert tokens into a single string
    def get_token_string(given_values):
        """Return a token string, if the given values are a list of dictionaries"""
        # check if it is a list of token-related information
        if (isinstance(given_values, list) and
                len(given_values) > 0 and
                isinstance(given_values[0], dict)):
            return tz.pipe(
                given_values,
                tz.map(lambda x: x['token']),
                tz.map(wrap_in_highlight_link),  # wrap in link to highlight words
                tz.reduce(lambda x, y: u"{}, {}".format(x, y)))
        # return empty string for empty lists
        elif isinstance(given_values, list) and len(given_values) == 0:
            return ''
        # check if it is a date range in need of formatting
        elif isinstance(given_values, list) and len(given_values) == 2:
            return format_date_range(given_values)
        else:
            return given_values

    def format_date_range(given_date_range):
        """Return a pretty version of the given date_range"""
        date_range = map(
            lambda x: dt.datetime.strptime(x, "%Y-%m-%dT%H:%M:%SZ"),
            given_date_range)
        return "{} to {} UTC".format(
            date_range[0].strftime("%Y-%m-%d %H:%M"),
            date_range[1].strftime("%H:%M"))

    def wrap_in_highlight_link(given_string):
        """Return the given string wrapped in the html code to highlight
        other occurrences of that same word"""
        return u"""<a href="javascript:void($('.distinct_words').removeHighlight().highlight('{string}'));">{string}</a>""".format(string=given_string)

    formatted_distinct_words = tz.pipe(
        distinct_words,
        tz.map(tz.valmap(get_token_string)),
        list)

    ## Send to Template For Display
    template_dir = 'templates'
    loader = jinja2.FileSystemLoader(template_dir)
    environment = jinja2.Environment(loader=loader)
    template = environment.get_template('distinct_words.html')
    with open(file_name, 'w') as f:
        tz.pipe(
            template.render(distinct_words=formatted_distinct_words),
            lambda x: x.encode('utf8'),
            lambda x: f.write(x))
def ipython_display(specs):
    """Run publish_display_data for the JS and HTML

    Args:
      specs: a list of Vega specs
    """
    pipe(
        specs,
        map(lambda x: (uuid.uuid4(), vega.Vega(x))),
        list,
        do(html_publish_map),
        map(tlam(js_publish)),
        list
    )
def html_publish_map(data):
    """Run IPython's 'publish_display_data' for each spec.

    Args:
      data: list of (id, spec) pairings
    """
    pipe(
        data,
        map(lambda x: x[0]),
        list,
        lambda x: publish_display_data(
            {'text/html': render_html(x)},
            metadata={'jupyter-vega': '#{0}'.format(x[0])})
    )
def send_message_to(self, recipient: ServerInfo, msg: Message):
    # asyncio.sleep suspends the function and allows the event loop to continue
    # processing the next scheduled coroutine in the queue, until this one
    # finishes its sleep
    yield from asyncio.sleep(2)  # simulating send delay of 2 seconds
    reader, writer = \
        yield from asyncio.open_connection(
            recipient.ip, recipient.port, loop=loop)
    pipe(
        msg,
        pickle.dumps,
        writer.write
    )
    # yield from asyncio.sleep(x)  # simulate slow transfer (eg. huge file or very low bandwidth)
    writer.close()
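# Note: the snippet above uses the legacy generator-based coroutine style; the
# following is a hedged sketch (not from the original project) of an async/await
# equivalent for modern asyncio, where the `loop=` argument has been removed.
# ServerInfo, Message, and pickling the message are carried over as assumptions;
# writer.drain() is added so the write is flushed before closing.
import asyncio
import pickle
from toolz import pipe


async def send_message_to_async(self, recipient: "ServerInfo", msg: "Message"):
    await asyncio.sleep(2)  # simulating send delay of 2 seconds
    reader, writer = await asyncio.open_connection(recipient.ip, recipient.port)
    pipe(msg, pickle.dumps, writer.write)
    await writer.drain()  # ensure the bytes are actually sent
    writer.close()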
def fancify_summary(expr):
    """ Separate a complex summary into two pieces

    Helps pandas compute_by on summaries

    >>> t = symbol('t', 'var * {x: int, y: int}')
    >>> one, two, three = fancify_summary(summary(a=t.x.sum(), b=t.x.sum() + t.y.count() - 1))

    A simpler summary with only raw reductions

    >>> one
    summary(x_sum=sum(t.x), y_count=count(t.y))

    A mapping of those names to new leaves to use in another computation

    >>> two  # doctest: +SKIP
    {'x_sum': x_sum, 'y_count': y_count}

    A mapping of computations to do for each column

    >>> three  # doctest: +SKIP
    {'a': x_sum, 'b': (x_sum + y_count) - 1}

    In this way, ``compute_by`` is able to do simple pandas reductions using
    groups.agg(...) and then do columnwise arithmetic afterwards.
    """
    seen_names.clear()
    name_dict.clear()
    exprs = pipe(expr.values,
                 map(Expr._traverse),
                 concat,
                 filter(lambda x: isinstance(x, Reduction)),
                 set)
    one = summary(**dict((_name(expr), expr) for expr in exprs))

    two = dict((_name(expr), symbol(_name(expr), datashape.var * expr.dshape))
               for expr in exprs)

    d = dict((expr, two[_name(expr)]) for expr in exprs)
    three = dict((name, value._subs(d))
                 for name, value in zip(expr.names, expr.values))

    return one, two, three
def outer_dict(dict_in):
    """Outer product of dictionary values

    Args:
      dict_in: a dictionary with iterable values

    Returns:
      a list of dictionaries

    >>> assert pipe(
    ...     dict(a=[1], b=[2, 3]),
    ...     curry(outer_dict),
    ...     lambda x: x == [dict(a=1, b=2), dict(a=1, b=3)]
    ... )
    """
    return pipe(
        dict_in.items(),
        lambda x: zip(*x),
        list,
        lambda x: (x[0], product(*x[1])),
        tlam(lambda x, y: zip(repeat(x), y)),
        map(lambda x: zip(*x)),
        map(dict),
        list
    )
def vega_plot_treants(treants):
    """Make a vega plot with side-by-side plots

    Args:
      treants: a list of treants

    Returns a MultiVega instance

    >>> from click.testing import CliRunner
    >>> from extremefill2D.fextreme import init_sim
    >>> from extremefill2D.fextreme.tools import base_path
    >>> with CliRunner().isolated_filesystem() as dir_:
    ...     assert pipe(
    ...         os.path.join(base_path(), 'scripts', 'params.json'),
    ...         init_sim(data_path=dir_),
    ...         lambda x: [x, x],
    ...         vega_plot_treants,
    ...         lambda x: type(x) is MultiVega)
    """
    return pipe(
        treants,
        map(lambda x: render_spec([x])),
        list,
        MultiVega
    )
def contours(data):
    """Get zero contours from x, y, z data

    Args:
      data: dictionary with (x, y, z, dx) keys

    Returns:
      a list of (N, 2) numpy arrays representing the contours
    """
    def linspace_(arr, spacing):
        """Calculate the linspace based on a spacing
        """
        return pipe(
            arr,
            juxt(min, max),
            tlam(lambda x_, y_: np.linspace(x_, y_, (y_ - x_) / spacing))
        )

    return pipe(
        data,
        lambda x: dict(xi=linspace_(x['x'], x['dx']),
                       yi=linspace_(x['y'], x['dx']),
                       **x),
        lambda x: griddata((x['y'], x['x']),
                           x['z'],
                           (x['yi'][None, :], x['xi'][:, None]),
                           method='cubic'),
        lambda x: measure.find_contours(x, 0.0),
        map(lambda x: float(data['dx']) * x)
    )
def map(self, func, data):  # pylint: disable=no-self-use
    return pipe(
        data,
        map(func),
        map(DummyResult),
        list
    )
def load_errors_definitions():
    """Return `definition_by_error_name` dict from the file `errH.json`."""
    return pipe(
        load_json(os.path.join(json_dir_path, 'ast', 'errH.json')),
        map(lambda d: (d['name'], d)),  # Index by name and keep singleton value (groupby creates list values)
        dict,
    )
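# The index-by-key-field pattern above is generic; a minimal standalone sketch
# (made-up records, only toolz.curried assumed, no load_json dependency):
from toolz.curried import pipe, map

records = [{'name': 'E1', 'msg': 'bad input'}, {'name': 'E2', 'msg': 'missing field'}]
definition_by_error_name = pipe(
    records,
    map(lambda d: (d['name'], d)),  # build (key, value) pairs
    dict,                           # later duplicates would win
)
assert definition_by_error_name['E2']['msg'] == 'missing field'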
def rolling_fit_opt_weights(df, opt_weights_func, look_ahead_per):
    """applies opt_weights_func to rolling window on pandas df"""
    num_rows = df.shape[0]
    p = pipe(xrange(num_rows),
             filter(lambda x: x + look_ahead_per < num_rows),
             map(lambda x: {df.index[x]:
                            opt_weights_func(df.iloc[x:x + look_ahead_per + 1])}))
    return pd.DataFrame(merge(p)).T
def compare_streams_across_time(db_engine, configuration):
    """Return distinct words for each considered stream at each time step
    in the given date range."""

    def date_range_iterator(overall_date_range, time_step):
        """Returns an iterator of the time ranges being considered.
        time_step is assumed to be in minutes"""

        def get_time(overall_start, time_step, step):
            """Return the timestamp that is step time_step's beyond overall_start"""
            return (overall_start + (time_step * (step - 1))).strftime("%Y-%m-%dT%H:%M:%SZ")

        overall_start = dt.datetime.strptime(overall_date_range[0], "%Y-%m-%dT%H:%M:%SZ")
        overall_end = dt.datetime.strptime(overall_date_range[1], "%Y-%m-%dT%H:%M:%SZ")
        time_step = dt.timedelta(minutes=time_step)
        return tz.pipe(
            # Number of steps to take
            (overall_end - overall_start).total_seconds() / time_step.total_seconds(),
            int,
            # Build range
            lambda x: range(1, x + 2),
            # Convert to timestamps
            tz.map(lambda x: [
                get_time(overall_start, time_step, x - 1),
                get_time(overall_start, time_step, x)]))

    result = []
    for date_range in date_range_iterator(configuration['overall_date_range'],
                                          configuration['time_step']):
        result.append(
            tz.pipe(
                # Stream comparison for a particular time period
                compare_streams(
                    db_engine,
                    date_range,
                    configuration['stream_names'],
                    configuration['allowed_parts_of_speech'],
                    configuration['max_num_words']),
                lambda x: tz.merge(x, {'date_range': date_range})))  # add in date_range entry
    return result
def get_treant_df(tags, path='.'):
    """Get treants as a Pandas DataFrame

    Args:
      tags: treant tags to identify the treants
      path: the path to search for treants

    Returns:
      a Pandas DataFrame with the treant name, tags and categories

    >>> from click.testing import CliRunner
    >>> from toolz.curried import do
    >>> with CliRunner().isolated_filesystem() as dir_:
    ...     assert pipe(
    ...         dir_,
    ...         dtr.Treant,
    ...         do(lambda x: x.__setattr__('tags', ['atag'])),
    ...         lambda x: x.uuid[:8],
    ...         lambda x: x == get_treant_df(['atag'], path=dir_).uuid[0]
    ...     )
    """
    return pipe(
        tags,
        get_by_tags(path=path),
        lambda x: x.map(get_treant_data),
        pandas.DataFrame,
    )
def linspace_(arr, spacing):
    """Calculate the linspace based on a spacing
    """
    return pipe(
        arr,
        juxt(min, max),
        tlam(lambda x_, y_: np.linspace(x_, y_, (y_ - x_) / spacing))
    )
def destruct(x):
    """
    Deconstructs a data structure into a 1-D np.ndarray (via multiple dispatch)
    Converts a list of numpy arrays to a single array
    """
    # unravel each array, concatenate, and collect into a single 1-D array
    return pipe(x, map(destruct), concat, list, np.array)
def visit_formula(node):
    formula_name = node['name']
    dependencies = pipe(
        visit_node(node['expression']),
        unique,
        list,
    )
    return (formula_name, dependencies)
def opt_weight_ir_grid(df, alphas, look_ahead_pers, long_only=True, tilt_weights=None):
    """exhaustive grid search over alphas, look_ahead_per, norm_types returning
    dataframe of cumulative returns for each optimal portfolio construction"""
    norm_types = [2, ]
    end_date = df.index[-(look_ahead_pers[-1] + 1)]
    p = pipe(product(alphas, norm_types, look_ahead_pers),
             map(lambda x: list(x) + [calc_opt_weight_portfolio_ir(
                 df, x[0], x[1], x[2], long_only, tilt_weights)]),
             map(lambda x: dict(zip(['alpha', 'norm_type', 'look_ahead_per', 'ir'], x))))
    return pd.DataFrame(list(p))
def get_top_tokens(n, count_dict):
    """Return the top n most frequent tokens in the count_dict

    If n > len(count_dict), it will just return them all"""
    return tz.pipe(
        count_dict,
        lambda x: x.items(),
        lambda x: sorted(x, key=lambda y: -y[1]),
        lambda x: tz.take(n, x),
        list)
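# Minimal usage sketch for get_top_tokens; assumes `import toolz.curried as tz`
# and uses made-up counts to show the (token, count) ordering.
counts = {"the": 12, "pipe": 7, "toolz": 3}
assert get_top_tokens(2, counts) == [("the", 12), ("pipe", 7)]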
def test_enum():
    """Test enum
    """
    assert pipe(
        ('a', 'b', 'c', 'd'),
        enum(lambda i, x: (i, x)),
        list,
        lambda x: x == [(0, 'a'), (1, 'b'), (2, 'c'), (3, 'd')]
    )
def discover_jsonlines(j, n=10, encoding='utf-8', **kwargs):
    with json_lines(j.path, encoding=encoding) as lines:
        data = pipe(lines, filter(nonempty), map(json.loads), take(n), list)

    if len(data) < n:
        ds = discover(data)
    else:
        ds = var * discover(data).subshape[0]
    return date_to_datetime_dshape(ds)
def visit_ternary_operator(node):
    return pipe([
        visit_node(node['value_if_true']),
        visit_node(node['condition']),
        visit_node(node['value_if_false']) if 'value_if_false' in node else None,
        ],
        filter(None),
        concat,
    )
def latest(treant):
    """Get the latest data file available based on a sort.
    """
    return pipe(
        treant.glob('*.nc'),
        sorted,
        last,
        lambda leaf: leaf.abspath,
    )
def test_get_by_uuid():
    """Test get_by_uuid
    """
    from click.testing import CliRunner
    with CliRunner().isolated_filesystem() as dir_:
        assert pipe(
            dir_,
            dtr.Treant,
            lambda x: x.uuid == get_by_uuid(x.uuid[:8]).uuid)
def iter_ast_json_file_names(filenames):
    json_file_paths = pipe(
        filenames,
        map(lambda pathname: os.path.join(args.json_dir, 'ast', pathname)),
        mapcat(glob.iglob),
        sorted,
    )
    for json_file_path in json_file_paths:
        json_file_name = os.path.basename(json_file_path)
        file_name_head = os.path.splitext(json_file_name)[0]
        yield json_file_name
def test_set_treant_categories():
    """Test set_treant_categories
    """
    from click.testing import CliRunner
    with CliRunner().isolated_filesystem() as dir_:
        assert pipe(
            dir_,
            dtr.Treant,
            set_treant_categories(dict(a=1)),
            lambda x: x.categories['a'] == 1
        )
def destruct(x):
    """
    Deconstructs a data structure into a 1-D np.ndarray (via multiple dispatch)
    Converts a list of numpy arrays to a single array
    """
    # make sure the values are all numpy arrays
    list(map(enforce(np.ndarray), x))

    # unravel each array, concatenate, and collect into a single 1-D array
    return pipe(x, map(np.ravel), concat, list, np.array)
def get_path(file_):
    """Return the local file path for this file.

    Returns:
      the filepath
    """
    return pipe(
        file_,
        os.path.realpath,
        os.path.split,
        get(0)
    )
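# Quick usage sketch for get_path; assumes the curried `get` and `pipe` come
# from toolz.curried, and only checks against the equivalent os.path call.
import os
assert get_path(__file__) == os.path.dirname(os.path.realpath(__file__))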
def process(text):
    """ Replace failures in docstring with results """
    parts = pipe(text, parser.parse,
                 filter(None),
                 map(separate_fence),
                 concat, list)

    scope = dict()  # scope of variables in our executed environment
    state = dict()  # state of pymarkdown traversal

    out_parts = list()
    for part in parts:
        out, scope, state = step(part, scope, state)
        out_parts.extend(out)

    head = '\n'.join(sorted(state.get('headers', set())))
    body = pipe(out_parts, map(render_part),
                filter(None),
                '\n'.join)
    foot = '\n\n'.join(state.get('footers', []))

    return '\n\n'.join([head, body, foot]).strip()
def coeff_to_real(coeff, new_shape=None):
    r"""Convert the coefficients to real space

    Convert the :class:`pymks.LocalizationRegressor` coefficients to real
    space. The coefficients are calculated in Fourier space, but best
    viewed in real space. If the Fourier coefficients are defined as
    :math:`\beta\left[l, k\right]` then the real space coefficients are
    calculated using,

    .. math::

       \alpha \left[l, r\right] = \frac{1}{N}
           \sum_{k=0}^{N-1} \beta\left[l, k\right]
           e^{i \frac{2 \pi}{N} k r} e^{i \pi}

    where :math:`l` is the local state and :math:`r` is the spatial index
    from :math:`0` to :math:`N-1`. The :math:`e^{i \pi}` term is a shift
    applied to place the 0 coefficient at the center of the domain for
    viewing purposes.

    Args:
      coeff (array): the localization coefficients in Fourier space as a
        Dask array `(n_x, n_y, n_state)`
      new_shape (tuple): shape of the output to either shorten or pad with
        zeros

    Returns:
      the coefficients in real space

    A spike at :math:`k=1` should result in a cosine function on the real
    axis.

    >>> N = 100
    >>> fcoeff = np.zeros((N, 1))
    >>> fcoeff[1] = N
    >>> x = np.linspace(0, 1, N + 1)[:-1]
    >>> assert np.allclose(
    ...     coeff_to_real(da.from_array(fcoeff)).real.compute(),
    ...     np.cos(2 * np.pi * x + np.pi)[:, None]
    ... )

    """  # pylint: disable=line-too-long; # noqa: #501
    return pipe(
        coeff,
        daifftn(axes=_ini_axes(coeff), s=new_shape),
        dafftshift(axes=_ini_axes(coeff)),
    )
def create_questions(course: Course, quiz_data: dict):
    '''Ok, so... We have to destroy/recreate the questions because something
    weird happens when we do a PUT on an individual QuizQuestion object. So,
    we can't do spot-updates on existing questions. Yay.
    '''
    quiz_ep = find_quiz(course, quiz_data['title'])

    log.info(f"[create_questions] Pulling question data for quiz:"
             f" {quiz_data['title']} ...")

    question_eps = questions(quiz_ep)
    question_data = quiz_data['questions']

    quiz_md = get_metadata(quiz_ep)
    question_md = quiz_md['questions']

    question_hashes = pipe(
        question_data,
        map(common.hash_from_dict),
        tuple,
    )

    if all(a == b for a, b in itertools.zip_longest(question_hashes,
                                                    question_md['hashes'])):
        log.info(
            '[create_questions] ... no differences detected in the questions.')
        return question_eps

    if question_eps:
        log.info('[create_questions] ... questions differ.. deleting'
                 f' {len(question_eps)} existing questions.')
        for q_ep in question_eps:
            q_ep.delete()

    log.info('[create_questions] Creating new questions')
    question_eps = [new_question(quiz_ep, q_data) for q_data in question_data]

    quiz_md['questions']['hashes'] = question_hashes
    set_metadata(quiz_ep, quiz_md)

    log.info(
        f'[create_questions] Updating question count: {len(question_data)}')
    update_endpoint(quiz_ep, {'question_count': len(question_data)})

    return question_eps
def coeff_resize(coeff, shape):
    """Resize the influence coefficients.

    Resize the influence coefficients by padding with zeros to the size
    determined by shape. Apply to coefficients in frequency space.

    Args:
      coeff: the influence coefficients with size (nx, ny, nz, nstate)
      shape: the new padded shape (NX, NY, NZ)

    Returns:
      the resized influence coefficients

    >>> from .func import ifftshift, fftn
    >>> assert pipe(
    ...     np.arange(20).reshape((5, 4, 1)),
    ...     lambda x: np.concatenate((x, np.ones_like(x)), axis=-1),
    ...     ifftshift(axes=(0, 1)),
    ...     fftn(axes=(0, 1)),
    ...     lambda x: da.from_array(x, chunks=x.shape),
    ...     coeff_resize(shape=(10, 7)),
    ...     coeff_to_real,
    ...     lambda x: np.allclose(x.real[..., 0],
    ...                           [[0, 0, 0, 0, 0, 0, 0],
    ...                            [0, 0, 0, 0, 0, 0, 0],
    ...                            [0, 0, 0, 0, 0, 0, 0],
    ...                            [0, 0, 0, 1, 2, 3, 0],
    ...                            [0, 0, 4, 5, 6, 7, 0],
    ...                            [0, 0, 8, 9, 10, 11, 0],
    ...                            [0, 0, 12, 13, 14, 15, 0],
    ...                            [0, 0, 16, 17, 18, 19, 0],
    ...                            [0, 0, 0, 0, 0, 0, 0],
    ...                            [0, 0, 0, 0, 0, 0, 0]])
    ... )

    """
    return pipe(
        coeff,
        coeff_to_real,
        zero_pad(
            shape=shape + coeff.shape[-1:],
            chunks=((-1,) * len(shape)) + (coeff.chunks[-1],),
        ),
        coeff_to_frequency,
    )
def test_3d():
    """Test FE in 3D
    """
    def setone(arr):
        arr[0, :, (arr.shape[0] - 1) // 2] = 1.0
        return arr

    assert pipe(
        5,
        lambda x: np.zeros((1, x, x, x), dtype=int),
        setone,
        solve_fe(elastic_modulus=(1.0, 10.0), poissons_ratio=(0.0, 0.0)),
        lambda x: np.allclose(
            [np.mean(x["strain"][0, ..., i]) for i in range(6)],
            [1.0, 0.0, 0.0, 0.0, 0.0, 0.0],
        ),
    )
def process_chart(id_, data, j2_file_name):
    """Process chart's YAML with data.

    Args:
      id_: the benchmark ID
      data: the data to process the YAML file
      j2_file_name: the name of the j2 file to process

    Returns:
      the rendered YAML as a dictionary
    """
    return pipe(
        get_chart_file(j2_file_name),
        render_yaml(
            data=data,
            id_=id_,
            marks=get_marks().get(id_, get_marks()["default"])
        ),
        yaml.load,
    )
def func(min_xyz, max_xyz):
    def shift_(_, coors, __):
        return np.ones_like(coors[:, 0]) * macro_strain * (max_xyz[0] - min_xyz[0])

    return pipe(
        [("plus", max_xyz[0]), ("minus", min_xyz[0])],
        map_(get_region_func(None, "x", domain)),
        list,
        lambda x: LinearCombinationBC(
            "lcbc",
            x,
            {"u.0": "u.0"},
            Function("match_x_plane", per.match_x_plane),
            "shifted_periodic",
            arguments=(Function("shift", shift_),),
        ),
        lambda x: Conditions([x]),
    )
def root_folder(course: IdResourceEndpoint):
    '''For a given course endpoint, return its root folder object

    >>> import canvasapi, tokenmanager
    >>> token = tokenmanager.get_tokens().canvas
    >>> api = canvasapi.api.get_api(
    ...     'https://example.instructure.com/api/v1', token
    ... )
    >>> c, *_ = canvasapi.course.all_courses(api())
    >>> root = canvasapi.course.root_folder(c)

    Will return Null object if no root folder is found.
    '''
    return pipe(
        folders(course),
        filter(lambda f: f.data['parent_folder_id'] is None),
        maybe_first,
    )
def predict(self, x):
    x = check_array(x)
    competence_region_labels = pipe(
        self.competence_region_classifier.predict(x),
        map(self.get_first_trained_competence_region),
        list
    )
    return (
        pd.DataFrame({"data": x.tolist(), "competence": competence_region_labels})
        .groupby("competence")["data"]
        .transform(
            lambda samples: self.clf_by_label[samples.name].predict(
                samples.to_list()
            )
        )
        .to_numpy()
    )
def calculate_coverage(rules: Collection[Rule], x_train):
    feature_ranges = np.ptp(x_train, axis=0)
    total_area = 0
    max_area = reduce(lambda a, b: a * b)(feature_ranges)

    for rule in rules:
        bounded_rule = bound_rule(rule, x_train)
        this_rule_statement_ranges = []
        for feature, statements in bounded_rule.get_statements_by_feature().items():
            sorted_thresholds = pipe(
                statements, map(lambda s: s.threshold), sorted, list
            )
            this_rule_statement_ranges.append(
                np.abs(sorted_thresholds[-1] - sorted_thresholds[0])
            )
        total_area = total_area + reduce(lambda a, b: a * b)(this_rule_statement_ranges)

    return 1 if total_area / max_area > 1 else total_area / max_area
def migrate(func):
    """Migrate the meta.yaml files using a callback function.

    This function reads and writes to the meta.yaml files.

    Args:
      func: the callback function which takes the YAML dictionary and
        returns a new dictionary

    Returns:
      a dictionary with file names as keys and values as dictionaries of
      the updated YAML data.
    """
    return pipe(
        get_yaml_data(),
        dict,
        valmap(func),
        do(itemmap(lambda x: write_yaml_data(*x)))
    )
def make_upload_chart(gfunc, yaml_path, json_path, title):
    """Create an upload chart

    Args:
      gfunc: the group by function
      yaml_path: the template path
      json_path: the path to write to
      title: the title of the chart

    Returns:
      the chart JSON
    """
    return pipe(
        gfunc,
        groupby_count,
        lambda data: list(data.items()),
        lambda data: sorted(data, key=lambda item: (-item[1], item[0])),
        lambda data: j2_to_json(yaml_path, json_path, data=data, title=title),
    )
def parse_course_metadata(regexes: list, course_dict: dict):
    r'''
    Examples:

    >>> regexes = [
    ...     {'key': 'name',
    ...      'regex': r'^(?P<code>\S+) (?P<name>.*?) (?P<section>\S+)$'},
    ...     {'key': 'course_code',
    ...      'regex': r'^(?P<code>\S+) (?P<name>.*?) (?P<section>\S+)$'}
    ... ]
    >>> course_dict = {
    ...     'name': 'CS102 CompSci II S01',
    ... }
    >>> parse_course_metadata(regexes, course_dict) == {
    ...     'code': 'CS102', 'name': 'CompSci II', 'section': 'S01',
    ... }
    True
    '''
    def get_course_value(regex_dict):
        if 'key' in regex_dict:
            if regex_dict['key'] in course_dict:
                return course_dict[regex_dict['key']]
        elif 'keys' in regex_dict:
            return _.get_in(regex_dict['keys'], course_dict)

    def transform_year(d):
        if 'year' in d:
            return _.assoc(d, 'year', int(d['year']))
        return d

    return _.pipe(
        regexes,
        _.map(lambda d: (get_course_value(d), re.compile(d['regex']))),
        _.filter(_.first),
        lcommon.vmap(lambda value, regex: regex.search(value)),
        _.filter(None),
        _.map(lambda m: m.groupdict()),
        _.map(transform_year),
        tuple,
        reversed,
        tuple,
        lambda dicts: _.merge(*(dicts or [{}])),
    )
def view(self):
    latent = latent_df.hvplot.scatter(
        x="Component 1",
        y="Component 2",
        color="diagnosis",
        title="Latent Space of Heartbeat FFT",
        width=800,
        height=300,
        tools=["tap"],
    )
    stream = hv.streams.Selection1D(source=latent)
    reg = hv.DynamicMap(self.update, kdims=[], streams=[stream])
    audio = pn.widgets.Audio(name="Audio", value=pipe(self.files.value))
    return pn.Column(latent, reg, audio)
def make_csv(columns, number, size, filename):
    return pipe(
        'data.json',
        lambda x: loadfn(x, cls=MontyDecoder)[:number],
        map(
            lambda x: assoc(
                x,
                key='formula',
                value=x['final_str'].composition.reduced_formula
            ),
        ),
        list,
        lambda x: pandas.DataFrame(x),
        lambda x: x[columns],
        lambda x: x.to_csv('tmp.csv', index=False),
        lambda _: pandas.read_csv('tmp.csv', na_values=['None', 'na']),
        lambda x: x.dropna().reset_index(drop=True).ix[:size],
        lambda x: x.to_csv(filename, index=False)
    )
def from_file_path(cls, file_path: FilePath, sheet_name: str, *, row_limit: int = 100):
    """Helper function to populate the columns of a sheet."""
    wb = get_wb(file_path)
    ws = wb[sheet_name]
    rows = tz.take(row_limit, ws.rows)
    header = next(rows)
    names = [c.value for c in header]
    letters = [c.column_letter for c in header]
    indices = [c.column for c in header]
    data_types = tz.pipe(
        rows
        # For each row, create a dict using names as keys
        , tz.map(lambda row: dict(zip(names, row)))
        # Get the .xlsx data_type for each cell
        , tz.map(tz.valmap(lambda cell: cell.data_type))
        # Combine cells into a list per column
        , tz.merge_with(list)
        # Count the cells for each data type in the column
        , tz.valmap(tz.frequencies)
        # Consolidate types
        , tz.valmap(lambda freq: (
            # If at least 1 "d"
            "date" if "d" in freq else
            # If at least 1 "s"
            "text" if "s" in freq else
            # If at least 1 "n"
            "number" if "n" in freq else
            str(freq))),
        lambda d: [v for k, v in d.items()])
    cols = [
        Col(name=N, letter=L, index=I, data_type=D)
        for N, L, I, D in zip(names, letters, indices, data_types)
    ]
    return cls(name=sheet_name, cols=cols)
def read_redirects(yaml_path=TOC_PATH) -> dict:
    """
    Generates redirect mapping of old URL to new URL:

    {
        'ch/04/cleaning_intro.html': 'ch/05/cleaning_intro.html',
        ...
    }
    """
    with open(yaml_path) as f:
        data = yaml.load(f)

    return t.pipe(
        data,
        t.map(_get_redirects),
        t.filter(t.identity),
        t.concat,
        _merge_redirects,
    )
def sample_url_line_delimited(data, lines=5, encoding='utf-8'):
    """Get a sample of `lines` lines from an URL CSV or URL line-delimited JSON.

    Parameters
    ----------
    data : URL(CSV)
        A hosted CSV
    lines : int, optional, default ``5``
        Number of lines to read into memory
    """
    with closing(urlopen(data.url)) as r:
        raw = pipe(r, take(lines), map(bytes.strip),
                   curry(codecs.iterdecode, encoding=encoding),
                   b'\n'.decode(encoding).join)
        with tmpfile(data.filename) as fn:
            with codecs.open(fn, 'wb', encoding=encoding) as f:
                f.write(raw)
            yield fn
def time_ratio(data):
    """Calculate the sim_time over wall_time ratio
    """
    def not0(value):
        """Set to 1e-10 if 0
        """
        if value == 0:
            return 1e-10
        return value

    return pipe(
        data[-1],
        juxt(
            lambda x: x.get("sim_time", x.get("time")),
            lambda x: x.get("wall_time", x.get("time")),
        ),
        lambda x: float(x[1]) / not0(float(x[0])),
    )
def calc_eta(coords, delta=1.0, radius=2.5):
    """Calculate a fake phase field for testing

    Phase field is a circle centered at 0, 0 of radius r, eta = 1 in the
    circle and 0 outside.

    Args:
      coords: the Sfepy coordinate array
      delta: interface width
      radius: radius of the circle

    Returns:
      the value of the phase field
    """
    return pipe(
        coords,
        lambda x: np.sqrt(x[:, 0] ** 2 + x[:, 1] ** 2),
        lambda x: 0.5 * (1 + np.tanh((-x + radius) * 2 / delta)),
    )
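# Sanity-check sketch for calc_eta (only numpy and toolz.pipe assumed): the
# tanh profile gives eta ~ 1 at the origin and ~ 0 well outside the circle.
import numpy as np

pts = np.array([[0.0, 0.0], [10.0, 0.0]])
eta = calc_eta(pts, delta=1.0, radius=2.5)
assert np.allclose(eta, [1.0, 0.0], atol=1e-4)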
def sample(self, sample_graphs):
    """sample."""
    # pareto filter using similarity of the dataset for initial seed
    costs = self.sim_cost_estimator.compute(sample_graphs)
    seed_graphs = get_pareto_set(sample_graphs, costs)

    # run optimization in parallel
    pareto_graphs_list = self._optimize_parallel(seed_graphs)
    self._log_result(pareto_graphs_list)

    # join all pareto sets
    pareto_set_graphs = pipe(pareto_graphs_list, concat, list)

    # pareto filter using similarity of the solutions
    pareto_set_costs = self.sim_cost_estimator.compute(pareto_set_graphs)
    sel_pareto_set_graphs = get_pareto_set(pareto_set_graphs, pareto_set_costs)
    logger.info('#constructed graphs:%5d' % (len(sel_pareto_set_graphs)))
    return sel_pareto_set_graphs
def get_bag(build_dir: Path, base_dtype: str = "xml") -> db.Bag:
    """possible to do any text pre-processing here"""
    dtype_path = get_datapaths(build_dir).get(base_dtype)
    schema = get_schema(build_dir)
    filepaths = dtype_path.glob(f"**/*.{base_dtype}")

    _update_authors = flip(update_in(func=flatten_authors), ("authors", ))
    _update_keywords = lambda d: pipe(
        d,
        *[
            update_in(func=split_keywords, keys=[kw])
            for kw in (col for col in d.keys() if col.endswith("_keywords"))
        ],
    )

    return (db.from_sequence(filepaths)
            .map(partial(load_xml, schema))
            .map(_update_authors)
            .map(_update_keywords))
def func(min_xyz, max_xyz):
    return pipe(
        dict(z_points=(max_xyz[2], min_xyz[2])) if len(min_xyz) == 3 else dict(),
        lambda x: subdomain_func(
            x_points=x_points_f(min_xyz, max_xyz),
            y_points=(max_xyz[1], min_xyz[1]),
            **x,
        ),
        lambda x: Function(f"{name}_x_points", x),
        lambda x: domain.create_region(
            f"region_{name}_points",
            f"vertices by {name}_x_points",
            "vertex",
            functions=Functions([x]),
        ),
        lambda x: EssentialBC(
            f"{name}_points_BC", x, points_dict_f(min_xyz, max_xyz)
        ),
    )
def read_url_map(yaml_path=TOC_PATH) -> dict:
    """
    Generates mapping from each URL to its previous and next URLs in the
    textbook. The dictionary looks like:

    {
        'ch/01/some_page.html' : {
            'prev': 'about.html',
            'next': 'ch/01/foo.html',
        },
        ...
    }
    """
    return t.pipe(
        read_url_list(yaml_path),
        _sliding_three,
        t.map(_adj_pages),
        t.merge,  # pass the merge function itself, not the result of calling it
    )
def fit_disc(x_data, y_data, redundancy_func):
    """Fit the discretized data.

    Fit the data after the data has already been discretized.

    Args:
      x_data: the discretized microstructure field
      y_data: the discretized response field
      redundancy_func: helps remove redundancies in the coefficient matrix

    Returns:
      the chunked coefficient matrix based on the chunking of local state
      space from the discretized microstructure field

    >>> make_data = lambda s, c: da.from_array(
    ...     np.arange(np.prod(s),
    ...               dtype=float).reshape(s),
    ...     chunks=c
    ... )

    >>> matrix = fit_disc(make_data((6, 4, 4, 3), (2, 4, 4, 1)),
    ...                   make_data((6, 4, 4), (2, 4, 4)),
    ...                   lambda _: (slice(None),))

    >>> print(matrix.shape)
    (4, 4, 3)
    >>> print(matrix.chunks)
    ((4,), (4,), (1, 1, 1))
    >>> assert np.allclose(matrix.compute()[0, 0, 0], 5. / 18.)

    """
    chunks = lambda x: (None,) * (len(x.shape) - 1) + (x_data.chunks[-1],)
    return pipe(
        [x_data, y_data],
        fmap(dafftn(axes=faxes(x_data))),
        list,
        lambda x: fit_fourier(*x, redundancy_func),
        lambda x: da.from_array(x, chunks=chunks(x)),
    )
def convolve(f_data, g_data=None, device=torch.device("cpu")):
    """
    Returns auto-correlation or cross-correlation of the input spatial fields
    """
    ndim = f_data.ndim
    if g_data is not None:
        g_data = to_torch(g_data, device)
        func = lambda x: torch_rfft(signal_ndim=ndim)(g_data)
    else:
        func = lambda x: x

    return pipe(f_data,
                to_torch(device=device),
                torch_rfft(signal_ndim=ndim),
                lambda x: mult(x, conjugate(func(x))),
                torch_irfft(signal_ndim=ndim),
                lambda x: x.cpu().numpy(),
                fftshift,
                lambda x: fabs(x).astype(float))
def get_stress_strain(problem, shape, str_):
    """Get the stress or strain field depending on the str_ argument

    Args:
      problem: the Sfepy problem
      shape: the shape of the domain
      str_: string passed to problem.evaluate to extract the stress or strain

    Returns the reshaped stress or strain field
    """
    return pipe(
        np.squeeze(
            problem.evaluate(
                str_.format(dim=len(shape)), mode="el_avg", copy_materials=False
            )
        ),
        lambda x: np.reshape(x, (shape + x.shape[-1:])),
    )
def get_problem(u_field, v_field, calc_stiffness, calc_prestress, delta_x):
    """Get the problem

    Args:
      u_field: the displacement field
      v_field: the test function field
      calc_stiffness: a function to calculate the stiffness tensor
      calc_prestress: a function to calculate the prestress tensor
      delta_x: the mesh spacing

    Returns:
      the Sfepy problem
    """
    return pipe(
        get_terms(u_field, v_field, calc_stiffness, calc_prestress),
        lambda x: Equation("balance_of_forces", Terms([x[0], x[1]])),
        lambda x: Problem("elasticity", equations=Equations([x])),
        do(lambda x: x.time_update(ebcs=get_bcs(v_field.field.region.domain, delta_x))),
        do(lambda x: x.set_solver(get_nls(x.get_evaluator()))),
    )
def write_chart_json(j2_file_name, item):
    """Write a chart JSON file.

    Args:
      j2_file_name: the name of the Jinja template file
      item: a (benchmark_id, chart_dict) pair

    Returns:
      returns the (filepath, json_data) pair
    """
    file_name = fcompose(
        lambda x: r"{0}_{1}".format(x, j2_file_name),
        lambda x: re.sub(r"([0-9]+[abcd])\.(.+)\.yaml\.j2", r"\1\2.json", x),
    )
    return pipe(
        item[0],
        file_name,
        lambda file_: os.path.join(get_path(), "../_data/charts", file_),
        write_json(item[1]),
    )
def move_module_items_from_data(course: Endpoint, module_data: list,
                                dry_run: bool = False):
    item_moved = False

    # Move module items that are in the wrong module
    for mod_dict in module_data:
        module = find_module(course, mod_dict)
        item_ids = pipe(
            items(module),
            map(lambda i: (i.data[i.id_key], i)),
            dict,
        )

        reset_cache = False
        for item_dict in mod_dict['items']:
            item = find_item(course, item_dict)
            if item and item.data[item.id_key] not in item_ids:
                # Need to move this item to its correct parent
                parent_id = module.data[module.id_key]
                log.info(f'Moving item "{item_dict["title"]}" to'
                         f' module "{mod_dict["name"]}"')
                log.debug(f'Module:\n{pprint.pformat(module.data)}\n'
                          f'Item:\n{pprint.pformat(item.data)}\n')
                if not dry_run:
                    update_endpoint(
                        item, {'module_id': parent_id},
                        do_refresh=False,  # This item is no longer in the
                                           # same parent module. So a
                                           # refresh of the same endpoint
                                           # would 404
                    )
                else:
                    log.info(f'... DRY RUN')
                reset_cache = True
                item_moved = True

        if reset_cache:
            # Reset items cache for this module (to reflect item movement)
            items.reset_cache(module)

    if not item_moved:
        log.info('No items needed moving')
def get_subnets(inpath, slash, from_clipboard, to_clipboard, yaml):
    '''Given a list of IP addresses from a file path (INPATH), the clipboard
    (-C), or stdin (if nothing provided), print in sorted order (to stdout
    unless -C is provided) all the IP networks of a certain size
    (-s {16, 24})
    '''
    content = get_input_content(inpath, from_clipboard)

    def get_network(ip):
        return ip_interface(ip + f'/{slash}').network

    return pipe(
        get_ips_from_str(content),
        map(get_network),
        set,
        sorted,
        map(str),
        compose(dump_yaml, list) if yaml else '\n'.join,
        print if not to_clipboard else cb_copy_ensure_nl,
    )
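# Standalone sketch of the get_network step above; it relies only on the
# stdlib ipaddress module, and the addresses here are made up.
from ipaddress import ip_interface

assert str(ip_interface('10.1.2.3' + '/24').network) == '10.1.2.0/24'
assert str(ip_interface('10.1.2.77' + '/24').network) == '10.1.2.0/24'  # same /24 collapses to one network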