def _get_filters(filters):
    """Build SQL condition strings and bind values for the opening and period queries.

    Returns a 2-tuple ``(clauses, values)``. ``clauses`` maps
    ``opening_clause`` and ``period_clause`` to ``AND``-joined conditions;
    ``values`` holds the bind parameters those conditions reference.
    """
    # Optional conditions are identical for both queries, so build them once.
    optional = []
    if filters.customer:
        optional.append("customer = %(customer)s")
    if filters.loyalty_program:
        optional.append("loyalty_program = %(loyalty_program)s")

    opening = ["posting_date < %(from_date)s"] + optional
    period = ["posting_date BETWEEN %(from_date)s AND %(to_date)s"] + optional

    date_values = {
        "from_date": filters.date_range[0],
        "to_date": filters.date_range[1],
    }
    values = merge(pick(["customer", "loyalty_program"], filters), date_values)

    clauses = {
        "opening_clause": " AND ".join(opening),
        "period_clause": " AND ".join(period),
    }
    return clauses, values
def _get_filters(filters):
    """Build SQL condition strings and bind values for the item query.

    ``filters.item_codes`` is split on commas; each piece is stripped and
    empty pieces are discarded. The resulting list is bound as
    ``item_codes`` only when it is non-empty.

    Returns a 2-tuple: a dict of ``AND``-joined condition strings and the
    bind values (the filters, plus the parsed ``item_codes`` when present).
    """
    item_codes = None
    if filters.item_codes:
        stripped = (code.strip() for code in filters.item_codes.split(","))
        item_codes = [code for code in stripped if code]

    clauses = ["i.disabled = 0"]
    if item_codes:
        clauses.append("i.item_code IN %(item_codes)s")

    bin_clauses = ["b.item_code = i.item_code"]
    if filters.warehouse:
        bin_clauses.append("b.warehouse = %(warehouse)s")

    defaults_clauses = ["id.parent = i.name", "id.company = %(company)s"]
    supplier_clauses = [
        "isp.parent = i.name",
        "isp.supplier = id.default_supplier",
    ]

    extra_values = {"item_codes": item_codes} if item_codes else {}
    return (
        {
            "clauses": " AND ".join(clauses),
            "bin_clauses": " AND ".join(bin_clauses),
            "defaults_clauses": " AND ".join(defaults_clauses),
            "supplier_clauses": " AND ".join(supplier_clauses),
        },
        merge(filters, extra_values),
    )
def _get_filters(filters):
    """Validate the filters and build SQL condition strings plus bind values.

    Calls ``frappe.throw`` when no company is given. Returns a 2-tuple: a
    dict with ``clauses`` and ``warehouse_clauses`` (``AND``-joined), and
    the filters merged with ``price_list``, ``start_date`` and ``end_date``.
    """
    if not filters.get("company"):
        frappe.throw(_("Company is required to generate report"))

    # "TRUE" guarantees the joined clause is never empty.
    clauses = ["TRUE"]
    if filters.item_group:
        clauses.append("i.item_group = %(item_group)s")
    if filters.item_code:
        clauses.append("i.name = %(item_code)s")
    if filters.default_supplier:
        clauses.append("id.default_supplier = %(default_supplier)s")

    warehouse_clauses = []
    if filters.item_code:
        warehouse_clauses.append("item_code = %(item_code)s")
    if filters.warehouse:
        warehouse_clauses.append("warehouse = %(warehouse)s")
    else:
        # Without an explicit warehouse, restrict to the company's warehouses.
        warehouse_clauses.append(
            "warehouse IN (SELECT name FROM `tabWarehouse` WHERE company = %(company)s)"
        )

    overrides = {
        "price_list": frappe.db.get_value("Buying Settings", None, "buying_price_list"),
        "start_date": filters.start_date or today(),
        "end_date": filters.end_date or today(),
    }
    values = merge(filters, overrides)

    return (
        {
            "clauses": " AND ".join(clauses),
            "warehouse_clauses": " AND ".join(warehouse_clauses),
        },
        values,
    )
def _get_filters(filters):
    """Build the item and bin SQL condition strings for the given filters.

    Returns a 2-tuple: a dict with ``clauses`` and ``bin_clauses``
    (``AND``-joined), and the filters themselves as bind values.
    """
    item_conditions = ["i.disabled = 0"]
    if filters.brand:
        item_conditions.append("i.brand = %(brand)s")
    if filters.item_group:
        item_conditions.append("i.item_group = %(item_group)s")

    bin_conditions = ["b.item_code = i.item_code"]
    if filters.warehouse:
        bin_conditions.append("b.warehouse = %(warehouse)s")

    joined = {
        "clauses": " AND ".join(item_conditions),
        "bin_clauses": " AND ".join(bin_conditions),
    }
    return joined, filters
def _get_filters(filters):
    """Build the ``AND``-joined SQL condition string and bind values.

    The branches used for filtering depend on the current user's roles;
    ``frappe.throw`` is called when the user has none of the accepted roles
    (or has a branch role but no user branch).
    """

    def get_branches():
        # Accounts Managers may query whichever branches the filter names.
        if "Accounts Manager" in frappe.get_roles():
            return split_to_list(filters.branches)

        user_branch = get_user_branch()
        has_branch_role = any(
            role in ("Branch User", "Branch Stock") for role in frappe.get_roles()
        )
        if has_branch_role and user_branch:
            return [user_branch]

        frappe.throw(
            _("Manager privilege or Branch User / Branch Stock role required"))

    branches = get_branches()

    clauses = [
        "st.outgoing_datetime <= %(to_date)s",
        "IFNULL(st.incoming_datetime, CURRENT_DATE) >= %(from_date)s",
    ]
    if not cint(filters.show_all):
        clauses.append("st.docstatus = 1")
    if branches:
        clauses.append(
            "(st.source_branch IN %(branches)s OR st.target_branch IN %(branches)s)"
        )

    branch_values = {"branches": branches} if branches else {}
    values = merge(pick(["from_date", "to_date"], filters), branch_values)
    return " AND ".join(clauses), values
def symbol_ownership_map(self):
    """Map ``(company_symbol, share_class_symbol)`` to a tuple of ownerships.

    Rows of ``self.equity_symbol_mappings`` are grouped by symbol pair.
    Each group is sorted, and every ownership's end is replaced by the start
    of the next one; the last ownership ends at the maximum UTC timestamp.
    """
    mappings = {}
    for row in sa.select(self.equity_symbol_mappings.c).execute().fetchall():
        key = (row.company_symbol, row.share_class_symbol)
        ownership = SymbolOwnership(
            pd.Timestamp(row.start_date, unit="ns", tz="utc"),
            pd.Timestamp(row.end_date, unit="ns", tz="utc"),
            row.sid,
            row.symbol,
        )
        mappings.setdefault(key, []).append(ownership)

    def close_gaps(ownerships):
        ordered = sorted(ownerships)
        # A fake trailing ownership makes the last real one end at the
        # maximum timestamp.
        ordered.append(
            SymbolOwnership(pd.Timestamp.max.tz_localize("utc"), None, None, None)
        )
        return tuple(
            SymbolOwnership(current.start, following.start, current.sid, current.symbol)
            for current, following in sliding_window(2, ordered)
        )

    # The factory hands valmap the existing dict, so it is updated in place.
    return valmap(close_gaps, mappings, factory=lambda: mappings)
def _get_columns():
    """Return the report columns as a list.

    The outgoing/incoming Stock Entry link columns are appended only when
    the current user has the "Accounts Manager" role.
    """
    columns = [
        make_column("outgoing_date", type="Date", width=90),
        make_column("incoming_date", type="Date", width=90),
        make_column(
            "name", "Doc Name", type="Link", options="Stock Transfer", width=150
        ),
        make_column("workflow_state", "Status", width=90),
        make_column("item_code", type="Link", options="Item", width=150),
        make_column("item_name", width=180),
        make_column("qty", type="Float", width=90),
    ]

    if "Accounts Manager" in frappe.get_roles():
        columns.append(
            make_column(
                "outgoing_stock_entry",
                "Outgoing Doc",
                type="Link",
                options="Stock Entry",
                width=150,
            )
        )
        columns.append(
            make_column(
                "incoming_stock_entry",
                "Incoming Doc",
                type="Link",
                options="Stock Entry",
                width=150,
            )
        )

    return columns
def test_id_macro_dataset(self):
    """The latest ``value`` of the macro dataset is repeated for every asset."""
    loader = BlazeLoader()
    dataset = from_blaze(
        bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape),
        loader=loader,
        no_deltas_rule=no_deltas_rules.ignore,
    )

    pipeline = Pipeline()
    pipeline.add(dataset.value.latest, 'value')

    dates = self.dates
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    with tmp_asset_finder(equities=asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # One block of identical values per timestamp, one row per asset.
        expected_values = [0] * nassets + [1] * nassets + [2] * nassets
        expected_index = pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        ))
        expected = pd.DataFrame(
            expected_values,
            index=expected_index,
            columns=('value',),
        )
        assert_frame_equal(result, expected, check_dtype=False)
def _get_columns():
    """Return the report columns as a list.

    The list holds the fixed item columns, then one Float column per entry
    returned by ``_get_warehouses()``, then the sold quantity/valuation
    columns.
    """
    item_columns = [
        make_column(
            "parent_item_group", "Parent Item Group", "Link", options="Item Group"
        ),
        make_column("item_group", "Item Group", "Link", options="Item Group"),
        make_column("name", "Item Code", "Link", options="Item"),
        make_column("description", "Description", width=350),
        make_column("barcode", "Barcode", width=180),
    ]
    warehouse_columns = [
        make_column(_get_key(warehouse), warehouse, "Float")
        for warehouse in _get_warehouses()
    ]
    sales_columns = [
        make_column("qty_sold", "Qty Sold", "Float"),
        make_column("sold_valuation", "Sold Valuation", "Currency"),
    ]
    return item_columns + warehouse_columns + sales_columns
def merge_ownership_periods(mappings):
    """
    Return a dict with the same keys as ``mappings`` whose values are tuples
    of gap-free OwnershipPeriod objects.

    Each list of periods is sorted, and every period's end is replaced by
    the start of the period that follows it. The final period's end becomes
    the maximum Timestamp (UTC).
    """
    def close_gaps(periods):
        chronological = sorted(periods)
        # A fake trailing period makes the last real period end at the
        # maximum timestamp.
        chronological.append(
            OwnershipPeriod(
                pd.Timestamp.max.tz_localize('utc'),
                None,
                None,
                None,
            )
        )
        return tuple(
            OwnershipPeriod(current.start, following.start, current.sid, current.value)
            for current, following in sliding_window(2, chronological)
        )

    return valmap(close_gaps, mappings)
def compute_sorted_frame(df, order_by, group_by=(), timecontext=None, **kwargs):
    """Sort ``df`` by the grouping keys followed by the ordering keys.

    Returns a 3-tuple ``(sorted_frame, grouping_key_names, ordering_key_names)``.
    Sort keys that need a temporary column have it assigned onto the frame
    before sorting.
    """
    sort_keys = list(toolz.concatv(group_by, order_by))
    ascending = [getattr(key.op(), 'ascending', True) for key in sort_keys]

    computed_sort_keys = []
    new_columns = {}
    for key in sort_keys:
        name, temporary_column = compute_sort_key(
            key.op(), df, timecontext, **kwargs)
        computed_sort_keys.append(name)
        if temporary_column is not None:
            new_columns[name] = temporary_column

    # mergesort is requested explicitly as the sort kind.
    result = df.assign(**new_columns).sort_values(
        computed_sort_keys, ascending=ascending, kind='mergesort')

    # TODO: we'll eventually need to return this frame with the temporary
    # columns and drop them in the caller (maybe using post_execute?)
    split_at = len(group_by)
    grouping_names = computed_sort_keys[:split_at]
    ordering_names = computed_sort_keys[split_at:]
    return result, grouping_names, ordering_names
def _get_filters(filters):
    """Build the ``AND``-joined SQL condition string and bind values.

    ``filters.date_type`` selects which datetime column the date range is
    applied to; unknown or missing types fall back to ``creation``.
    """
    date_field_map = {
        "Accepted": "request_datetime",
        "Transfered": "transfer_datetime",
        "Returned": "return_datetime",
        "Failed": "reverse_datetime",
        "Created": "creation",
        "Modified": "modified",
    }
    # Only column names from the map above are ever interpolated.
    date_field = date_field_map.get(filters.date_type, "creation")

    clauses = [
        "docstatus = 1",
        "DATE({date_field}) BETWEEN %(from_date)s AND %(to_date)s".format(
            date_field=date_field
        ),
    ]
    if filters.bank_account:
        clauses.append("bank_account = %(bank_account)s")
    if filters.bank_mode:
        clauses.append("bank_mode = %(bank_mode)s")

    date_values = {
        "from_date": filters.date_range[0],
        "to_date": filters.date_range[1],
    }
    values = merge(pick(["bank_account", "bank_mode"], filters), date_values)
    return " AND ".join(clauses), values
def to_bars(node):
    """Flatten a node tree into a list, descendants first.

    A node with no (or empty) ``children`` is returned as-is in a one-item
    list. Otherwise the result is the flattened children followed by the
    node itself with its ``children`` key removed.
    """
    children = node.get('children')
    if not children:
        return [node]

    bars = list(concat(map(to_bars, children)))
    bars.append(dissoc(node, 'children'))
    return bars
def __new__(mcls, name, bases, dict_):
    """Create the class and, for direct ``ADT`` subclasses, validate it.

    For a class whose first base is ``ADT``, the type variables and
    constructors are copied from the namespace and every constructor
    argument type is checked. Raises ``TypeError`` when a recursive type is
    named differently from the class, or when a constructor takes another
    constructor as an argument. When there are no type variables the result
    of ``adt(self, ())`` is returned instead of the class.
    """
    self = super().__new__(mcls, name, bases, dict_)
    if len(bases) and bases[0] is ADT:
        self._typevars = dict_._typevars
        self._constructors = tuple(dict_._constructors.values())
        constructors = set(self._constructors)

        for constructor in constructors:
            argument_types = concatv(
                constructor._args,
                constructor._kwargs.values(),
            )
            for arg_type in argument_types:
                if isinstance(arg_type, RecursiveType) and arg_type._name != name:
                    raise TypeError(
                        'recursive type name must be the same as the type'
                        ' name, %r != %r' % (
                            arg_type._name,
                            name,
                        ),
                    )
                if arg_type in constructors:
                    raise TypeError(
                        'constructor %r has arguments that are other'
                        ' constructors' % constructor,
                    )

        if not self._typevars:
            return adt(self, ())

    return self
def validate(self):
    """Reject this voucher when an overlapping submitted voucher exists.

    Looks for another submitted POS Closing Voucher of the same company and
    POS profile (and user, when set) whose period overlaps this one, and
    calls ``frappe.throw`` if one is found.
    """
    conditions = [
        "docstatus = 1",
        "name != %(name)s",
        "company = %(company)s",
        "pos_profile = %(pos_profile)s",
        # Two periods overlap when each starts before the other ends.
        "period_from <= %(period_to)s",
        "period_to >= %(period_from)s",
    ]
    if self.user:
        conditions.append("user = %(user)s")

    query = """ SELECT 1 FROM `tabPOS Closing Voucher` WHERE {clauses} """.format(
        clauses=" AND ".join(conditions)
    )
    params = {
        "name": self.name,
        "company": self.company,
        "pos_profile": self.pos_profile,
        "user": self.user,
        "period_from": get_datetime(self.period_from),
        "period_to": get_datetime(self.period_to),
    }

    existing = frappe.db.sql(query, values=params)
    if existing:
        frappe.throw(
            "Another POS Closing Voucher already exists during this time frame."
        )
def merge(*exprs, **kwargs):
    """Combine positional and keyword expressions into a single ``Merge``.

    A lone positional expression is returned unchanged; a lone keyword
    expression is returned labelled with its keyword. Otherwise positional
    expressions are wrapped with ``_<index>`` names and keyword expressions
    are wrapped and labelled in sorted key order.

    Raises ``TypeError`` when every expression is scalar and ``ValueError``
    when the merged fields contain repeated names.
    """
    # A single input needs no Merge node at all.
    if len(exprs) == 1 and not kwargs:
        return exprs[0]
    if len(kwargs) == 1 and not exprs:
        (name, expr), = kwargs.items()
        return expr.label(name)

    positional = [_wrap(expr, '_%s' % n) for n, expr in enumerate(exprs)]
    keyword = [
        label(_wrap(v, k), k)
        for k, v in sorted(kwargs.items(), key=first)
    ]
    exprs = tuple(positional + keyword)

    if all(ndim(expr) == 0 for expr in exprs):
        raise TypeError('cannot merge all scalar expressions')

    result = Merge(
        exprs,
        varargsexpr(exprs),
        maxshape(map(shape, exprs)),
    )

    if not isdistinct(result.fields):
        repeated = (
            k for k, v in frequencies(result.fields).items() if v > 1
        )
        raise ValueError("Repeated columns found: " + ', '.join(repeated))

    return result
def _get_columns(columns, prices):
    """Return ``columns`` with supplier and price columns spliced in.

    A Supplier link column is inserted after the first two columns, and the
    buying/selling price columns after the seventh. The price column labels
    come from ``prices.get("buying")`` and ``prices.get("selling")``.
    """
    supplier_column = {
        "fieldname": "supplier",
        "fieldtype": "Link",
        "width": 100,
        "label": "Supplier",
        "options": "Supplier",
    }
    buying_column = {
        "fieldname": "buying_price",
        "fieldtype": "Currency",
        "width": 100,
        "label": prices.get("buying"),
    }
    selling_column = {
        "fieldname": "selling_price",
        "fieldtype": "Currency",
        "width": 100,
        "label": prices.get("selling"),
    }

    spliced = concatv(
        columns[:2],
        [supplier_column],
        columns[2:7],
        [buying_column, selling_column],
        columns[7:],
    )
    return list(spliced)
def _collect_variables(names, expressions=None):
    """
    Map labels and expressions to registered variables.

    Handles argument matching.

    Example:

        _collect_variables(names=['zones', 'zone_id'],
                           expressions=['parcels.zone_id'])

    Would return a dict representing:

        {'zones': <DataFrameWrapper for zones>,
         'zone_id': <pandas.Series for parcels.zone_id>}

    Parameters
    ----------
    names : list of str
        List of registered variable names and/or labels.
        If mixing names and labels, labels must come at the end.
    expressions : list of str, optional
        List of registered variable expressions for labels defined
        at end of `names`. Length must match the number of labels.

    Returns
    -------
    variables : dict
        Keys match `names`. Values correspond to registered variables,
        which may be wrappers or evaluated functions if appropriate.

    """
    # Map registered variable labels to expressions.
    if not expressions:
        expressions = []
    # The first `offset` names stand for themselves; the remaining names are
    # labels paired positionally with `expressions`.
    offset = len(names) - len(expressions)
    # Builtin zip replaces the deprecated toolz.compatibility.zip shim.
    labels_map = dict(toolz.concatv(
        zip(names[:offset], names[:offset]),
        zip(names[offset:], expressions)))

    all_variables = toolz.merge(_INJECTABLES, _TABLES)
    variables = {}
    for label, expression in labels_map.items():
        # In the future, more registered variable expressions could be
        # supported. Currently supports names of registered variables
        # and references to table columns.
        if '.' in expression:
            # Registered variable expression refers to column.
            table_name, column_name = expression.split('.')
            table = get_table(table_name)
            variables[label] = table.get_column(column_name)
        else:
            thing = all_variables[expression]
            if isinstance(thing, (_InjectableFuncWrapper, TableFuncWrapper)):
                # Registered variable object is function.
                variables[label] = thing()
            else:
                variables[label] = thing

    return variables
def compute_sorted_frame(df, order_by, group_by=(), **kwargs):
    """Sort ``df`` by the grouping keys followed by the ordering keys.

    Returns a 3-tuple ``(sorted_frame, grouping_key_names, ordering_key_names)``.
    Sort keys that need a temporary column have it assigned onto the frame
    before sorting.
    """
    sort_keys = list(toolz.concatv(group_by, order_by))
    ascending = [getattr(key.op(), 'ascending', True) for key in sort_keys]

    computed_sort_keys = []
    new_columns = {}
    for key in sort_keys:
        name, temporary_column = compute_sort_key(key.op(), df, **kwargs)
        computed_sort_keys.append(name)
        if temporary_column is not None:
            new_columns[name] = temporary_column

    # mergesort is requested explicitly as the sort kind.
    result = df.assign(**new_columns).sort_values(
        computed_sort_keys, ascending=ascending, kind='mergesort'
    )

    # TODO: we'll eventually need to return this frame with the temporary
    # columns and drop them in the caller (maybe using post_execute?)
    split_at = len(group_by)
    grouping_names = computed_sort_keys[:split_at]
    ordering_names = computed_sort_keys[split_at:]
    return result, grouping_names, ordering_names
def _get_filters(doctype, filters):
    """Build the SQL fragments and bind values for the VAT report queries.

    Tax accounts are read from "POS Bahrain Settings Tax Category". For
    "Standard Rated" and "Zero Rated" every configured account is fetched
    and excluded (``NOT IN``); for any other VAT type only that category's
    accounts are fetched and included (``IN``).

    Raises ``VatCategoryNotFound`` (directly when ``hide_error_message`` is
    set, otherwise through ``frappe.throw``) when no accounts are found.
    """
    is_include = filters.vat_type not in ["Standard Rated", "Zero Rated"]
    category_filter = {"category": filters.vat_type} if is_include else {}
    account_rows = frappe.get_all(
        "POS Bahrain Settings Tax Category",
        filters=category_filter,
        fields=["account"],
        as_list=1,
    )
    vat_exempt_accounts = [row[0] for row in account_rows]

    if not vat_exempt_accounts:
        msg = "Please setup {}: <em>VAT Tax Categories</em>".format(
            frappe.get_desk_link("POS Bahrain Settings", ""))
        if filters.get("hide_error_message"):
            raise VatCategoryNotFound(msg)
        frappe.throw(msg, exc=VatCategoryNotFound)

    membership = "IN" if is_include else "NOT IN"
    inv_clauses = [
        "d.docstatus = 1",
        "d.posting_date BETWEEN %(from_date)s AND %(to_date)s",
        "IFNULL(dt.account_head, '') != ''",
        "dt.account_head {} %(tax_accounts)s".format(membership),
    ]
    glp_clauses = inv_clauses + [
        "d.payment_type IN %(payment_types)s",
        "a.account_type = 'Tax'",
    ]

    is_sales = doctype == "Sales Invoice"
    values = merge(
        pick(["vat_type"], filters),
        {
            "from_date": filters.date_range[0],
            "to_date": filters.date_range[1],
            "tax_accounts": vat_exempt_accounts,
            "payment_types": ["Incoming"] if is_sales else ["Outgoing", "Internal Transfer"],
        },
    )

    return (
        {
            "doctype": doctype,
            "item_doctype": "{} Item".format(doctype),
            "tax_doctype": "{} Taxes and Charges".format(
                "Sales" if is_sales else "Purchase"),
            "party_name": "{}_name".format("customer" if is_sales else "supplier"),
            "invoice_clauses": " AND ".join(inv_clauses),
            "glp_clauses": " AND ".join(glp_clauses),
        },
        values,
    )
def test_id_macro_dataset(self):
    """
    Each value of ``self.macro_df`` is expected once per asset.

    The expected frame is indexed by the product of the macro timestamps
    and the assets from ``asset_infos[0][0]``; its single ``value`` column
    holds 0, 1 and 2, each repeated once for every asset.
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)
    expected_values = [0] * nassets + [1] * nassets + [2] * nassets

    with tmp_asset_finder() as finder:
        expected_index = pd.MultiIndex.from_product(
            (self.macro_df.timestamp, finder.retrieve_all(asset_info.index))
        )
        expected = pd.DataFrame(
            expected_values,
            index=expected_index,
            columns=("value",),
        )
        self._test_id(
            self.macro_df, self.macro_dshape, expected, finder, ("value",)
        )
def _collect_variables(names, expressions=None):
    """
    Map labels and expressions to registered variables.

    Handles argument matching.

    Example:

        _collect_variables(names=['zones', 'zone_id'],
                           expressions=['parcels.zone_id'])

    Would return a dict representing:

        {'zones': <DataFrameWrapper for zones>,
         'zone_id': <pandas.Series for parcels.zone_id>}

    Parameters
    ----------
    names : list of str
        List of registered variable names and/or labels.
        If mixing names and labels, labels must come at the end.
    expressions : list of str, optional
        List of registered variable expressions for labels defined
        at end of `names`. Length must match the number of labels.

    Returns
    -------
    variables : dict
        Keys match `names`. Values correspond to registered variables,
        which may be wrappers or evaluated functions if appropriate.

    """
    # Map registered variable labels to expressions.
    if not expressions:
        expressions = []
    # The first `offset` names stand for themselves; the remaining names are
    # labels paired positionally with `expressions`.
    offset = len(names) - len(expressions)
    # Builtin zip replaces the deprecated toolz.compatibility.zip shim.
    labels_map = dict(
        toolz.concatv(zip(names[:offset], names[:offset]),
                      zip(names[offset:], expressions)))

    all_variables = toolz.merge(_INJECTABLES, _TABLES)
    variables = {}
    for label, expression in labels_map.items():
        # In the future, more registered variable expressions could be
        # supported. Currently supports names of registered variables
        # and references to table columns.
        if '.' in expression:
            # Registered variable expression refers to column.
            table_name, column_name = expression.split('.')
            table = get_table(table_name)
            variables[label] = table.get_column(column_name)
        else:
            thing = all_variables[expression]
            if isinstance(thing, (_InjectableFuncWrapper, TableFuncWrapper)):
                # Registered variable object is function.
                variables[label] = thing()
            else:
                variables[label] = thing

    return variables
def path(graph, source, target, excluded_edges=None, ooc_types=ooc_types):
    """ Path of functions between two types

    ``source`` and ``target`` may be types or instances (whose type is then
    used). Returns an iterable of ``PathPart`` objects, one per edge of the
    lowest-cost path from ``source`` to ``target``.
    """
    source = source if isinstance(source, type) else type(source)
    target = target if isinstance(target, type) else type(target)

    # Start from the first class in the MRO (or virtual superclasses) that
    # the graph knows about; keep the original type when none matches.
    candidates = concatv(source.mro(), _virtual_superclasses)
    source = next((cls for cls in candidates if cls in graph), source)

    # If both source and target are Out-Of-Core types then restrict ourselves
    # to the graph of out-of-core types
    if ooc_types:
        oocs = tuple(ooc_types)
        if issubclass(source, oocs) and issubclass(target, oocs):
            ooc_nodes = [n for n in graph.nodes() if issubclass(n, oocs)]
            graph = graph.subgraph(ooc_nodes)

    with without_edges(graph, excluded_edges) as g:
        pth = nx.shortest_path(g, source=source, target=target, weight='cost')
        edge = adjacency(graph)

        def path_part(src, tgt):
            attrs = edge[src][tgt]
            return PathPart(src, tgt, attrs['func'], attrs['cost'])

        # Pair each node with its successor along the path.
        return map(path_part, pth, pth[1:])
def path(graph, source, target, excluded_edges=None, ooc_types=ooc_types):
    """ Path of functions between two types

    ``source`` and ``target`` may be types or instances (whose type is then
    used). Returns an iterable of ``PathPart`` objects, one per edge of the
    lowest-cost path from ``source`` to ``target``.
    """
    source = source if isinstance(source, type) else type(source)
    target = target if isinstance(target, type) else type(target)

    # Start from the first class in the MRO (or virtual superclasses) that
    # the graph knows about; keep the original type when none matches.
    candidates = concatv(source.mro(), _virtual_superclasses)
    source = next((cls for cls in candidates if cls in graph), source)

    # If both source and target are Out-Of-Core types then restrict ourselves
    # to the graph of out-of-core types
    if ooc_types:
        oocs = tuple(ooc_types)
        if issubclass(source, oocs) and issubclass(target, oocs):
            ooc_nodes = [n for n in graph.nodes() if issubclass(n, oocs)]
            graph = graph.subgraph(ooc_nodes)

    with without_edges(graph, excluded_edges) as g:
        pth = nx.shortest_path(g, source=source, target=target, weight='cost')
        # See the networkx 1.x -> 2.0 migration guide:
        # https://networkx.github.io/documentation/latest/release/migration_guide_from_1.x_to_2.0.html
        # `graph.edge` became `graph.edges`, indexed by a (src, tgt) pair.
        edge = graph.edges

        def path_part(src, tgt):
            attrs = edge[src, tgt]
            return PathPart(src, tgt, attrs['func'], attrs['cost'])

        # Pair each node with its successor along the path.
        return map(path_part, pth, pth[1:])
def add_stats(problem):
    """Annotate ``problem`` in place with per-library and per-book statistics.

    Reads ``problem["books_score"]`` (book id -> score) and
    ``problem["libraries"]`` (dicts with an ``l_books_ids`` list), then:

    * replaces ``problem["libraries"]`` with copies of each library that
      also carry ``biggest_books`` (its book ids sorted by descending
      score), ``s_books_ids`` (its book ids as a set) and ``specialness``
      (the negated sum of ``book_specialness`` over ``l_books_ids``);
    * sets ``problem["book_specialness"]`` to a dict mapping every book id
      found in any library to the number of libraries containing it.

    Returns ``None``.
    """
    scores = problem["books_score"]

    libraries = [
        {
            **library,
            "biggest_books": sorted(
                library["l_books_ids"],
                key=lambda book_id: scores[book_id],
                reverse=True,
            ),
            "s_books_ids": set(library["l_books_ids"]),
        }
        for library in problem["libraries"]
    ]

    # Count each book once per library that holds it. Iterating the set
    # means duplicates inside one library are not counted twice.
    book_specialness = {}
    for library in libraries:
        for book_id in library["s_books_ids"]:
            book_specialness[book_id] = book_specialness.get(book_id, 0) + 1
    problem["book_specialness"] = book_specialness

    problem["libraries"] = [
        {
            **library,
            "specialness": -sum(
                book_specialness[book_id] for book_id in library["l_books_ids"]
            ),
        }
        for library in libraries
    ]
def path(graph, source, target, excluded_edges=None, ooc_types=ooc_types):
    """ Path of functions between two types

    ``source`` and ``target`` may be types or instances (whose type is then
    used). Returns an iterable of ``PathPart`` objects, one per edge of the
    lowest-cost path from ``source`` to ``target``.
    """
    if not isinstance(source, type):
        source = type(source)
    if not isinstance(target, type):
        target = type(target)

    # Start from the first class in the MRO (or virtual superclasses) that
    # the graph knows about; keep the original type when none matches.
    for cls in concatv(source.mro(), _virtual_superclasses):
        if cls in graph:
            source = cls
            break

    # If both source and target are Out-Of-Core types then restrict ourselves
    # to the graph of out-of-core types
    if ooc_types:
        oocs = tuple(ooc_types)
        if issubclass(source, oocs) and issubclass(target, oocs):
            graph = graph.subgraph([n for n in graph.nodes()
                                    if issubclass(n, oocs)])

    with without_edges(graph, excluded_edges) as g:
        pth = nx.shortest_path(g, source=source, target=target, weight='cost')

        def path_part(src, tgt):
            # graph[src][tgt] returns the edge attribute dict on both
            # networkx 1.x and 2.x; the `graph.edge` attribute used here
            # before was removed in networkx 2.0.
            node = graph[src][tgt]
            return PathPart(src, tgt, node['func'], node['cost'])

        # Pair each node with its successor along the path.
        return map(path_part, pth, pth[1:])
def paginate_url(url, do_request):
    """Given a DZ_RS_URL crawl through pages using pagination logic

    Fetches ``url`` with ``do_request`` (caching disabled), reads the page
    count from the pager text, and returns an iterable of the first page
    followed by pages 2..N. The later pages are obtained by POSTing the
    pagination form with the view state taken from the first page.
    """
    # we can't cache yet cookies and POST requests
    do_request = partial(do_request, use_cache=False)

    def request_page(prefix, url, page_number):
        # `doc` is the first page, bound below before any page is requested.
        data = {
            prefix: prefix,
            '{}:menu1'.format(prefix): 'VII',
            '{}:menu2'.format(prefix): 'SEJ_ZAP_KON | MAG | DOK | fa_dokument | fa_sklicSeje | fa_program | fa_sklep',
            '{}:txtQueryString'.format(prefix): '',
            '{}:tableEx1:goto1__pagerGoText'.format(prefix): str(page_number),
            '{}:tableEx1:goto1__pagerGoButton'.format(prefix): 'Go',
            '{}:tableEx1:goto1__pagerGoButton.x'.format(prefix): '8',
            '{}:tableEx1:goto1__pagerGoButton.y'.format(prefix): '10',
            # Raw string: the backslashes escape the dots for the selector
            # and must not be read as (invalid) Python escape sequences.
            'javax.faces.ViewState': doc(r'input#javax\.faces\.ViewState').attr('value'),
        }
        return do_request(url, method='post', data=data)

    # get first page
    doc = do_request(url)
    # the pager text ends with the total number of pages
    num_pages = int(re.search(r'(\d+)$', doc('.pagerDeluxe_text').text()).groups()[0])
    logger.info('paginating', url=url, num_pages=num_pages)

    # prepare data for pagination
    pagination_form = doc('form')
    prefix = pagination_form.attr('id')
    url = DZ_RS_URL + pagination_form.attr('action')
    request_page = partial(request_page, prefix, url)

    # get the 2nd and the rest of the pages using pagination
    return toolz.concatv([doc], map(request_page, range(2, num_pages + 1)))
def special_closes_adhoc(self):
    """Return ``(close_time, dates)`` pairs for Lunar New Year's Eve.

    The dates are the days before those Chinese Lunar New Year dates that
    fall on Tuesday through Saturday in 2013 or later. Each entry of
    ``self.regular_early_close_times`` receives the dates from its own
    start up to (excluding) the next entry's start.
    """
    falls_after_weekday = np.in1d(
        chinese_lunar_new_year_dates.weekday,
        [TUESDAY, WEDNESDAY, THURSDAY, FRIDAY, SATURDAY],
    )
    recent_enough = chinese_lunar_new_year_dates.year >= 2013
    eves = chinese_lunar_new_year_dates - pd.Timedelta(days=1)
    lunar_new_years_eve = eves[falls_after_weekday & recent_enough].values

    def selection(arr, start, end):
        # Keep the values in [start, end); a None bound is open.
        bounds = []
        if start is not None:
            bounds.append(start.asm8 <= arr)
        if end is not None:
            bounds.append(arr < end.asm8)
        return arr[np.all(bounds, axis=0)] if bounds else arr

    # The trailing (None, None) gives the last close time an open end.
    windows = toolz.sliding_window(
        2,
        toolz.concatv(self.regular_early_close_times, [(None, None)]),
    )
    closes = []
    for (start, time), (end, _) in windows:
        closes.append((time, selection(lunar_new_years_eve, start, end)))
    return closes
def test_id_macro_dataset(self):
    """The latest ``value`` of the macro dataset is repeated for every asset."""
    loader = BlazeLoader()
    dataset = from_blaze(
        bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape),
        loader=loader,
        no_deltas_rule='ignore',
    )

    pipeline = Pipeline()
    pipeline.add(dataset.value.latest, 'value')

    dates = self.dates
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    with tmp_asset_finder(asset_info) as finder:
        engine = SimplePipelineEngine(loader, dates, finder)
        result = engine.run_pipeline(pipeline, dates[0], dates[-1])

        # One block of identical values per timestamp, one row per asset.
        expected_values = [0] * nassets + [1] * nassets + [2] * nassets
        expected_index = pd.MultiIndex.from_product((
            self.macro_df.timestamp,
            finder.retrieve_all(asset_info.index),
        ))
        expected = pd.DataFrame(
            expected_values,
            index=expected_index,
            columns=('value', ),
        )
        assert_frame_equal(result, expected, check_dtype=False)
def _get_filters(filters):
    """Build the ``AND``-joined SQL condition string and bind values.

    ``filters.report_type`` of "Collected" or "Achieved" each adds its own
    pair of stock/date conditions. The bind values are the filters plus the
    parsed branches (when any) and three fixed price list names.
    """
    branches = split_to_list(filters.branches)

    clauses = ["si.docstatus = 1"]
    if branches:
        clauses.append("si.os_branch IN %(branches)s")
    if filters.report_type == "Collected":
        clauses.append("(si.update_stock = 1 OR sii.delivered_qty = sii.qty)")
        clauses.append(
            """ ( ( si.update_stock = 1 AND si.posting_date BETWEEN %(from_date)s AND %(to_date)s ) OR ( si.update_stock = 0 AND dn.posting_date BETWEEN %(from_date)s AND %(to_date)s ) ) """
        )
    if filters.report_type == "Achieved":
        clauses.append("(si.update_stock = 0 OR sii.delivered_qty < sii.qty)")
        clauses.append("si.posting_date BETWEEN %(from_date)s AND %(to_date)s")

    branch_values = {"branches": branches} if branches else {}
    price_lists = {
        "selling_pl": "Standard Selling",
        "min_selling_pl1": "Minimum Selling",
        "min_selling_pl2": "Minimum Selling 2",
    }
    values = merge(filters, branch_values, price_lists)
    return " AND ".join(clauses), values
def execute_selection_dataframe(
    op, data, scope: Scope, timecontext: Optional[TimeContext], **kwargs
):
    """Apply a Selection's projections, predicates and sort keys to ``data``.

    Projections are applied first, then the rows are filtered by the
    conjunction of the predicates, then the frame is sorted. Temporary
    columns created only for sorting are dropped from the result.
    """
    selections = op.selections
    predicates = op.predicates
    sort_keys = op.sort_keys
    result = data

    # Build up the individual pandas structures from column expressions
    if selections:
        only_columns = all(
            isinstance(s.op(), ops.TableColumn) for s in selections
        )
        if only_columns:
            result = build_df_from_selection(selections, data, op.table.op())
        else:
            result = build_df_from_projection(
                selections,
                op,
                data,
                scope=scope,
                timecontext=timecontext,
                **kwargs,
            )

    if predicates:
        computed = _compute_predicates(
            op.table.op(), predicates, data, scope, timecontext, **kwargs
        )
        predicate = functools.reduce(operator.and_, computed)
        assert len(predicate) == len(
            result
        ), 'Selection predicate length does not match underlying table'
        result = result.loc[predicate]

    # without sort keys there are no temporary columns to clean up
    if not sort_keys:
        return result

    result, grouping_keys, ordering_keys = util.compute_sorted_frame(
        result,
        order_by=sort_keys,
        scope=scope,
        timecontext=timecontext,
        **kwargs,
    )

    # return early if we do not have any temporary grouping or ordering columns
    assert not grouping_keys, 'group by should never show up in Selection'
    if not ordering_keys:
        return result

    # columns used for sorting that are not part of the input data
    temporary_columns = pd.Index(
        concatv(grouping_keys, ordering_keys)
    ).difference(data.columns)

    # no reason to call drop if we don't need to
    if temporary_columns.empty:
        return result
    return result.drop(temporary_columns, axis=1)
def test_novel_deltas_macro(self):
    """Run the pipeline over a macro baseline with deltas on a gapped calendar.

    The deltas are the baseline with ``value`` raised by 10 and
    ``timestamp`` moved one day later. The calendar omits 2014-01-04 and
    2014-01-05.
    """
    asset_info = asset_infos[0][0]
    nassets = len(asset_info)

    base_dates = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-04')
    ])
    baseline = pd.DataFrame({
        'value': (0, 1),
        'asof_date': base_dates,
        'timestamp': base_dates,
    })
    expr = bz.Data(baseline, name='expr', dshape=self.macro_dshape)
    raw_deltas = bz.Data(baseline, name='deltas', dshape=self.macro_dshape)
    deltas = bz.transform(
        raw_deltas,
        value=raw_deltas.value + 10,
        timestamp=raw_deltas.timestamp + timedelta(days=1),
    )

    views_by_date = {
        '2014-01-03': repeat_last_axis(
            np.array([10.0, 10.0, 10.0]),
            nassets,
        ),
        '2014-01-06': repeat_last_axis(
            np.array([10.0, 10.0, 11.0]),
            nassets,
        ),
    }
    expected_views = keymap(pd.Timestamp, views_by_date)

    cal = pd.DatetimeIndex([
        pd.Timestamp('2014-01-01'),
        pd.Timestamp('2014-01-02'),
        pd.Timestamp('2014-01-03'),
        # omitting the 4th and 5th to simulate a weekend
        pd.Timestamp('2014-01-06'),
    ])

    with tmp_asset_finder(equities=asset_info) as finder:
        expected_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            [10] * nassets + [11] * nassets,
            index=expected_index,
            columns=('value',),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=cal,
            start=cal[2],
            end=cal[-1],
            window_length=3,
            compute_fn=op.itemgetter(-1),
        )
def _group_times(all_days, times, tz, offset):
    """Combine the per-period results of ``days_at_time`` into one index.

    ``times`` is a sequence of ``(start, time)`` pairs. Each pair covers
    the days from its start up to the next pair's start; the last pair is
    open-ended.
    """
    # The trailing (None, None) gives the final period an open end.
    periods = toolz.sliding_window(2, toolz.concatv(times, [(None, None)]))

    elements = []
    for (start, time), (end, _) in periods:
        days = selection(all_days, start, end)
        elements.append(days_at_time(days, time, tz, offset))

    head, tail = elements[0], elements[1:]
    return head.append(tail)
def choosers_columns_used(self):
    """
    List the distinct columns referenced by the choosers predict filters
    and the choosers fit filters, in first-seen order.

    """
    predict_columns = util.columns_in_filters(self.choosers_predict_filters)
    fit_columns = util.columns_in_filters(self.choosers_fit_filters)
    return list(toolz.unique(toolz.concatv(predict_columns, fit_columns)))
def execute_selection_dataframe(op, data, scope,
                                timecontext: Optional[TimeContext], **kwargs):
    """Apply a Selection's projections, predicates and sort keys to ``data``.

    Projections are computed one by one and concatenated column-wise, then
    the rows are filtered by the conjunction of the predicates, then the
    frame is sorted. Temporary columns created only for sorting are dropped
    from the result.
    """
    selections = op.selections
    predicates = op.predicates
    sort_keys = op.sort_keys
    result = data

    # Build up the individual pandas structures from column expressions
    if selections:
        data_pieces = [
            compute_projection(
                selection,
                op,
                data,
                scope=scope,
                timecontext=timecontext,
                **kwargs,
            )
            for selection in selections
        ]

        def keep_first_index_level(piece):
            # Pieces with a multi-level index keep only their first level.
            nlevels = piece.index.nlevels
            if nlevels > 1:
                return piece.reset_index(level=list(range(1, nlevels)), drop=True)
            return piece

        result = pd.concat(
            [keep_first_index_level(piece) for piece in data_pieces], axis=1)

    if predicates:
        computed = _compute_predicates(op.table.op(), predicates, data, scope,
                                       **kwargs)
        predicate = functools.reduce(operator.and_, computed)
        assert len(predicate) == len(
            result
        ), 'Selection predicate length does not match underlying table'
        result = result.loc[predicate]

    # without sort keys there are no temporary columns to clean up
    if not sort_keys:
        return result

    result, grouping_keys, ordering_keys = util.compute_sorted_frame(
        result, order_by=sort_keys, scope=scope, **kwargs)

    # return early if we do not have any temporary grouping or ordering columns
    assert not grouping_keys, 'group by should never show up in Selection'
    if not ordering_keys:
        return result

    # columns used for sorting that are not part of the input data
    temporary_columns = pd.Index(concatv(
        grouping_keys, ordering_keys)).difference(data.columns)

    # no reason to call drop if we don't need to
    if temporary_columns.empty:
        return result
    return result.drop(temporary_columns, axis=1)
def execute_selection_dataframe(op, data, scope=None, **kwargs):
    """Apply a Selection's projections, predicates and sort keys to ``data``.

    Projections are computed one by one and concatenated column-wise, then
    the rows are filtered by the conjunction of the predicates, then the
    frame is sorted. Temporary columns created only for sorting are dropped
    from the result.
    """
    selections = op.selections
    predicates = op.predicates
    sort_keys = op.sort_keys
    result = data

    # Build up the individual pandas structures from column expressions
    if selections:
        data_pieces = [
            compute_projection(selection, op, data, scope=scope, **kwargs)
            for selection in selections
        ]

        def keep_first_index_level(piece):
            # Pieces with a multi-level index keep only their first level.
            nlevels = piece.index.nlevels
            if nlevels > 1:
                return piece.reset_index(
                    level=list(range(1, nlevels)), drop=True
                )
            return piece

        result = pd.concat(
            [keep_first_index_level(piece) for piece in data_pieces], axis=1
        )

    if predicates:
        computed = _compute_predicates(
            op.table.op(), predicates, data, scope, **kwargs
        )
        predicate = functools.reduce(operator.and_, computed)
        assert len(predicate) == len(
            result
        ), 'Selection predicate length does not match underlying table'
        result = result.loc[predicate]

    # without sort keys there are no temporary columns to clean up
    if not sort_keys:
        return result

    result, grouping_keys, ordering_keys = util.compute_sorted_frame(
        result, order_by=sort_keys, scope=scope, **kwargs
    )

    # return early if we do not have any temporary grouping or ordering columns
    assert not grouping_keys, 'group by should never show up in Selection'
    if not ordering_keys:
        return result

    # columns used for sorting that are not part of the input data
    temporary_columns = pd.Index(
        concatv(grouping_keys, ordering_keys)
    ).difference(data.columns)

    # no reason to call drop if we don't need to
    if temporary_columns.empty:
        return result
    return result.drop(temporary_columns, axis=1)
def render_tabular(api, options=None):
    """Entry point for the tabular reporter interface.

    Args:
        api: data source exposing the ``samples``, ``average_metrics``,
            ``diagnostic_yield`` and ``sex_checker`` queries.
        options (dict, optional): reporter settings, read with ``.get``:
            ``report.separator`` (defaults to a tab), ``report.human``,
            ``report.panel``, ``report.samples`` and ``report.group``.
            ``None`` is treated as an empty mapping.

    Returns:
        The result of ``tabulate`` when ``report.human`` is truthy;
        otherwise a newline-joined string whose first line is the
        ``#``-prefixed header row.
    """
    # The signature advertises ``options=None``; without this guard every
    # ``.get`` below raises AttributeError when the argument is omitted.
    if options is None:
        options = {}

    # determine separator
    separator = options.get('report.separator', '\t')
    human = options.get('report.human')
    panel = options.get('report.panel')
    samples = options.get('report.samples')
    group = options.get('report.group')

    # read gene panel file if it has been set
    if panel:
        superblock_ids = [line.rstrip() for line in panel]
    else:
        superblock_ids = None

    # get sample ID, group and cutoff from metadata
    sample_query = limit_query(api.samples(), group=group, samples=samples)
    metadata = ((sample.id, sample.group_id, sample.cutoff)
                for sample in sample_query)

    # get the data
    base_query = limit_query(
        api.average_metrics(superblock_ids=superblock_ids),
        group=group,
        samples=samples)

    queries = [
        metadata,
        base_query,
        api.diagnostic_yield(superblock_ids=superblock_ids,
                             group_id=group,
                             sample_ids=samples),
        api.sex_checker(group_id=group, sample_ids=samples)
    ]

    # group multiple queries by sample ID (first column)
    key_metrics = groupby(get(0), concat(queries))

    # get the column names dynamically from the query
    headers = concatv(['sample_id', 'group_id', 'cutoff'],
                      (column['name']
                       for column in base_query.column_descriptions),
                      ['diagnostic yield', 'gender'])

    unique_headers = unique(headers)

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    data = (unique(concat(values)) for values in itervalues(key_metrics))

    if human:
        # export key_metrics in a more human friendly format
        return tabulate(data, unique_headers)

    # plain output: '#'-prefixed header line followed by the data rows
    return '\n'.join(
        cons('#' + separator.join(unique_headers),
             stringify_list(data, separator=separator)))
def _get_filters(filters):
    """Build the WHERE clause and the values for its placeholders.

    Returns a ``(clause, values)`` tuple: the conditions joined with
    ``" AND "`` and the mapping for the ``%(name)s`` placeholders they use.
    The branch condition and its value are only included when
    ``filters.branch`` yields a non-empty list.
    """
    branches = split_to_list(filters.branch)

    conditions = ["s.docstatus = 1", "s.posting_date = %(posting_date)s"]
    branch_values = {}
    if branches:
        conditions.append("s.os_branch IN %(branches)s")
        branch_values = {"branches": branches}

    values = merge(pick(["posting_date"], filters), branch_values)
    return " AND ".join(conditions), values
def alts_columns_used(self):
    """
    List the distinct alternatives-table columns referenced by the
    predict filters and then the fit filters.

    """
    predict_cols = util.columns_in_filters(self.alts_predict_filters)
    fit_cols = util.columns_in_filters(self.alts_fit_filters)
    combined = toolz.concatv(predict_cols, fit_cols)
    return list(toolz.unique(combined))
def columns_used(self):
    """
    List the distinct columns needed by this group: those referenced by
    the fit filters, the predict filters, and whatever the wrapped
    group reports via its own ``columns_used``.

    """
    fit_cols = util.columns_in_filters(self.fit_filters)
    predict_cols = util.columns_in_filters(self.predict_filters)
    group_cols = self._group.columns_used()
    combined = toolz.concatv(fit_cols, predict_cols, group_cols)
    return list(toolz.unique(combined))
def fn(grouped_items):
    """Return a totals row for ``sales_employee`` followed by its rows.

    The totals row carries the ``paid_qty``, ``free_qty`` and ``gross``
    sums computed over ``grouped_items`` with ``sum_by``.
    """
    summary_row = {"sales_employee": sales_employee}
    for field in ("paid_qty", "free_qty", "gross"):
        summary_row[field] = sum_by(field)(grouped_items)
    return concatv([summary_row], grouped_items)
def columns_used(self):
    """
    List the distinct columns this model needs: those referenced by the
    fit filters, the predict filters and the model expression.

    """
    fit_cols = util.columns_in_filters(self.fit_filters)
    predict_cols = util.columns_in_filters(self.predict_filters)
    formula_cols = util.columns_in_formula(self.model_expression)
    combined = toolz.concatv(fit_cols, predict_cols, formula_cols)
    return list(toolz.unique(combined))
def interaction_columns_used(self):
    """
    List the distinct interaction-dataset columns referenced by the
    interaction predict filters and by the model expression.

    """
    filter_cols = util.columns_in_filters(self.interaction_predict_filters)
    formula_cols = util.columns_in_formula(self.model_expression)
    combined = toolz.concatv(filter_cols, formula_cols)
    return list(toolz.unique(combined))
def _get_filters(filters):
    """Build the WHERE clause and the values for its placeholders.

    Returns a ``(clause, values)`` tuple. ``clause`` is the conditions
    joined with ``" AND "``; ``values`` maps the ``%(name)s`` placeholders
    used in it. When a scrap warehouse is configured in
    "Optical Store Settings" it is excluded through the
    ``%(scrap_warehouse)s`` placeholder, and ``values`` is a copy of
    ``filters`` with that key added. Otherwise ``filters`` is returned
    unchanged.
    """
    scrap_warehouse = frappe.db.get_single_value(
        "Optical Store Settings", "scrap_warehouse"
    )
    clauses = concatv(
        ["i.disabled = 0"],
        ["warehouse = %(warehouse)s"] if filters.warehouse else [],
        # Bound as a query parameter instead of being formatted into the SQL
        # text, so a warehouse name containing a quote cannot break or alter
        # the statement.
        ["warehouse != %(scrap_warehouse)s"] if scrap_warehouse else [],
    )
    clause = " AND ".join(clauses)
    if not scrap_warehouse:
        return clause, filters

    # Copy so the caller's filters object is not mutated.
    values = filters.copy()
    values["scrap_warehouse"] = scrap_warehouse
    return clause, values
def columns_used(self):
    """
    List the distinct columns the model needs from any table, combining
    the chooser, alternatives and interaction columns in that order.

    """
    chooser_cols = self.choosers_columns_used()
    alts_cols = self.alts_columns_used()
    interaction_cols = self.interaction_columns_used()
    combined = toolz.concatv(chooser_cols, alts_cols, interaction_cols)
    return list(toolz.unique(combined))
def test_deltas(self, asset_info):
    """Run the pipeline with a deltas table derived from the baseline.

    The deltas are the baseline rows with ``value`` raised by 10 and
    ``timestamp`` moved one day later. The pipeline runs over a calendar
    extended by one day with ``window_length=2`` and ``np.nanmax``, and is
    expected to output 12, 13 and 14 for every asset on the three dates.
    """
    expr = bz.Data(self.df, name='expr', dshape=self.dshape)

    baseline = bz.Data(self.df, dshape=self.dshape)
    shifted = bz.transform(
        baseline,
        value=baseline.value + 10,
        timestamp=baseline.timestamp + timedelta(days=1),
    )
    deltas = bz.Data(
        odo(shifted, pd.DataFrame),
        name='delta',
        dshape=self.dshape,
    )

    views_by_day = {
        '2014-01-02': np.array([[10.0, 11.0, 12.0],
                                [1.0, 2.0, 3.0]]),
        '2014-01-03': np.array([[11.0, 12.0, 13.0],
                                [2.0, 3.0, 4.0]]),
        '2014-01-04': np.array([[12.0, 13.0, 14.0],
                                [12.0, 13.0, 14.0]]),
    }
    expected_views = keymap(pd.Timestamp, views_by_day)

    nassets = len(asset_info)
    if nassets == 4:
        # With four assets each view gets an extra all-NaN column.
        def pad_with_nan(view):
            return np.c_[view, [np.nan, np.nan]]

        expected_views = valmap(pad_with_nan, expected_views)

    with tmp_asset_finder(equities=asset_info) as finder:
        values = [v for v in (12, 13, 14) for _ in range(nassets)]
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            values,
            index=output_index,
            columns=('value',),
        )

        # Append one extra day to the calendar.
        base_dates = self.dates
        dates = base_dates.insert(
            len(base_dates),
            base_dates[-1] + timedelta(days=1),
        )
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def columns_used(self):
    """
    List the distinct columns the model needs from any table: the
    chooser, alternatives and interaction columns, the columns in the
    default model expression, and the segmentation column.

    """
    chooser_cols = self.choosers_columns_used()
    alts_cols = self.alts_columns_used()
    interaction_cols = self.interaction_columns_used()
    formula_cols = util.columns_in_formula(self.default_model_expr)
    segment_cols = [self.segmentation_col]
    combined = toolz.concatv(
        chooser_cols, alts_cols, interaction_cols, formula_cols, segment_cols)
    return list(toolz.unique(combined))
def columns_used(self):
    """
    List the distinct columns needed across the group: those in the fit
    and predict filters, in the default model expression, those the
    wrapped group reports, and the segmentation column.

    """
    fit_cols = util.columns_in_filters(self.fit_filters)
    predict_cols = util.columns_in_filters(self.predict_filters)
    formula_cols = util.columns_in_formula(self.default_model_expr)
    group_cols = self._group.columns_used()
    segment_cols = [self.segmentation_col]
    combined = toolz.concatv(
        fit_cols, predict_cols, formula_cols, group_cols, segment_cols)
    return list(toolz.unique(combined))
def render_tabular(api, options=None):
    """Entry point for the tabular reporter interface.

    Args:
        api: data source exposing the ``samples``, ``average_metrics``,
            ``diagnostic_yield`` and ``sex_checker`` queries.
        options (dict, optional): reporter settings, read with ``.get``:
            ``report.separator`` (defaults to a tab), ``report.human``,
            ``report.panel``, ``report.samples`` and ``report.group``.
            ``None`` is treated as an empty mapping.

    Returns:
        The result of ``tabulate`` when ``report.human`` is truthy;
        otherwise a newline-joined string whose first line is the
        ``#``-prefixed header row.
    """
    # ``options`` defaults to None in the signature, so fall back to an
    # empty mapping; otherwise the ``.get`` calls raise AttributeError.
    if options is None:
        options = {}

    # determine separator
    separator = options.get('report.separator', '\t')
    human = options.get('report.human')
    panel = options.get('report.panel')
    samples = options.get('report.samples')
    group = options.get('report.group')

    # read gene panel file if it has been set
    if panel:
        superblock_ids = [line.rstrip() for line in panel]
    else:
        superblock_ids = None

    # get sample ID, group and cutoff from metadata
    sample_query = limit_query(api.samples(), group=group, samples=samples)
    metadata = ((sample.id, sample.group_id, sample.cutoff)
                for sample in sample_query)

    # get the data
    base_query = limit_query(api.average_metrics(superblock_ids=superblock_ids),
                             group=group,
                             samples=samples)

    queries = [metadata,
               base_query,
               api.diagnostic_yield(superblock_ids=superblock_ids,
                                    group_id=group, sample_ids=samples),
               api.sex_checker(group_id=group, sample_ids=samples)]

    # group multiple queries by sample ID (first column)
    key_metrics = groupby(get(0), concat(queries))

    # get the column names dynamically from the query
    headers = concatv(['sample_id', 'group_id', 'cutoff'],
                      (column['name'] for column
                       in base_query.column_descriptions),
                      ['diagnostic yield', 'gender'])

    unique_headers = unique(headers)

    # iterate over all values, concat different query results, and keep
    # only the unique values (excluding second sample_id)
    data = (unique(concat(values)) for values in itervalues(key_metrics))

    if human:
        # export key_metrics in a more human friendly format
        return tabulate(data, unique_headers)

    # plain output: '#'-prefixed header line followed by the data rows
    return '\n'.join(cons('#' + separator.join(unique_headers),
                          stringify_list(data, separator=separator)))
def iter_enumerations():
    """Yield an ``enumeration_values`` node, then any interval nodes.

    The values of the ``integer`` and ``symbol`` children (in that order)
    are collected into a single ``enumeration_values`` node, which is
    emitted only when at least one value exists. The ``interval`` children
    follow when ``find_many_or_none`` returns something other than ``None``.
    """
    integer_nodes = find(children, type='integer')
    symbol_nodes = find(children, type='symbol')
    values = list(pluck('value', concatv(integer_nodes, symbol_nodes)))
    if values:
        yield make_json_ast_node(type='enumeration_values', values=values)

    intervals = find_many_or_none(children, type='interval')
    if intervals is None:
        return
    yield from intervals
def load_extensions(default, extensions, strict, environ, reload=False):
    """Load all of the given extensions.

    This should be called by run_algo or the cli.

    Parameters
    ----------
    default : bool
        Load the default extension (~/.zipline/extension.py)?
    extensions : iterable[str]
        The paths to the extensions to load. A path ending in ``.py`` is
        treated as a script and executed; any other value is treated as a
        module name and imported.
    strict : bool
        Should a failure to load an extension raise? When false the failure
        is reported as a warning instead.
    environ : mapping
        The environment used to find the default extension path.
    reload : bool, optional
        Reload any extensions that have already been loaded.
    """

    def _load_one(target):
        # Scripts run in a fresh namespace; anything else is imported.
        if not target.endswith('.py'):
            __import__(target)
            return
        with open(target) as source_file:
            namespace = {}
            six.exec_(
                compile(source_file.read(), target, 'exec'),
                namespace,
                namespace,
            )

    if default:
        default_extension_path = pth.default_extension(environ=environ)
        pth.ensure_file(default_extension_path)
        # The default extension goes first so other extensions can depend
        # on the order in which they are loaded.
        extensions = concatv([default_extension_path], extensions)

    for ext in extensions:
        already_loaded = ext in _loaded_extensions
        if already_loaded and not reload:
            continue

        try:
            _load_one(ext)
        except Exception as e:
            if strict:
                # With `strict`, surface the actual exception and fail.
                raise
            # Without `strict`, only report the failure.
            warnings.warn(
                'Failed to load extension: %r\n%s' % (ext, e),
                stacklevel=2
            )
        else:
            _loaded_extensions.add(ext)
def parsetag(tag: str):
    """
    Split a compound tag such as ``tag#id.class1.class2`` or
    ``tag.class1#id.class2`` into its name and attributes. The id and the
    classes may appear in any order after the tag name.

    **returns**: a tuple of (str, {'id': str, 'class': str}); the tag name
    and an attrs dictionary prepopulated with the id and the
    space-separated classes found in ``tag``.
    """
    has_class = '.' in tag
    has_id = '#' in tag

    if not (has_class or has_id):
        return tag, {'id': '', 'class': ''}

    if not has_id:
        name, *classes = tag.split('.')
        return name, {'class': ' '.join(classes), 'id': ''}

    # Only the first '#' separates the id part; classes may sit on either side.
    before_id, after_id = tag.split('#', maxsplit=1)
    name, *leading_classes = before_id.split('.')
    ident, *trailing_classes = after_id.split('.')
    return name, {
        'id': ident,
        'class': ' '.join(leading_classes + trailing_classes),
    }
def test_deltas_macro(self):
    """Run the pipeline on the macro dataset with a derived deltas table.

    The deltas are all but the last row of ``self.macro_df`` with ``value``
    raised by 10 and ``timestamp`` moved one day later. With
    ``window_length=2`` and ``np.nanmax`` the pipeline is expected to
    output 10 and 11 for every asset on the two dates.
    """
    asset_info = asset_infos[0][0]
    expr = bz.Data(self.macro_df, name='expr', dshape=self.macro_dshape)

    baseline = bz.Data(
        self.macro_df.iloc[:-1],
        name='deltas',
        dshape=self.macro_dshape,
    )
    deltas = bz.transform(
        baseline,
        value=baseline.value + 10,
        timestamp=baseline.timestamp + timedelta(days=1),
    )

    nassets = len(asset_info)
    views_by_day = {
        '2014-01-02': repeat_last_axis(np.array([10.0, 1.0]), nassets),
        '2014-01-03': repeat_last_axis(np.array([11.0, 2.0]), nassets),
    }
    expected_views = keymap(pd.Timestamp, views_by_day)

    with tmp_asset_finder(equities=asset_info) as finder:
        values = [v for v in (10, 11) for _ in range(nassets)]
        output_index = pd.MultiIndex.from_product((
            sorted(expected_views.keys()),
            finder.retrieve_all(asset_info.index),
        ))
        expected_output = pd.DataFrame(
            values,
            index=output_index,
            columns=('value',),
        )
        dates = self.dates
        self._run_pipeline(
            expr,
            deltas,
            expected_views,
            expected_output,
            finder,
            calendar=dates,
            start=dates[1],
            end=dates[-1],
            window_length=2,
            compute_fn=np.nanmax,
        )
def parse_deps(f):
    """
    Parse dependencies from a single setup.py file.

    ``f`` is iterated line by line. Comments and all whitespace are removed
    before the ``install_requires`` list and the ``extras_require`` lists
    are located with regular expressions. Returns a generator of the
    non-empty requirement strings with surrounding quotes stripped.

    """
    without_comments = ''.join(strip_comments(l) for l in f)
    compact = re.sub(r'\s', '', without_comments)

    install_match = re.search(r'install_requires=\[(.+?)\],', compact)
    extras_groups = re.findall(r'extras_require=\{.*\[(.+?)\].*\}', compact)

    install_reqs = install_match.group(1).split(',') if install_match else []
    extra_reqs = toolz.concat(e.split(',') for e in extras_groups)

    unquoted = (
        req.strip('\'"') for req in toolz.concatv(install_reqs, extra_reqs)
    )
    return (req for req in unquoted if req)
def handle_exception(exc,
                     *,
                     _Done=Done,
                     _type=type,
                     _getframe=sys._getframe,
                     _isinstance=isinstance,
                     _Word=Word,
                     _clear_cstack=clear_cstack):
    """Handle exceptions that are raised during phorth operations.

    Parameters
    ----------
    exc : Exception
        The exception that was raised.

    Notes
    -----
    Normally this clears the calling frame's cstack and prints a traceback
    made of the cstack entries (each incremented by one, most recent last)
    followed by the ``tb_lasti`` of the exception's traceback, then the
    exception's type name and message.

    If ``exc`` is an instance of ``Done``, a new ``Done`` is raised instead
    so that the sentinel keeps propagating.
    """
    if _isinstance(exc, _Done):
        # propagate the sentinel `Done` type
        raise Done()

    caller_frame = _getframe(1)
    cstack = _clear_cstack(caller_frame)
    locations = concatv(
        map(op.add(1), reversed(cstack)),
        (exc.__traceback__.tb_lasti,),
    )
    trace_lines = '\n '.join(map(str, locations))
    print(
        'traceback, most recent call last:\n %s\n%s: %s' % (
            trace_lines,
            _type(exc).__name__,
            exc,
        ),
    )
def merge(*exprs, **kwargs):
    """Combine expressions into a single ``Merge`` expression.

    A single positional expression is returned unchanged. A single keyword
    expression is returned relabelled with its keyword. Otherwise the
    positional expressions are wrapped as ``_0``, ``_1``, ... and the
    keyword expressions are wrapped and labelled with their keyword, in
    sorted key order.

    Raises
    ------
    TypeError
        If every expression to merge is scalar (``ndim == 0``).
    ValueError
        If the merged result would contain repeated column names.
    """
    if len(exprs) + len(kwargs) == 1:
        # Only one object was given, so no merge needs to be constructed.
        if exprs:
            (only,) = exprs
            return only
        ((name, value),) = kwargs.items()
        return value.label(name)

    positional = (_wrap(expr, '_%s' % n) for n, expr in enumerate(exprs))
    labelled = (
        label(_wrap(v, k), k)
        for k, v in sorted(kwargs.items(), key=first)
    )
    merged = tuple(concatv(positional, labelled))

    if all(ndim(expr) == 0 for expr in merged):
        raise TypeError('cannot merge all scalar expressions')

    result = Merge(
        merged,
        varargsexpr(merged),
        maxshape(map(shape, merged)),
    )

    if isdistinct(result.fields):
        return result

    repeated = (k for k, v in frequencies(result.fields).items() if v > 1)
    raise ValueError("Repeated columns found: " + ', '.join(repeated))
def visit_infix_expression(node, operators={}):
    """Render an infix expression node as a parenthesised string.

    Operands and operators are interleaved in source order. Operands go
    through ``visit_node``; operators are translated through ``operators``
    when present there and kept as is otherwise. ``operators`` is only
    read, never modified.
    """

    def interleave(*iterables):
        # Round-robin over the iterables, yielding (iterable index, value)
        # and skipping the filler used once a shorter one is exhausted.
        rounds = itertools.zip_longest(*iterables, fillvalue=UnboundLocalError)
        for row in rounds:
            yield from (
                (position, item)
                for position, item in enumerate(row)
                if item != UnboundLocalError
            )

    tokens = []
    for position, item in interleave(node['operands'], node['operators']):
        if position == 0:
            tokens.append(visit_node(item))
        else:
            tokens.append(operators.get(item, item))

    # Transform product expressions into a lazy "and" expression in order to
    # prevent a division by 0:
    if node['type'] == 'product_expression':
        guards = interpose(el='and', seq=map(visit_node, node['operands']))
        tokens = concatv(guards, ['and'], tokens)

    return '({})'.format(' '.join(map(str, tokens)))
def export_paths(self, output_file):
    """Write the parent directories of all media files to ``output_file``.

    Every path found in the values of ``self.media`` is cut at its last
    ``/`` (a path without ``/`` is kept whole). The distinct results are
    written in sorted order, one per line.
    """
    files = toolz.concatv(*self.media.values())
    # A set comprehension replaces ``map`` with a side-effecting lambda:
    # ``map`` is lazy on Python 3, so the old form never filled the set
    # there and an empty file was written.
    dirs = {path.rsplit('/', 1)[0] for path in files}
    with open(output_file, 'w') as f:
        f.write('\n'.join(sorted(dirs)))
def delete_all(self): if raw_input('are you sure? (y/n) ') == 'y': files = toolz.concatv(*self.media.values()) for i in files: os.remove(i) print 'deleted:', i