def entity_pair2is_appendable(cls, text, entity_pair):
    Param = PriceSkillParameter

    entity_type_pair = lmap(FoxylibEntity.entity2type, entity_pair)
    param_type_pair = lmap(Param.Type.entity_type2parameter_type,
                           entity_type_pair)

    for param_type in param_type_pair:
        if param_type not in {Param.Type.PORTLIKE, Param.Type.TRADEGOOD}:
            return False

    param_type_1, param_type_2 = param_type_pair
    if param_type_1 != param_type_2:
        return False

    span_pair = lmap(FoxylibEntity.entity2span, entity_pair)
    text_between = StringTool.str_span2substr(
        text, SpanTool.span_pair2between(*span_pair))

    is_fullmatch = RegexTool.pattern_str2match_full(
        Param.pattern_delim(), text_between)
    if not is_fullmatch:
        return False

    return True
def table_pagesize2split_OLD(cls, table, ncol_per_page):
    buffer = cls.COUNT_COLHEAD
    ncol_overlap = 1  # overlapping column 1

    n_row = len(table)
    divider = ncol_per_page - buffer - ncol_overlap

    ncol_table = iter2singleton(map(len, table))
    ncol_data = ncol_table - buffer
    n_page = (ncol_data - ncol_overlap) // divider + (
        1 if (ncol_data - ncol_overlap) % divider else 0)

    cols_header = lmap(lambda l: l[:cls.COUNT_COLHEAD], table)

    for i in range(n_page):
        start = buffer + i * divider
        end = buffer + min((i + 1) * divider + 1, ncol_data)
        cols_body = lmap(
            lambda l: SpanTool.list_span2sublist(l, (start, end)), table)

        # keep only rows whose body columns contain data on this page
        table_partial = [
            cols_header[r] + cols_body[r]
            for r in range(n_row)
            if any(cols_body[r])
        ]
        yield table_partial
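# A minimal standalone sketch of the same pagination idea, assuming a single
# header column (COUNT_COLHEAD == 1) and plain list slicing in place of
# SpanTool.list_span2sublist. Hypothetical helper; illustration only.
def split_table_by_page(table, ncol_per_page, n_colhead=1, ncol_overlap=1):
    ncol_data = len(table[0]) - n_colhead
    divider = ncol_per_page - n_colhead - ncol_overlap
    n_page = -(-(ncol_data - ncol_overlap) // divider)  # ceiling division
    for i in range(n_page):
        start = n_colhead + i * divider
        end = n_colhead + min((i + 1) * divider + 1, ncol_data)
        yield [row[:n_colhead] + row[start:end] for row in table]

# e.g. one header column + 6 data columns split into pages of 4 columns,
# with the last column of each page repeated as the first of the next:
# list(split_table_by_page([list(range(7))], ncol_per_page=4))
# -> [[[0, 1, 2, 3]], [[0, 3, 4, 5]], [[0, 5, 6]]]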
def obj_list2uncovered(cls, obj_list, f_obj2se=None):
    if f_obj2se is None:
        f_obj2se = lambda x: x

    se_list = lmap(f_obj2se, obj_list)
    ilist_uncovered = cls.se_list2index_list_uncovered(se_list)
    return lmap(lambda i: obj_list[i], ilist_uncovered)
def text2entity_list(cls, str_in):
    logger = FoxylibLogger.func_level2logger(cls.text2entity_list,
                                             logging.DEBUG)

    entity_list_1day_raw = DayofweekEntityKo.text2entity_list(str_in)
    entity_list_multiday = cls._text2entity_list_multiday(str_in)
    span_list_multiday = lmap(FoxylibEntity.entity2span, entity_list_multiday)

    def entity_1day2is_not_covered(entity_1day):
        span_1day = FoxylibEntity.entity2span(entity_1day)
        for span_multiday in span_list_multiday:
            if SpanTool.covers(span_multiday, span_1day):
                return False
        return True

    entity_list_1day_uncovered = lfilter(entity_1day2is_not_covered,
                                         entity_list_1day_raw)

    entity_list = lchain(
        lmap(cls._entity_1day2multiday, entity_list_1day_uncovered),
        entity_list_multiday)
    return entity_list
def data2check_unique(cls, j_colhead_list, str_COL_list_ROW_list):
    # if not cls.ColHead.j_head2is_key(colhead): return

    count_col = len(j_colhead_list)
    j_list_uniq = lfilter(
        lambda j: cls.ColHead.j_head2is_key(j_colhead_list[j]),
        range(count_col))
    if not j_list_uniq:
        return

    count_row = len(str_COL_list_ROW_list)
    tuple_ROW_list = lmap(
        lambda row: tuple(map(lambda j: row[j], j_list_uniq)),
        str_COL_list_ROW_list)

    iList_duplicate = sorted(
        lfilter_duplicate(range(count_row), key=lambda i: tuple_ROW_list[i]),
        key=lambda i: (tuple_ROW_list[i], i),
    )
    if not iList_duplicate:
        return

    column_name_list = lmap(
        lambda j: cls.ColHead.j_head2col_name(j_colhead_list[j]), j_list_uniq)
    tuple_ROW_list_duplicate = lmap(partial(ListTool.li2v, tuple_ROW_list),
                                    iList_duplicate)

    h_error = {
        "column_name_list": column_name_list,
        "rownum_list_duplicate": lmap(cls._i2rownum, iList_duplicate),
        "tuple_ROW_list_duplicate": tuple_ROW_list_duplicate,
    }
    raise cls.DataUniqueValidatorException(h_error)
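# A minimal sketch of the same uniqueness check on plain lists of rows,
# assuming key columns are given by index. Hypothetical helper, and the
# duplicate-reporting convention may differ from lfilter_duplicate.
def find_duplicate_rows(rows, key_cols):
    seen, duplicates = {}, []
    for i, row in enumerate(rows):
        key = tuple(row[j] for j in key_cols)
        if key in seen:
            duplicates.append(i)       # a later occurrence of a repeated key
            if seen[key] is not None:  # report the first occurrence once
                duplicates.append(seen[key])
                seen[key] = None
        else:
            seen[key] = i
    return sorted(duplicates)

# find_duplicate_rows([("a", 1), ("b", 2), ("a", 3)], key_cols=[0]) -> [0, 2]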
def obj_list2uncovered(cls, obj_list, f_obj2span=None):
    if f_obj2span is None:
        f_obj2span = lambda x: x

    span_list = lmap(f_obj2span, obj_list)
    i_set_uncovered = cls.span_list2indexes_uncovered(span_list)
    return lmap(lambda i: obj_list[i], i_set_uncovered)
def learn_from_transitions(self, transitions):
    states = np.array(
        lmap(lambda transition: transition[0].state, transitions))
    old_state_qs = self.sess.run(self.q,
                                 feed_dict={self.input_tensor: states})
    next_stateQs = self.sess.run(
        self.q,
        feed_dict={
            self.input_tensor: np.array(
                lmap(lambda transition: transition[-1].next_state,
                     transitions))
        })

    rewards = np.array(lmap(self.calculate_transition_reward, transitions))
    actions = np.array(
        lmap(lambda transition: transition[0].action.index, transitions))
    next_max_qs = next_stateQs.max(1)

    # n-step Q-learning target: R + gamma^n * max_a Q(s', a)
    target = ((self.gamma**transitions[0].n) * next_max_qs) + rewards

    q, predictions, loss, training, summary = self.sess.run(
        [self.q, self.predictions, self.loss, self.training,
         self.summary_op],
        feed_dict={
            self.input_tensor: states,
            self.action: actions,
            self.target: target
        })
    self.train_writer.add_summary(summary)
    print(loss)
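# A minimal numpy sketch of the n-step target computed above, assuming gamma,
# per-transition discounted reward sums R, and next-state Q rows.
# Hypothetical values; illustration only.
import numpy as np

gamma, n = 0.99, 3
rewards = np.array([1.0, 0.5])      # discounted reward sum per transition
next_qs = np.array([[0.2, 0.8],     # Q(s', a) for each action
                    [0.4, 0.1]])
target = (gamma ** n) * next_qs.max(1) + rewards
# -> each transition's reward sum plus the discounted best next-state value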
def get_expected_edges(partobj, weight='weight', directed=False):
    '''
    Get the expected internal edges under the configuration model:
    :math:`\\hat{P}=\\sum_{ij}{\\frac{k_ik_j}{2m}\\delta(c_i,c_j)}`

    :param partobj:
    :type partobj: igraph.VertexClustering
    :param weight: True uses 'weight' attribute of edges
    :return: float
    '''
    if weight is None:
        m = float(partobj.graph.ecount())
    else:
        try:
            m = np.sum(partobj.graph.es[weight])
        except Exception:  # fall back to edge count if the attribute is missing
            m = partobj.graph.ecount()

    if m == 0:
        return 0
    kk = 0

    # Hashing this upfront is a lot faster (factor of 10).
    partobj.graph.vs['_id'] = range(partobj.graph.vcount())
    indices = [partobj.graph.vs['_id'][v.index] for v in partobj.graph.vs]
    if weight is None:
        strengths = dict(zip(indices, partobj.graph.outdegree(indices)))
        if directed:
            strengths_in = dict(zip(indices, partobj.graph.indegree(indices)))
        else:
            strengths_in = strengths
    else:
        strengths = dict(
            zip(indices,
                partobj.graph.strength(indices, weights=weight, mode='OUT')))
        if directed:
            strengths_in = dict(
                zip(indices,
                    partobj.graph.strength(indices, weights=weight, mode='IN')))
        else:
            strengths_in = strengths

    for subg in partobj.subgraphs():
        # Since node ordering on the subgraph doesn't match the main graph,
        # look up vertex ids in the original graph; the full strength must
        # come from the original graph.
        svec = np.array(
            lmap(lambda x: strengths[subg.vs['_id'][x.index]], subg.vs))
        svec_in = np.array(
            lmap(lambda x: strengths_in[subg.vs['_id'][x.index]], subg.vs))
        kk += np.sum(np.outer(svec, svec_in))

    if directed:
        return kk / (1.0 * m)
    else:
        return kk / (2.0 * m)
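# A minimal usage sketch, assuming python-igraph is installed. We cluster a
# small random graph and compare the expected internal edge count under the
# configuration model against the observed one. Illustration only.
import igraph

g = igraph.Graph.Erdos_Renyi(n=50, m=120)
partition = g.community_multilevel()            # an igraph.VertexClustering
expected = get_expected_edges(partition, weight=None)
observed = sum(sub.ecount() for sub in partition.subgraphs())
print(expected, observed)  # modularity scales with (observed - expected)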
def port_tradegood_lists2blocks(cls, port_tradegood_list, price_dict, lang,
                                groupby_parameter_type):
    logger = HenriqueLogger.func_level2logger(
        cls.port_tradegood_lists2blocks, logging.DEBUG)
    logger.debug({"port_tradegood_list": port_tradegood_list})

    if groupby_parameter_type == PriceSkillParameter.Type.PORTLIKE:
        from henrique.main.skill.price.by_port.price_by_port import PriceByPort
        blocks = [
            PriceByPort.port2text(port_codename, lmap(ig(1), l),
                                  price_dict, lang)
            for port_codename, l in gb_tree_global(port_tradegood_list,
                                                   [ig(0)])
        ]
        return blocks

    if groupby_parameter_type == PriceSkillParameter.Type.TRADEGOOD:
        from henrique.main.skill.price.by_tradegood.price_by_tradegood import PriceByTradegood
        blocks = [
            PriceByTradegood.tradegood2text(tg_codename, lmap(ig(0), l),
                                            price_dict, lang)
            for tg_codename, l in gb_tree_global(port_tradegood_list,
                                                 [ig(1)])
        ]
        return blocks

    raise Exception(groupby_parameter_type)
def continuous_blank_lines2removed(cls, str_in, blank_line_count_allowed):
    l_line = lmap(cls.str2strip, str_in.splitlines())
    i_list_invalid = IterTool.list_func_count2index_list_continuous_valid(
        l_line, lambda x: not x, blank_line_count_allowed)
    n = len(l_line)
    return "\n".join(lmap(lambda i: l_line[i],
                          filter(lambda i: i not in i_list_invalid,
                                 range(n))))
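# A minimal standalone sketch of the same idea: keep at most `allowed`
# consecutive blank lines. Hypothetical helper; illustration only.
def collapse_blank_lines(text, allowed):
    out, run = [], 0
    for line in (l.strip() for l in text.splitlines()):
        run = run + 1 if not line else 0
        if run <= allowed:
            out.append(line)
    return "\n".join(out)

# collapse_blank_lines("a\n\n\n\nb", allowed=1) -> "a\n\nb"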
def calculate_transition_reward(self, transition):
    def decay_reward(tup):
        r, i = tup
        return self.gamma**i * r

    l = lmap(lambda t: t.reward, reversed(transition))
    decay = lmap(decay_reward, zip(l, range(transition.n)))
    sumval = sum(decay)
    return sumval
def calculate_transition_reward(self, transition):
    def decay_reward(tup):
        r, i = tup
        return self.params.gamma**i * r

    return sum(
        lmap(decay_reward,
             zip(lmap(lambda t: t.reward, reversed(transition[:-1])),
                 range(transition.n - 1))))
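# A minimal sketch of the discounted sum both methods above compute, assuming
# per-step rewards listed oldest-first. Note the reversal: the *last* reward
# gets gamma^0, exactly as in the code above. Hypothetical values.
gamma = 0.9
rewards = [1.0, 0.0, 2.0]   # r_0, r_1, r_2
discounted = sum(gamma**i * r for i, r in enumerate(reversed(rewards)))
# -> 2.0 * 0.9**0 + 0.0 * 0.9**1 + 1.0 * 0.9**2 = 2.81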
def _text2entity_list(cls, text_in, lang):
    element_list = TimedeltaElement.text2element_list(text_in, lang)
    if not element_list:
        return []

    span_list_element = lmap(TimedeltaElement.element2span, element_list)

    def timedelta_list2indexes_group():
        gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr,
                               text_in)
        n = len(element_list)
        i_list_sorted = sorted(range(n), key=lambda i: span_list_element[i])

        indexes_continuous = [i_list_sorted[0]]
        for j in range(1, n):
            i_prev, i_this = i_list_sorted[j - 1], i_list_sorted[j]
            span_gap = (
                span_list_element[i_prev][1],
                span_list_element[i_this][0],
            )
            if gap2is_valid(span_gap):
                indexes_continuous.append(i_this)
                continue

            yield indexes_continuous
            indexes_continuous = [i_this]
        yield indexes_continuous

    indexes_list = list(timedelta_list2indexes_group())

    def indexes2entity(indexes):
        span = (
            span_list_element[indexes[0]][0],
            span_list_element[indexes[-1]][1],
        )
        value = ListTool.indexes2filtered(element_list, indexes)
        entity = {
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
            FoxylibEntity.Field.VALUE: value,
            FoxylibEntity.Field.TYPE: cls.entity_type(),
        }
        return entity

    entity_list = lmap(indexes2entity, indexes_list)
    return entity_list
def sorted_by_key_index(cls, l, f_key):
    key_list = lmap(f_key, l)

    # Map each key to the index of its first occurrence
    # (reversed so earlier occurrences overwrite later ones).
    h = dict(reversed([(key, i) for i, key in enumerate(key_list)]))

    key_obj_list = lzip_strict(key_list, l)
    f_key = f_a2t(lambda key, obj: h[key])
    key_obj_list_sorted = sorted(key_obj_list, key=f_key)
    l_sorted = lmap(ig(1), key_obj_list_sorted)
    return l_sorted
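# A minimal standalone sketch of sorting by first occurrence of each key.
# Hypothetical helper; illustration only.
def sorted_by_first_key_occurrence(items, key):
    first_index = {}
    for i, x in enumerate(items):
        first_index.setdefault(key(x), i)
    return sorted(items, key=lambda x: first_index[key(x)])

# Groups equal keys together while preserving the order keys first appeared:
# sorted_by_first_key_occurrence(["b", "a", "b", "c", "a"], key=str)
# -> ['b', 'b', 'a', 'a', 'c']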
def yaml_envnames2kv_list(cls, json_yaml, envs):
    logger = FoxylibLogger.func_level2logger(cls.yaml_envnames2kv_list,
                                             logging.DEBUG)

    key_list = list(json_yaml.keys())
    value_list = lmap(partial(cls.json_envs_key2value, json_yaml, envs),
                      key_list)

    m = len(key_list)
    index_list_valid = lfilter(lambda i: value_list[i] is not None, range(m))
    return lmap(lambda i: (key_list[i], value_list[i]), index_list_valid)
def _text2entity_list_multiday(cls, str_in):
    logger = FoxylibLogger.func_level2logger(
        cls._text2entity_list_multiday, logging.DEBUG)

    entity_list_1day = DayofweekEntityKoSingle.text2entity_list(str_in)

    p_delim = cls.pattern_delim()
    m_list_delim = list(p_delim.finditer(str_in))

    span_ll = [
        lmap(FoxylibEntity.entity2span, entity_list_1day),
        lmap(MatchTool.match2span, m_list_delim),
        lmap(FoxylibEntity.entity2span, entity_list_1day),
    ]

    f_span2is_gap = lambda span: cls.str_span2is_gap(str_in, span)
    j_tuple_list = list(
        ContextfreeTool.spans_list2reducible_indextuple_list(
            span_ll, f_span2is_gap))

    logger.debug({
        "j_tuple_list": j_tuple_list,
        "entity_list_1day": entity_list_1day,
        "m_list_delim": m_list_delim,
    })

    for j_tuple in j_tuple_list:
        j1, j2, j3 = j_tuple
        entity_pair = entity_list_1day[j1], entity_list_1day[j3]
        logger.debug({
            "j1": j1,
            "j3": j3,
            "entity_pair": entity_pair,
        })

        span = (
            FoxylibEntity.entity2span(entity_pair[0])[0],
            FoxylibEntity.entity2span(entity_pair[1])[1],
        )
        j_entity = {
            FoxylibEntity.Field.TYPE: DayofweekSpanEntity.entity_type(),
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.FULLTEXT: str_in,
            FoxylibEntity.Field.VALUE: tmap(FoxylibEntity.entity2value,
                                            entity_pair),
        }
        yield j_entity
def data2entity_list(cls, data):
    text_in = TimeEntity.Data.data2text_in(data)

    m_list_hour = TimeEntity.Data.data2match_list_hour(data)
    span_list_hour = lmap(lambda m: m.span(), m_list_hour)

    m_list_ampm = TimeEntity.Data.data2match_list_ampm(data)
    span_list_ampm = lmap(lambda m: m.span(), m_list_ampm)

    spans_list = [
        span_list_hour,
        span_list_ampm,
    ]

    gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr, text_in)
    indextuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
        spans_list, gap2is_valid)

    def indextuple2entity(indextuple):
        i, j = indextuple
        m_hour, m_ampm = m_list_hour[i], m_list_ampm[j]

        hour_raw = TimeTool.hour2norm(int(m_hour.group()))
        if hour_raw is None:
            return None

        hour, ampm = AMPM.hour_ampm2normalized(hour_raw,
                                               AMPM.match2value(m_ampm))
        if hour is None:
            return None
        if ampm is None:
            return None

        span = (m_hour.span()[0], m_ampm.span()[1])
        value = {
            TimeEntity.Value.Field.HOUR: hour,
            TimeEntity.Value.Field.MINUTE: 0,
            TimeEntity.Value.Field.AMPM: ampm,
        }
        entity = {
            FoxylibEntity.Field.FULLTEXT: text_in,
            FoxylibEntity.Field.TYPE: TimeEntity.entity_type(),
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.VALUE: value,
        }
        return entity

    entity_list = lfilter(is_not_none,
                          map(indextuple2entity, indextuple_list))
    return entity_list
def doc_list2migrated(cls, doc_list_in, collection_to, j_config=None, **__):
    j_doc_list_in = lmap(DocumentTool.doc2meta_keys_removed, doc_list_in)
    result = collection_to.insert_many(j_doc_list_in, **__)
    j_doc_list_out_raw = lmap(MongoDBTool.bson2json, result)

    self_ref_config = cls.j_config2self_ref_config(j_config)
    if not self_ref_config:
        return j_doc_list_out_raw

    return result
def hierarchy(self, parameter_s=''):
    """Draw the hierarchy of a given class."""
    args = parse_argstring(self.hierarchy, parameter_s)
    objects = lmap(self.shell.ev, args.object)
    clslist = lmap(self._object_to_class, objects)
    namelist = lmap(self._class_name, clslist)
    igraph = FoldedInheritanceGraph(namelist, '', width=args.name_width)

    code = igraph.generate_dot('inheritance_graph',
                               graph_attrs={
                                   'rankdir': args.rankdir,
                                   'size': '"{0}"'.format(args.size),
                               })
    stdout = run_dot(code, format='png')
    display_png(stdout, raw=True)
def minimaxes_n(cls, l, p, key=None):
    if key is None:
        key = lambda x: x

    k_list = lmap(key, l)
    indexes_min, indexes_max = cls.indexes_minimax_n(k_list, p)

    mins = lmap(lambda i: l[i], indexes_min)
    maxs = lmap(lambda i: l[i], indexes_max)
    return (mins, maxs)
def _text2entity_list(cls, text_in, lang):
    match_list_sign = list(cls.Sign.pattern().finditer(text_in))
    span_list_sign = lmap(lambda m: m.span(), match_list_sign)

    entity_list_timedelta = TimedeltaEntity._text2entity_list(text_in, lang)
    span_list_timedelta = lmap(FoxylibEntity.entity2span,
                               entity_list_timedelta)

    span_lists = [
        span_list_sign,
        span_list_timedelta,
    ]
    gap2is_valid = partial(StringTool.str_span2match_blank_or_nullstr, text_in)
    indextuple_list = ContextfreeTool.spans_list2reducible_indextuple_list(
        span_lists, gap2is_valid)

    def indextuple2entity(indextuple):
        i, j = indextuple

        match_sign = match_list_sign[i]
        span_sign = span_list_sign[i]
        sign = match_sign.group()

        entity_timedelta = entity_list_timedelta[j]
        span_timedelta = span_list_timedelta[j]

        value = {
            cls.Value.Field.SIGN: sign,
            cls.Value.Field.TIMEDELTA: entity_timedelta,
        }
        span = (
            span_sign[0],
            span_timedelta[1],
        )
        entity = {
            FoxylibEntity.Field.SPAN: span,
            FoxylibEntity.Field.TEXT: StringTool.str_span2substr(text_in, span),
            FoxylibEntity.Field.VALUE: value,
            FoxylibEntity.Field.TYPE: cls.entity_type(),
        }
        return entity

    entity_list = lmap(indextuple2entity, indextuple_list)
    return entity_list
def match_nodes2groupname_list(cls, m, cls_node_list):
    str_group_list = MatchTool.match2str_group_list(m)
    nodename_list = lmap(cls2name, cls_node_list)

    str_group_list_related = lfilter(
        lambda s: s.split("__")[-1] in nodename_list, str_group_list)
    return str_group_list_related
def _rec_inner_join_helper(keycols, arr_list):
    '''All the dtype-wrangling and assertions for rec_inner_join'''
    assert len(arr_list), 'You must pass a string and one or more record arrays!'
    assert len(set(keycols)) == len(keycols), 'keycols must not contain duplicates!'
    #if jointype not in ['inner', 'outer', 'left']:
    #    msg = '{} jointype is not implemented. Only inner, outer, and left join are implemented.'
    #    raise Exception(msg.format(jointype))

    names_list = [a.dtype.names for a in arr_list]
    dtypes_list = lmap(get_rec_dtypes, arr_list)
    names_and_dtypes_list = [lzip(names, dtypes)
                             for names, dtypes in zip(names_list, dtypes_list)]
    _nd_dict = dict(zip(names_list[0], dtypes_list[0]))
    key_dtypes = [_nd_dict[name] for name in keycols]
    non_key_names_and_dtypes = [[(name, dt)
                                 for name, dt in name_dtype_list
                                 if name not in keycols]
                                for name_dtype_list in names_and_dtypes_list]
    non_key_col_names = fL(non_key_names_and_dtypes)[:, :, 0]
    non_key_dtypes = fL(non_key_names_and_dtypes)[:, :, 1]
    output_dtype = lzip(keycols, key_dtypes) + flatten(non_key_names_and_dtypes)

    # Assertions to ensure bad things can't happen:
    msg = 'Each input array must have all the keycols'
    assert all([not (set(keycols) - set(arr.dtype.names))
                for arr in arr_list]), msg

    msg = ('All arrays must have the same dtype for all keycols and '
           'may not share any other columns in common')
    _all_names = flatten(names_list)
    expected_num_cols = len(_all_names) - len(keycols) * (len(arr_list) - 1)
    assert expected_num_cols == len(output_dtype) == len(set(_all_names)), msg

    return non_key_col_names, output_dtype
def tradegood2text(cls, tradegood_codename, port_codename_list, price_dict,
                   lang):
    n = len(port_codename_list)

    tradegood = Tradegood.codename2tradegood(tradegood_codename)
    str_title = cls.tradegood_lang2title(tradegood, lang)

    def port2price(port_codename):
        price = MarketpriceDict.lookup(price_dict, port_codename,
                                       tradegood_codename)
        if price:
            return price

        price_fake = MarketpriceDoc.price_tradegood2doc_fake(
            port_codename, tradegood_codename)
        return price_fake

    price_list = lmap(port2price, port_codename_list)
    i_list_sorted = sorted(
        range(n), key=lambda i: MarketpriceDoc.key_default(price_list[i]))

    rows_body = [
        cls._price_lang2text(price_list[i], port_codename_list[i], lang)
        for i in i_list_sorted
    ]
    return Rowsblock.rows2text(chain(
        [str_title],
        rows_body,
    ))
def countingsorted(cls, iterable, f_key=None):
    l = list(iterable)
    if not l:
        return l

    if f_key is None:
        f_key = lambda x: x

    n = len(l)
    key_obj_list = [(f_key(x), x) for x in l]  # O(n)
    key_list = lmap(ig(0), key_obj_list)  # O(n)

    for k in key_list:  # O(n)
        if k < 0:
            raise ValueError("countingsorted requires non-negative integer keys")

    m = max(key_list)  # O(n)

    counter = [0] * (m + 1)
    for key in key_list:  # O(n)
        counter[key] += 1

    index_list = [0] * (m + 1)
    total = 0
    for i, v in enumerate(counter):  # O(m)
        total += v
        index_list[i] = total

    # Fill from the back so that the sort is stable.
    l_result = [None] * n
    for key, obj in reversed(key_obj_list):  # O(n)
        i = index_list[key]
        l_result[i - 1] = obj
        index_list[key] -= 1

    return l_result
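# A minimal usage sketch: counting sort is stable, so items with equal keys
# keep their original relative order. Assumes non-negative integer keys and
# the SortTool class referenced elsewhere in this codebase.
words = ["bb", "a", "cc", "dd", "e"]
by_length = SortTool.countingsorted(words, f_key=len)
# -> ['a', 'e', 'bb', 'cc', 'dd']  ('bb' stays ahead of 'cc' and 'dd')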
def _broadcast_arr_list(l, reverse=False):
    '''Helper function to broadcast all elements in a list to arrays with
    a common shape

    Uses broadcast_arrays unless there is only one box'''
    arr_list = lmap(np.asanyarray, l)
    broadcast = (reverse_broadcast(broadcast_arrays)
                 if reverse else
                 broadcast_arrays)
    return (broadcast(*arr_list)
            if len(arr_list) > 1 else
            arr_list)
def get_first_indices(arr, values, missing=None):
    '''Get the index of the first occurrence of the list of values in the
    (flattened) array

    The missing argument determines how missing values are handled:
     None: ignore them, leave them None
     -1: make them all -1
     'len': replace them with the length of the array (aka outside the array)
     'fail': throw an error'''
    bad_str = """Bad value for "missing", choose one of: None, -1, 'len', 'fail'"""
    assert missing in [None, -1, 'len', 'fail'], bad_str

    arr = np.asanyarray(arr)
    first_inds = dict(zip(*find_first_occurrence_1d(arr)))
    inds = lmap(first_inds.get, values)

    if missing == 'fail' and None in inds:
        raise Exception('Value Error! One of the values is not in arr')
    elif missing == 'len':
        default = arr.size
    elif missing == -1:
        default = -1
    else:
        default = None

    if default is not None:
        inds = [default if i is None else i for i in inds]

    return np.array(inds)
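# A minimal usage sketch, assuming the find_first_occurrence_1d helper
# defined later in this file (and its get_index_groups dependency) are
# available. Illustration only.
import numpy as np

arr = np.array([3, 1, 3, 2, 1])
get_first_indices(arr, [3, 2, 5], missing=-1)
# -> array([ 0,  3, -1]): first 3 is at index 0, first 2 at index 3,
#    and 5 is absent so it gets the -1 default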
def nd_radial_gradient(shape, offsets=None):
    if offsets is None:
        offsets = [0] * len(shape)
    grids = np.mgrid.__getitem__(lmap(slice, shape))
    v = [(g + off + 0.5 - s / 2) ** 2
         for s, off, g in zip(shape, offsets, grids)]
    return np.sqrt(np.sum(v, axis=0))
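# A minimal usage sketch: each output value is the distance from that pixel's
# center to the center of the array. Illustration only.
nd_radial_gradient((3, 3))
# -> array([[1.414, 1.   , 1.414],
#           [1.   , 0.   , 1.   ],
#           [1.414, 1.   , 1.414]])   (values rounded)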
def adapt_tags(self):
    """Adapt the basic tag info coming from the data side into the form
    the client needs."""
    if self.tag and self.tag.strip() != '':
        try:
            tags = json.loads(self.tag)
            is_hot = tags.get('is_hot', False)
            is_important = tags.get('is_important', False)
            is_discussed = tags.get('is_discussed', False)
            if is_hot:
                self.tags.append(TAG_HOT)
            if is_important:
                self.tags.append(TAG_IMPORTANT)
            if is_discussed:
                self.tags.append(TAG_DISCUSSED)
                self.is_comment_hot = True  # FIXME: somewhat awkward logic

            hit_keywords = tags.get('sport_ref', [])
            if hasattr(self, 'type') and self.type == EnumItemType.VIDEO:
                if hit_keywords:
                    keywords = sorted(hit_keywords, key=lambda x: x[1],
                                      reverse=True)
                    self.tags.extend(
                        lmap(lambda x: make_sport_tag(x[0]), keywords[:3]))

            # TODO: manual-tag logic for the headline backend; for now only
            # removal of the headline tag is implemented, the rest awaits
            # further product specification.
            if self.manual_tags == [0]:
                self.tags = lfilter(lambda x: x['id'] != TAG_IMPORTANT['id'],
                                    self.tags)
        except Exception:
            pass
def port_list_all(cls):
    h_codename2aliases_en = NamesenSheet.dict_codename2aliases()
    h_codename2aliases_ko = NameskoSheet.dict_codename2aliases()
    h_codename2culture = CultureSheet.dict_codename2culture()
    h_codename2product_list = ProductSheet.dict_codename2products()
    h_codename2comments_ko = CommentsKoSheet.dict_codename2comments()

    codename_list = luniq(chain(
        h_codename2aliases_en.keys(),
        h_codename2aliases_ko.keys(),
    ))

    def codename2port(codename):
        aliases = DictTool.filter(
            lambda k, v: v,
            {
                "en": h_codename2aliases_en.get(codename),
                "ko": h_codename2aliases_ko.get(codename),
            })
        comments = DictTool.filter(
            lambda k, v: v,
            {"ko": h_codename2comments_ko.get(codename)})

        port = {
            Port.Field.CODENAME: codename,
            Port.Field.CULTURE: h_codename2culture[codename],
            Port.Field.ALIASES: aliases,
            Port.Field.PRODUCTS: h_codename2product_list.get(codename),
            Port.Field.COMMENTS: comments,
        }
        return DictTool.filter(lambda k, v: v, port)

    return lmap(codename2port, codename_list)
def h_qterm2j_doc(cls):
    logger = HenriqueLogger.func_level2logger(cls.h_qterm2j_doc,
                                              logging.DEBUG)

    j_doc_list = list(TradegoodDocument.j_doc_iter_all())
    jpath = TradegoodDocument.jpath_names()

    h_list = [{cls._query2qterm(name): j_doc}
              for j_doc in j_doc_list
              for name_list_lang in jdown(j_doc, jpath).values()
              for name in name_list_lang]

    logger.debug({
        "h_list": iter2duplicate_list(
            lmap(lambda h: iter2singleton(h.keys()), h_list)),
        "jpath": jpath,
        "j_doc_list[0]": j_doc_list[0],
        "query[0]": jdown(j_doc_list[0], jpath),
    })

    qterm_list_duplicate = iter2duplicate_list(
        map(lambda h: iter2singleton(h.keys()), h_list))
    h_list_clean = lfilter(
        lambda h: iter2singleton(h.keys()) not in qterm_list_duplicate,
        h_list)

    h = merge_dicts(h_list_clean, vwrite=vwrite_no_duplicate_key)
    return h
def culture_list_all(cls):
    logger = HenriqueLogger.func_level2logger(cls.culture_list_all,
                                              logging.DEBUG)

    h_codename2aliases_en = NamesenSheet.dict_codename2aliases()
    h_codename2aliases_ko = NameskoSheet.dict_codename2aliases()
    h_codename2prefers = PrefersSheet.dict_codename2prefers()

    codename_list = luniq(chain(
        h_codename2aliases_en.keys(),
        h_codename2aliases_ko.keys(),
        h_codename2prefers.keys(),
    ))

    def codename2culture(codename):
        aliases = DictTool.filter(
            lambda k, v: v,
            {
                "en": h_codename2aliases_en.get(codename),
                "ko": h_codename2aliases_ko.get(codename),
            })

        culture = {
            Culture.Field.CODENAME: codename,
            Culture.Field.ALIASES: aliases,
            Culture.Field.PREFERS: h_codename2prefers.get(codename) or [],
        }
        return DictTool.filter(lambda k, v: v, culture)

    list_all = lmap(codename2culture, codename_list)
    return list_all
def process_args(metric_class, some_ag, load_fun, verify_usage=True):
    '''Unpack the arguments, load any metrics needed for computation using
    the load_fun, and compute any ratios requested (tuples).
    If "verify_usage" is True, also ensure that the arguments to the
    metric class are acceptable.
    (This essentially performs the delayed computation as defined in the
    metric_definitions DSL.)
    '''
    if some_ag is None:
        return [], {}

    # Grab the actual args and kwds
    unpack = partial(_unpack_val, load_fun)
    args, kwds = some_ag
    args = lmap(unpack, args)
    kwds = {k: unpack(v) for k, v in kwds.items()}

    if verify_usage:
        nargs = len(args)
        arg_names, metric_defaults = get_function_arg_names_and_kwd_values(
            metric_class.__init__)
        metric_args = arg_names[3:]  # (ignore first 3 values: self, name, and data)
        max_nargs = len(metric_args)
        min_nargs = max_nargs - len(metric_defaults)
        assert min_nargs <= nargs <= max_nargs, 'Wrong number of arguments!'
        metrics_kwds_active = metric_args[nargs:]
        assert not set(kwds.keys()) - set(metrics_kwds_active), \
            'Unknown (or reused) keyword!'

    return args, kwds
def box_list(l, box_shape=None):
    '''Convert a list to boxes (object array of arrays) with shape
    optionally specified by box_shape'''
    box_shape = len(l) if box_shape is None else box_shape
    assert np.prod(box_shape) == len(l), 'shape must match the length of l'
    boxed = np.empty(box_shape, dtype=object)  # np.object was removed in numpy >= 1.24
    boxed_flat = boxed.ravel()
    boxed_flat[:] = lmap(np.asanyarray, l)
    return boxed
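# A minimal usage sketch: three arrays of different shapes stored in a
# one-dimensional object array (a "box"). Illustration only.
import numpy as np

b = box_list([np.zeros(2), np.ones((2, 2)), np.arange(3)])
b.shape      # -> (3,)
b[1].shape   # -> (2, 2); each boxed element keeps its own shape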
def fast_load_auto_tag(cls, trial_id, code_hash, command, session=None):
    """Find automatic tag by code_hash and command.
    Ignore tags on the same trial_id

    Return (typ, tag)
    typ -- int representing the type of tag:
        0: Completely new tag (1.1.1)
        1: Match both code_hash and command (new tag should be x.y.+)
        2: Match code_hash (new tag should be x.+.1)
        3: New code_hash (new tag should be +.1.1)
    tag -- list with the found tag

    Arguments:
    trial_id -- id of trial that should be tagged
    code_hash -- code_hash of trial script
    command -- command line

    Keyword arguments:
    session -- specify session for loading (default=relational.session)
    """
    from .trial import Trial
    session = session or relational.session

    ttag = cls.__table__
    ttrial = Trial.__table__
    _query = select([ttag.c.name]).where(
        (ttrial.c.id == ttag.c.trial_id) &
        (ttrial.c.id != bindparam("trial_id")) &
        (ttag.c.type == "AUTO")
    )

    conditions = [
        (1, ((ttrial.c.code_hash == bindparam("code_hash")) &
             (ttrial.c.command == bindparam("command")))),
        (2, (ttrial.c.code_hash == bindparam("code_hash"))),
        (3, True),
    ]

    info = {
        "trial_id": trial_id,
        "code_hash": code_hash,
        "command": command,
    }

    for typ, condition in conditions:
        results = session.execute(_query.where(condition), info).fetchall()
        tags = [lmap(int, tag[0].split(".")) for tag in results]
        if tags:
            return typ, max(tags)

    return 0, [1, 1, 1]
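# A minimal sketch of how the (typ, tag) result could drive the next tag
# number, following the scheme in the docstring above. Hypothetical helper;
# illustration only.
def next_auto_tag(typ, tag):
    x, y, z = tag
    if typ == 1:          # same code_hash and command -> x.y.(z+1)
        return [x, y, z + 1]
    if typ == 2:          # same code_hash only -> x.(y+1).1
        return [x, y + 1, 1]
    if typ == 3:          # new code_hash -> (x+1).1.1
        return [x + 1, 1, 1]
    return [1, 1, 1]      # completely new tag

# next_auto_tag(2, [1, 3, 2]) -> [1, 4, 1]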
def m1gen(self, x):
    '''m1, but able to handle iterables (lists, tuples, ...) and slices as well'''
    if hasattr(x, '__iter__'):
        return lmap(self.m1gen, x)
    elif type(x) is slice:
        return slice(self.m1(x.start), self.m1(x.stop), x.step)
    elif type(x) is int:
        return self.m1(x)
    else:
        print("Not sure what you're feeding me... "
              "signed, fancyIndexingListM1.m1gen")
        return self.m1(x)
def process_tty_output(self, o):
    """Process 'who' cmd output and return a list of active ttys.

    who cmd output:
        'user     console      Jul 13 23:42'
    """
    clean = o.strip()
    if isinstance(clean, bytes):
        clean = clean.decode("utf-8")
    clean_list = clean.split("\n")[1:]
    active_ttys = lmap(lambda x: x.split()[1], clean_list)
    return active_ttys
def merge_default_values(resource_list, default_values):
    """
    Generate a new list where each item of the original resource_list is
    merged with the default_values.

    Args:
        resource_list: list with items to be merged
        default_values: properties to be merged with each item of the list.
            If the item already contains some property, the original value
            will be maintained.

    Returns:
        list: list containing each item merged with default_values
    """
    def merge_item(resource):
        return merge_resources(default_values, resource)

    return lmap(merge_item, resource_list)
def cartesian(arrays, out=None):
    '''Generate a cartesian product of input arrays.

    Inputs:
     * arrays : list of 1D array-like (to form the cartesian product of)
     * out : (optional) array to place the cartesian product in.

    Returns out, 2-D array of shape (M, len(arrays)) containing cartesian
    products formed of input arrays.

    Example:
    cartesian(([1, 2, 3], [4, 5], [6, 7]))
    array([[1, 4, 6], [1, 4, 7], [1, 5, 6], [1, 5, 7],
           [2, 4, 6], [2, 4, 7], [2, 5, 6], [2, 5, 7],
           [3, 4, 6], [3, 4, 7], [3, 5, 6], [3, 5, 7]])

    Original code by SO user, "pv."
    http://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays
    '''
    arrays = lmap(np.asarray, arrays)
    dtype = arrays[0].dtype
    n = np.prod([x.size for x in arrays])
    if out is None:
        out = np.empty([n, len(arrays)], dtype=dtype)

    arr, rest = arrays[0], arrays[1:]
    m = n // arr.size
    out[:, 0] = np.repeat(arr, m)
    if rest:
        # Fill the first block recursively, then copy it for the other values
        cartesian(rest, out=out[:m, 1:])
        for j in range(1, arr.size):
            out[j * m:(j + 1) * m, 1:] = out[:m, 1:]
    return out
def update_ports(self, ports, id_or_uri, timeout=-1):
    """
    Updates the interconnect ports.

    Args:
        id_or_uri: Could be either the interconnect id or the interconnect uri.
        ports (list): Ports to update.
        timeout: Timeout in seconds. Waits for task completion by default.
            The timeout does not abort the operation in OneView, it just
            stops waiting for its completion.

    Returns:
        dict: The interconnect.
    """
    resources = lmap(self.__port_with_default_type, ports)
    uri = self._client.build_uri(id_or_uri) + "/update-ports"
    return self._client.update(resources, uri, timeout)
def __call__(self, values):
    return lmap(self.split, values)
def find_first_occurrence_1d(arr, get_keys=True):
    '''Equivalent to find_first_occurrence(arr.ravel()), but should be much
    faster (uses the very fast get_index_groups function)'''
    keys, index_groups = get_index_groups(arr)
    first_occurrences = lmap(np.min, index_groups)
    return (keys, first_occurrences) if get_keys else first_occurrences
def cartesian_records(arrays, out=None):
    '''Generate a cartesian product of input record arrays, combining
    the results into a single record array with all fields.
    No two arrays can share the same field!

    Inputs:
     * arrays : list of 1D array-like (to form the cartesian product of)
     * out : (optional) array to place the cartesian product in.

    Returns out, 2-D array of shape (M, len(arrays)) containing cartesian
    products formed of input arrays.

    Example:
    cartesian_records((np.array([1., 2., 3.], dtype=[('a', np.float)]),
                       np.array([4, 5], dtype=[('b', np.int)]),
                       np.array([6, 7], dtype=[('c', np.int)])))
    np.array([(1., 4, 6), (1., 4, 7), (1., 5, 6), (1., 5, 7),
              (2., 4, 6), (2., 4, 7), (2., 5, 6), (2., 5, 7),
              (3., 4, 6), (3., 4, 7), (3., 5, 6), (3., 5, 7)],
             dtype=[('a', np.float), ('b', np.int), ('c', np.int)])

    Original code by SO user, "pv."
    http://stackoverflow.com/questions/1208118/using-numpy-to-build-an-array-of-all-combinations-of-two-arrays
    '''
    arrays = lmap(np.asanyarray, arrays)
    output_length = np.prod([x.size for x in arrays])
    names_list = [a.dtype.names for a in arrays]
    arr, rest = arrays[0], arrays[1:]
    arr_names, rest_names_list = names_list[0], names_list[1:]
    other_names = flatten(rest_names_list)
    if out is None:
        dtypes_list = lmap(get_rec_dtypes, arrays)
        assert all(names_list), 'All arrays must be record arrays!'
        output_dtype = [(n, d)
                        for names, dtypes in zip(names_list, dtypes_list)
                        for n, d in zip(names, dtypes)]
        msg = 'No duplicate fields can exist between input arrays!'
        assert len(output_dtype) == len(set(flatten(names_list))), msg
        out = np.empty(output_length, dtype=output_dtype)

    m = output_length // arr.size
    for name in arr_names:
        out[name] = np.repeat(arr[name], m)

    if rest:
        cartesian_records(rest, out=out[:m])
        for name in other_names:
            for j in range(1, arr.size):
                out[name][j * m:(j + 1) * m] = out[name][:m]
    return out
def rec_inner_join(keycols, *arr_list):
    '''Inner join for numpy.
    A version of numpy.lib.recfunctions.join_by that always specifies
    inner product but also allows for duplicate key entries
    (many-to-many relationships)
    and can join two or more arrays simultaneously

    Warning: this function is not terribly efficient, especially if the
    amount of duplication is low.
    Use join_by when NO duplication is present, and also consider
    using pandas.merge

    Example:
    rec_inner_join('s',
        np.array([('x', 1.), ('x', 2.), ('y', 3.)],
                 dtype=[('s', 'S20'), ('a', np.float)]),
        np.array([('x', 4), ('y', 5), ('x', 0)],
                 dtype=[('s', 'S20'), ('b', np.int)]),
        np.array([(6, 'x'), (7, 'y'), (9, 'z')],
                 dtype=[('c', np.int), ('s', 'S20')]),
    )
    ->
    np.array([('x', 1., 4, 6), ('x', 1., 0, 6),
              ('x', 2., 4, 6), ('x', 2., 0, 6),
              ('y', 3., 5, 7)],
             dtype=[('s', 'S20'), ('a', np.float),
                    ('b', np.int), ('c', np.int)])
    '''
    keycols = keycols if islistlike(keycols) else [keycols]
    non_key_col_names, output_dtype = _rec_inner_join_helper(keycols, arr_list)
    keys_list = []
    index_groups_dict_list = []
    for arr in arr_list:
        k, ig = get_index_groups(arr[keycols])
        key = lmap(tuple, k)
        keys_list.append(key)
        index_groups_dict_list.append(dict(zip(key, ig)))

    # Stay with ONLY inner join for now since it simplifies the resulting
    # calculations (aka, no missing values)
    keys_use = list(keys_list[0])
    for keys in keys_list[1:]:
        keys_use = [k for k in keys_use if k in set(keys)]
    # if jointype == 'left':
    #     pass
    # elif jointype == 'inner':
    #     for keys in keys_list[1:]:
    #         keys_use = [k for k in keys_use if k in set(keys)]
    # elif jointype == 'outer':
    #     keys_set = lmap(set, keys_list)
    #     keys_use_set = set(keys_use)
    #     for keys in keys_list[1:]:
    #         for k in keys:
    #             if k not in keys_use_set:
    #                 keys_use.append(k)
    #                 keys_use_set.add(k)

    # The length of each key group after joining
    output_lengths = [np.prod([len(d[k]) for d in index_groups_dict_list])
                      for k in keys_use]
    output_len = sum(output_lengths)
    output_starts = np.cumsum([0] + output_lengths)
    output_arr = np.empty(output_len, dtype=output_dtype)

    # Copy of each input array where all keycols have been removed
    filtered_arrays = [arr[fields]
                       for arr, fields in zip(arr_list, non_key_col_names)]
    kc_inds = {k: i for i, k in enumerate(keycols)}
    for key, start, length in zip(keys_use, output_starts, output_lengths):
        # For this key, get the associated values from each array,
        # but use the filtered arrays so that all columns are unique
        values = [arr[d[key]]
                  for arr, d in zip(filtered_arrays, index_groups_dict_list)]
        output_view = output_arr[start:(start + length)]
        for k in keycols:
            output_view[k] = key[kc_inds[k]]
        # Insert the results of this portion of the join
        # into the output array at the right location
        cartesian_records(values, out=output_view)

    return output_arr
def lmapf(f):
    '''Just the functional form of lmap: lmapf(f)(x) <--> lmap(f, x)'''
    return lambda *args: lmap(f, *args)
def __call__(self, values, loader_context):
    return lmap(lambda x: x * loader_context.get('number_bedrooms'),
                lmap(self.format_price, values))
def __call__(self, values):
    return lmap(self.format_price, values)
def detect_anoms(data, k=0.49, alpha=0.05, num_obs_per_period=None,
                 use_decomp=True, one_tail=True, upper_tail=True,
                 verbose=False):
    """
    Detects anomalies in a time series using S-H-ESD.

    Args:
        data: Time series to perform anomaly detection on.
        k: Maximum number of anomalies that S-H-ESD will detect as a
            percentage of the data.
        alpha: The level of statistical significance with which to accept
            or reject anomalies.
        num_obs_per_period: Defines the number of observations in a single
            period, used during seasonal decomposition.
        use_decomp: Use seasonal decomposition during anomaly detection.
        one_tail: If True, only positive- or negative-going anomalies are
            detected, depending on whether upper_tail is True or False.
        upper_tail: If True and one_tail is also True, detect only
            positive-going (right-tailed) anomalies. If False and one_tail
            is True, only detect negative (left-tailed) anomalies.
        verbose: Additional printing for debugging.

    Returns:
        A dictionary containing the anomalies (anoms) and decomposition
        components (stl).
    """
    if num_obs_per_period is None:
        raise ValueError("must supply period length for time series decomposition")

    if list(data.columns.values) != ["timestamp", "value"]:
        data.columns = ["timestamp", "value"]

    num_obs = len(data)

    # Check to make sure we have at least two periods worth of data for
    # anomaly context
    if num_obs < num_obs_per_period * 2:
        print("Anom detection needs at least 2 periods worth of data")
        return None

    # Run-length encode the result of isnull to check for internal nulls
    if (len(lmap(lambda x: x[0],
                 list(groupby(ps.isnull(
                     ps.concat([ps.Series([np.nan]),
                                data.value,
                                ps.Series([np.nan])])))))) > 3):
        raise ValueError(
            "Data contains non-leading NAs. We suggest replacing NAs with "
            "interpolated values (see na.approx in Zoo package).")
    else:
        data = data.dropna()

    # -- Step 1: Decompose data. This returns a univariate remainder which
    # will be used for anomaly detection. Optionally, we might NOT decompose.
    data = data.set_index('timestamp')

    if not isinstance(data.index, ps.Int64Index):
        resample_period = {1440: 'T', 24: 'H', 7: 'D'}
        resample_period = resample_period.get(num_obs_per_period)
        if not resample_period:
            raise ValueError('Unsupported resample period: %d'
                             % num_obs_per_period)
        data = data.resample(resample_period)

    decomp = stl(data.value, np=num_obs_per_period)

    # Remove the seasonal component, and the median of the data, to create
    # the univariate remainder
    d = {
        'timestamp': data.index,
        'value': data.value - decomp['seasonal'] - data.value.median()
    }
    data = ps.DataFrame(d)

    p = {
        'timestamp': decomp.index,
        'value': ps.to_numeric((decomp['trend'] + decomp['seasonal']).truncate())
    }
    data_decomp = ps.DataFrame(p)

    # Maximum number of outliers that S-H-ESD can detect (e.g. 49% of data)
    max_outliers = int(num_obs * k)

    if max_outliers == 0:
        raise ValueError(
            "With longterm=TRUE, AnomalyDetection splits the data into 2 week "
            "periods by default. You have %d observations in a period, which "
            "is too few. Set a higher piecewise_median_period_weeks."
            % num_obs)

    # Define values and vectors.
    n = len(data.timestamp)
    R_idx = lrange(max_outliers)

    num_anoms = 0

    # Compute the test statistic until r = max_outliers values have been
    # removed from the sample.
    for i in lrange(1, max_outliers + 1):
        if one_tail:
            if upper_tail:
                ares = data.value - data.value.median()
            else:
                ares = data.value.median() - data.value
        else:
            ares = (data.value - data.value.median()).abs()

        # protect against constant time series
        data_sigma = mad(data.value)
        if data_sigma == 0:
            break

        ares /= float(data_sigma)
        R = ares.max()

        temp_max_idx = ares[ares == R].index.tolist()[0]
        R_idx[i - 1] = temp_max_idx
        data = data[data.index != R_idx[i - 1]]

        if one_tail:
            p = 1 - alpha / float(n - i + 1)
        else:
            p = 1 - alpha / float(2 * (n - i + 1))

        t = student_t.ppf(p, (n - i - 1))
        lam = t * (n - i) / float(sqrt((n - i - 1 + t**2) * (n - i + 1)))

        if R > lam:
            num_anoms = i

    if num_anoms > 0:
        R_idx = R_idx[:num_anoms]
    else:
        R_idx = None

    return {
        'anoms': R_idx,
        'stl': data_decomp
    }
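# A minimal standalone sketch of the ESD critical value lambda used above,
# assuming scipy is available. For n observations at the i-th removal, an
# observation is flagged when its max studentized deviation R exceeds lambda.
from math import sqrt
from scipy.stats import t as student_t

def esd_critical_value(n, i, alpha=0.05, one_tail=True):
    p = (1 - alpha / float(n - i + 1) if one_tail
         else 1 - alpha / float(2 * (n - i + 1)))
    t = student_t.ppf(p, n - i - 1)
    return t * (n - i) / float(sqrt((n - i - 1 + t**2) * (n - i + 1)))

# esd_critical_value(100, 1) -> roughly 3.2 for a right-tailed test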
def apply_at_depth(f, *args, **kwds):
    '''Takes a function and its arguments (assumed to all be arrays)
    and applies boxing to the arguments so that various
    re-broadcasting can occur
    Somewhat similar to vectorize and J's rank conjunction (")

    f: a function that acts on arrays and returns an array
    args: the arguments to f (all arrays)
          depending on depths, various subarrays from these are
          what actually get passed to f
    kwds:
        depths (or depth): an integer or list of integers with the
                           same length as args (default 0)
        broadcast_results: a boolean that determines if broadcasting
                           should be applied to the results
                           (default False)

    Returns: a new array based on f mapped over various subarrays of args

    Examples:
    One way to think about apply_at_depth is as replacing this
    kind of construct:
        a, b = args
        l = []
        for i in range(a.shape[0]):
            ll = []
            for j in range(a.shape[1]):
                ll.append(f(a[i, j], b[j]))
            l.append(ll)
        result = np.array(l)
    This would simplify to:
        apply_at_depth(f, a, b, depths=[2, 1])
    except that apply_at_depth handles all sorts of other types of
    broadcasting for you.
    Something like this could be especially useful if the "f" in
    question depends on its arguments having certain shapes but you
    have data structures with those as subsets.

    The algorithm itself is as follows:
     * box each arg at the specified depth (box_list)
       See docs for "box" for more details
     * broadcast each boxed argument to a common shape
       (bbl, short for broadcasted box_list)
       Note that box *contents* can still have any shape
     * flatten each broadcasted box (bbl_flat)
       Each element of bbl_flat will be a 1D list of arrays where
       each list has the same length
       (for clarity, let's call these lists l0, l1, l2, etc.)
     * map f over these flat boxes like so:
       [f(l0[i], l1[i], ...) for i in range(arg_size)]
       or just map(f, *bbl_flat)
       Again, l0[i] will still be an array that can have arbitrary
       shape and will be some subarray of args[0] (ex: args[0][2, 1])
     * Optionally broadcast the results (otherwise force all outputs
       to have the same shape) and construct a single array from all
       the outputs
     * Reshape the result to account for the flattening that happened
       to the broadcasted boxes
       This is the same way that unboxing works.
     * Celebrate avoiding unnecessarily complex loops :)

    This function is as efficient as it can be considering the
    generality; if f is reasonably slow and the arrays inside the
    boxes are fairly large it should be fine.
    However, performance may be a problem if applying it to single
    elements
    In other words, with:
        a = np.arange(2000).reshape(200, 2, 5)
    do this:
        apply_at_depth_ravel(np.sum, a, depth=1)
    instead of this:
        apply_at_depth(np.sum, a, depth=1)
    The latter is just essentially calling map(np.sum, a)'''
    assert not ('depth' in kwds and 'depths' in kwds), (
        'You can pass either kwd "depth" or "depths" but not both!')
    depths = kwds.pop('depths', kwds.pop('depth', 0))  # grab depths or depth, default 0
    broadcast_results = kwds.pop('broadcast_results', False)
    depths = (depths
              if hasattr(depths, '__len__') else
              [depths] * len(args))
    assert len(args) == len(depths)

    boxed_list = lmap(box, args, depths)
    bbl = _broadcast_arr_list(boxed_list)
    bb_shape = box_shape(bbl[0])
    bbl_flat = lmap(np.ravel, bbl)
    results = lmap(f, *bbl_flat)
    results = (results
               if not broadcast_results else
               _broadcast_arr_list(results))
    arr = np.array(results)
    return arr.reshape(bb_shape + arr.shape[1:])
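# A minimal usage sketch mirroring the docstring's loop example, assuming the
# box/box_shape helpers referenced above behave as described. Illustration only.
import numpy as np

a = np.arange(12).reshape(2, 3, 2)   # boxed at depth 2 -> a 2x3 grid of pairs
b = np.arange(6).reshape(3, 2)       # boxed at depth 1 -> 3 pairs

out = apply_at_depth(lambda x, y: x + y, a, b, depths=[2, 1])
# equivalent to np.array([[a[i, j] + b[j] for j in range(3)]
#                         for i in range(2)])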
def nd_gradient(shape, origin_val, stopvals):
    grids = np.mgrid.__getitem__(lmap(slice, shape))
    ortho_grads = [g * (stop - origin_val) / (s - 1)
                   for s, stop, g in zip(shape, stopvals, grids)]
    return origin_val + sum(ortho_grads)
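# A minimal usage sketch: a 1-d linear ramp from origin_val to the stop
# value, with one such ramp summed per axis in higher dimensions.
nd_gradient((5,), 0.0, (1.0,))
# -> array([0.  , 0.25, 0.5 , 0.75, 1.  ])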