def genMaintenanceBox(maintenancePeriods: List[IPeriod_with_window], **kwargs):
    """
    Build the band-plot action that marks maintenance periods.

    Parameters
    ----------
    maintenancePeriods: List[IPeriod_with_window]
        Periods to draw. An element that is an ``IPeriod_with_window``
        contributes ``get_period()[0]``; any other element is used as is.
        Every resulting value is passed through ``pandas.to_datetime``.
    **kwargs
        Forwarded to ``bandPlot`` when at least one period is given.
        They are not forwarded when the list is empty.

    Returns
    -------
    maintenancePlot: Callable
        Whatever ``bandPlot`` returns; the original docstring describes it
        as ``Dict -> pandas.DataFrame -> ax -> ax``, an action that plots
        rectangles representing the maintenance periods.

    Example
    -------
    Carried over from the original docstring (verify against the current
    API before relying on it)::

        from src.plot_util import Figure
        from src.setting import default, maintenance
        from src.plot_maintenance import genMaintenanceBox, presetSubplot

        maintenanceBox = genMaintenanceBox(*maintenance["st1-1"])

        figure = Figure()
        figure.add_subplot(
            presetSubplot(default)(
                name,
                fileSelector=[site],
                plot=[*maintenanceBox]
            )
        )
        figure.show()
    """
    # Nothing to draw: hand bandPlot a single placeholder entry.
    if len(maintenancePeriods) == 0:
        return bandPlot([None])

    def unwrap(period):
        if isinstance(period, IPeriod_with_window):
            return period.get_period()[0]
        return period

    bands = tuple(
        pd.to_datetime(unwrap(period)) for period in maintenancePeriods)
    return bandPlot(bands, **kwargs)
def generate(site: SiteObject, machineNames: List[str], limit={}, style={}, saveDir=None, file=""):
    """
    Build one figure per maintenance period of ``site``.

    Parameters
    ----------
    site: SiteObject
        Source of the maintenance periods, file selector and site name.
    machineNames: List[str]
        One subplot is added per name, in this order.
    limit: dict, optional
        Entries merged over the computed ``{"xlim": ...}`` limit.
    style: dict, optional
        Entries merged over the default axes style.
    saveDir: str, optional
        Directory handed to ``save_plot``. When ``None``,
        ``"./image/<site name>/"`` is used.
    file: str, optional
        Text inserted after the site name in the output file name.

    Returns
    -------
    list of tuple
        One ``(figure, save, [maintenanceBox], {"style": ..., "limit": ...})``
        per unique maintenance date pair, ordered by start date.
        ``maintenanceBox`` is the one built for the last machine name.

    Notes
    -----
    ``limit`` and ``style`` are only read, never mutated, so the shared
    default dictionaries are safe here.

    If ``machineNames`` is empty while maintenance periods exist,
    ``maintenanceBox`` is never assigned and building the result raises
    ``UnboundLocalError``.
    """
    subplotStyle = {
        **default.get_axes_style(),
        "xFmt": "%m/%d\n%H:%M",
        **style
    }

    # Order-preserving de-duplication. Membership is tested with ``in``
    # (equality) because the pairs are not known to be hashable.
    unique_maintenance_date_pairs = []
    for pair in site.get_date_pairs_of_maintenance():
        if pair not in unique_maintenance_date_pairs:
            unique_maintenance_date_pairs.append(pair)
    unique_maintenance_date_pairs.sort(
        key=lambda pair: pd.to_datetime(pair[0]))

    returns = []
    for start_date, end_date in unique_maintenance_date_pairs:
        subplotLimit = {
            "xlim": site.get_maintenance(
                [start_date], [end_date])[0].get_period()[1],
            **limit
        }

        figure = Figure()
        for name in machineNames:
            maintenanceBox = genMaintenanceBox(
                site.get_maintenance([start_date], [end_date], key=name))
            figure.add_subplot(
                presetSubplot(default)(
                    name,
                    fileSelector=site.get_file_selector(),
                    plot=[maintenanceBox],
                    style=subplotStyle,
                    limit=subplotLimit),
                names=name)

        if saveDir is None:
            directory = "./image/" + site.get_name() + "/"
        else:
            directory = saveDir
        save = save_plot(
            directory,
            f'{site.get_name()+file}-maintenance-{start_date}-{end_date}')

        returns.append((figure, save, [maintenanceBox], {
            "style": subplotStyle,
            "limit": subplotLimit
        }))
    return returns
def read(self, i) -> Duplicated[pd.DataFrame]:
    """
    Load the data frames registered under index ``i``.

    Works the same way whatever the type of the data source is: the
    loader is obtained from ``ISubplot.IDataLoader`` for each source.

    The data, meta information, default transformers and data
    transformers for ``i`` are each handled as a ``Duplicated``; the
    number of frames read is the longest of the four (0 if all are
    empty). Missing positions fall back to ``{}`` for data/meta and
    ``[]`` for transformers via ``get_from_duplicated``.

    In test mode (``self.isTest()``) no transformers are passed to the
    loader.
    """
    sources: Duplicated = wrap_by_duplicate(self.data[i])
    infos: Duplicated = wrap_by_duplicate(self.dataInfo[i])
    base_transformers: Duplicated = self.default_transformers(i)
    extra_transformers: Duplicated = wrap_by_duplicate(
        self.dataTransformer[i])

    groups = (sources, infos, base_transformers, extra_transformers)
    n_frames = max(0, *(len(group) for group in groups))

    frames = []
    for j in range(n_frames):
        source = get_from_duplicated(sources, j, {})
        info = get_from_duplicated(infos, j, {})
        base = get_from_duplicated(base_transformers, j, [])
        extra = get_from_duplicated(extra_transformers, j, [])

        loader = ISubplot.IDataLoader(source, self.isTest())
        transformers = None if self.isTest() else base + extra

        frames.append(
            loader.read(source, meta=info, transformers=transformers))

    return Duplicated(*frames)
def directory_from(*roots):
    """
    Collect, for every root, the sub paths accepted by ``patterns``.

    ``patterns`` is taken from the enclosing scope. Each root is expanded
    with ``getAllSubPath`` and only the paths for which
    ``isMatchAll(patterns)`` is true are kept. Results are concatenated
    in root order and wrapped in a ``PathList``.
    """
    accepts = isMatchAll(patterns)
    collected = [
        path
        for root in roots
        for path in getAllSubPath(root)
        if accepts(path)
    ]
    return PathList(collected)
def rose(
        data: DataSource,
        x,  # factor1 selector
        y: str,  # stack factor selector
        yagg,  # aggregate
        *arg,
        width=None,
        color=None,
        cmap=None,
        xfactor=None,  # explicit factor list
        norm=False,
        **kwargs):
    """
    Build a plotter drawing one bar per x factor with ``ax.bar``.

    The rows of ``data`` are grouped by the x factor obtained from
    ``Iget_factor(data, x, xfactor)``. For each factor value the column
    ``y`` is aggregated with ``yagg`` to give the bar height, and the
    bar colour is resolved with ``get_literal_or_series(color, subset)``.

    When ``norm`` is true the heights are divided by their total (all
    heights become 0 if the total is 0).

    ``width``, the resolved colours and ``**kwargs`` are forwarded to
    ``ax.bar``. ``*arg`` and ``cmap`` are accepted but not used here.
    """
    x_factor_series, x_factor, position = Iget_factor(data, x, xfactor)
    categories = pd.Categorical(
        x_factor_series, ordered=True, categories=x_factor)
    x_group = data.groupby(categories)

    frames_by_factor = [
        data.loc[x_group.groups[level]] for level in x_factor
    ]

    # NaN values would make the total NaN when normalising, so they are
    # replaced by 0 before aggregating.
    heights = pd.Series(
        [yagg(frame[y].fillna(0)) for frame in frames_by_factor])

    colors = pd.Series(
        [get_literal_or_series(color, frame) for frame in frames_by_factor])

    if norm:
        total = np.sum(heights)
        heights = heights.apply(
            lambda height: 0 if total == 0 else height / total)

    plot_arg = {"width": width, "color": colors, **kwargs}

    @gen_plotter
    def plot(ax):
        return ax.bar(position, heights, **plot_arg)

    return plot
def figure_and_axes(self,
                    subgrid_names: Optional[List[str]] = None,
                    padding: dict | list = {},
                    figsize: Optional[Size] = None,
                    dpi: Optional[int] = None,
                    **figure_kwargs) -> Tuple[Fig, List[Ax]]:
    """
    Generate a matplotlib figure and one axes per subgrid.

    This method also takes the keyword arguments accepted by
    matplotlib.pyplot.figure.

    Parameters
    ----------
    subgrid_names: list[str], optional
        Names of the subgrids to generate axes for. When None, all
        subgrids of this instance are used.
    padding: dict or list, optional
        Padding around the plot areas of the subplots; passed unchanged
        to ``self.generate_axes``. The original documentation describes
        a dictionary with keys "top", "left", "bottom" and "right" that
        overwrites the default padding. If the padding is too small,
        axes may fall outside the image.
        Default value is an empty dictionary (it is not mutated here).
    figsize: tuple(float), optional
        Tuple of 2 floats (width, height) overriding the figure size.
        When None, ``self.get_size()`` is used.
    dpi: int, optional
        Resolution overriding ``self.dpi``. When None, ``self.dpi`` is
        used.
    figure_kwargs:
        Keyword arguments compatible with matplotlib.pyplot.figure.
        They take precedence over ``self.default_figure_style``.

    Returns
    -------
    fig: matplotlib.figure.Figure
    axs: list[matplotlib.axes._subplots.AxesSubplot]
    """
    if subgrid_names is None:
        subgrids = self.get_all_subgrids()
    else:
        subgrids = self.get_subgrids(subgrid_names)

    fig = plt.figure(
        figsize=self.get_size() if figsize is None else figsize,
        # The explicit ``dpi`` argument used to be ignored in favour of
        # ``self.dpi``; it now wins when given.
        dpi=self.dpi if dpi is None else dpi,
        **dict(self.default_figure_style, **figure_kwargs))

    axs = pip(mapping(self.generate_axes(fig, padding)), list)(subgrids)

    return (fig, axs)
def detect_encoding(self, path: str, header: int):
    """
    Guess the text encoding of a file from its first lines.

    Parameters
    ----------
    path: str
        File to inspect; it is opened in binary mode.
    header: int
        Maximum number of leading lines fed to the detector. Feeding
        stops earlier once the detector reports it is done.

    Returns
    -------
    encoding: str
        The encoding reported by ``UniversalDetector``, or "shift-JIS"
        when the detector reports none.

    Notes
    -----
    When ``self.is_verbose`` is true, the sampled lines are printed as a
    list of ``(index, decoded_line)`` tuples. Undecodable bytes are
    replaced instead of raising, so this diagnostic output cannot abort
    detection (e.g. when the "shift-JIS" fallback does not fit the file).
    """
    detector = UniversalDetector()
    lines = []
    with open(path, mode='rb') as f:
        for i, line in enumerate(f):
            if i >= header or detector.done:
                break
            detector.feed(line)
            lines.append(line)
    detector.close()

    detected = detector.result['encoding']
    encoding = detected if detected is not None else "shift-JIS"

    if self.is_verbose:
        print([
            (i, str(line, encoding=encoding, errors="replace"))
            for i, line in enumerate(lines)
        ])
    return encoding
def generate(site_objects: List[SiteObject], machineName: str, limit={}, style={}, saveDir=None, file=""):
    """
    Build one figure per maintenance period, comparing sites.

    The maintenance date pairs of all ``site_objects`` are merged
    without duplicates and sorted by start date. For each pair a figure
    is created with one subplot per site for the machine ``machineName``.

    Parameters
    ----------
    site_objects: List[SiteObject]
        Sites to compare; one subplot per site, in this order.
    machineName: str
        Machine shown in every subplot.
    limit: dict, optional
        Entries merged over the computed ``{"xlim": ...}`` limit.
    style: dict, optional
        Entries merged over the default axes style.
    saveDir: str, optional
        Sub directory under ``./image/``; ``machineName`` is used when
        it is None.
    file: str, optional
        Text inserted after the machine name in the output file name.

    Returns
    -------
    list of tuple
        One ``(figure, save, {"style": ..., "limit": ...})`` per pair.
    """
    subplotStyle = {
        **default.get_axes_style(),
        "xFmt": "%m/%d\n%H:%M",
        **style
    }

    # Merge the pairs of every site, keeping first occurrences only.
    merged_pairs = []
    for site in site_objects:
        for pair in site.get_date_pairs_of_maintenance():
            if pair not in merged_pairs:
                merged_pairs.append(pair)
    unique_maintenance_date_pairs = sorted(
        merged_pairs, key=lambda pair: pd.to_datetime(pair[0]))

    results = []
    for start_date, end_date in unique_maintenance_date_pairs:
        figure = Figure()

        # The number of maintenance events differs between sites, so the
        # x limit comes from the first site that has this period.
        lookups = [
            so.get_maintenance([start_date], [end_date])
            for so in site_objects
        ]
        non_empty = [periods for periods in lookups if len(periods) > 0]
        subplotLimit = {
            "xlim": non_empty[0][0].get_period()[1],
            **limit
        }

        for so in site_objects:
            maintenanceBox = genMaintenanceBox(
                so.get_maintenance([start_date], [end_date], key=machineName))
            subplot = presetSubplot(default)(
                machineName,
                fileSelector=so.get_file_selector(),
                plot=[maintenanceBox],
                option={},
                ylabel=so.get_name(),
                style=subplotStyle,
                limit=subplotLimit)
            figure.add_subplot(
                subplot, names=machineName + "-" + so.get_name())

        save = save_plot(
            f'./image/{machineName if saveDir is None else saveDir}/',
            f'{machineName}{file}-maintenance-{start_date}-{end_date}')

        results.append((figure, save, {
            "style": subplotStyle,
            "limit": subplotLimit
        }))
    return results
def get_date_pairs(self) -> List[Tuple[str, str]]:
    """
    Return one ``(start_date, end_date)`` tuple for each period
    yielded by ``self.get_periods()``, in the same order.
    """
    return [
        (period.get_start_date(), period.get_end_date())
        for period in self.get_periods()
    ]
def get_end_dates(self) -> List[str]:
    """
    Return the end date of each period yielded by
    ``self.get_periods()``, in the same order.
    """
    return [period.get_end_date() for period in self.get_periods()]
def isMatchAll(patterns):
    """
    Build a predicate telling whether a string matches every pattern.

    Parameters
    ----------
    patterns:
        Iterable of regular expressions accepted by ``re.search``.

    Returns
    -------
    Callable[[str], bool]
        Function returning True when ``re.search`` finds each pattern
        somewhere in the given string. With no patterns it returns True.
    """
    return lambda s: all(
        re.search(pattern, s) is not None for pattern in patterns)
def to_duplicate(d: dict) -> dict:
    """
    Return a copy of ``d`` whose values are all ``DuplicateLast``.

    A value whose type is exactly ``DuplicateLast`` is kept as is;
    every other value is wrapped with ``DuplicateLast(value)``.
    """
    return {
        key: value if type(value) is DuplicateLast else DuplicateLast(value)
        for key, value in d.items()
    }
def isAllIn(obj: Dict[Any, Any]) -> Callable[[List[Any]], bool]:
    """
    Build a predicate over a list of props for the container ``obj``.

    The returned function evaluates ``prop in obj`` for every prop and
    hands the results to ``isAllTrue``.
    """
    def contains_all(props):
        memberships = it.mapping(lambda prop: prop in obj)(props)
        return isAllTrue(memberships)

    return contains_all
def factor_bar(
        data: DataSource,
        x,  # factor1 selector
        y: str,  # stack factor selector
        yagg,  # aggregate
        *arg,
        xfactor=None,  # explicit factor list
        yfactor=None,  # explicit factor list
        width=None,
        color=None,
        norm=False,
        vert=True,
        legend_labels=None,
        legend={},
        show_factor_ticks=True,
        map_of_xlabel=lambda x: x,
        **kwargs):
    """
    Stacking bar plot.

    xfactor, yfactor:
        List of factor values, or a function generating it from the
        dataframe.
        xfactor is used for grouping the x axis variable.
        yfactor is used for grouping the stacking variable.

    yagg:
        Aggregating operation, applied with ``DataFrame.agg`` to the
        subset of every (stack, x factor) combination.

    norm:
        When true, every bar is divided by the total of its stack
        column (0 where the total is 0 or NaN).

    vert:
        True draws with ``ax.bar``, False with ``ax.barh``.

    legend, legend_labels:
        ``legend`` holds keyword arguments for ``ax.legend``; None or
        False disables the legend. ``legend_labels`` replaces the stack
        factor values as labels. ``legend`` is only read here.

    show_factor_ticks, map_of_xlabel:
        When ticks are shown, each x factor value is converted to its
        tick label with ``map_of_xlabel``.

    If ``data`` is empty, a plotter that returns the axes untouched is
    returned. If ``y`` is a list, the work is delegated to ``bar``.
    ``*arg``, ``width`` and ``color`` are accepted but not used here.

    factor_bar(
        x="group_column",
        xfactor=xfactors,
        y="stack_column",
        yfactor=yfactors,
        yagg=lambda group: group.count()
    )
    """
    if len(data) == 0:
        return lambda ax: ax

    if type(y) is list:
        return bar(
            x=x,
            y=y,
            yagg=yagg,
            xfactor=xfactor,
            norm=norm,
            vert=vert,
            legend_labels=legend_labels,
            legend=legend,
            map_of_xlabel=map_of_xlabel,
            **kwargs)(data)

    # 1. Build the grouping along which the bars are stacked.
    stack_series, stack_factor, _ = Iget_factor(data, y, yfactor)
    stack_group = data.groupby(
        pd.Categorical(stack_series, ordered=True, categories=stack_factor))

    # 2. Inside every stack group, group again by the x factor.
    #    NOTE(review): the original comment said every stack group must
    #    have an x factor of the same length and that a common x factor
    #    based on the whole data is recorded, but the factor is obtained
    #    per subset below - confirm the lengths agree when xfactor is
    #    not given explicitly.
    # 3. ax.bar(ind, bar_lengths_for_each_x_factor)
    def first_or_zero(values):
        return values[0] if len(values) > 0 else 0

    stack_bars = []
    for stack_name in stack_factor:
        subset = data.loc[stack_group.groups[stack_name]]

        x_factor_series, x_factor, position = Iget_factor(subset, x, xfactor)
        x_group = subset.groupby(
            pd.Categorical(x_factor_series,
                           ordered=True,
                           categories=x_factor))

        subset_for_x_factor = [
            subset.loc[x_group.groups[xfname]] for xfname in x_factor
        ]

        stack_bars.append([
            first_or_zero(df.agg(yagg).values) for df in subset_for_x_factor
        ])

    if norm:
        totals = [np.sum(column) for column in zip(*stack_bars)]

        def share(value, total):
            if total == 0 or np.isnan(total):
                return 0
            ratio = value / total
            return 0 if np.isnan(ratio) else ratio

        stack_bars = [
            [share(value, total) for value, total in zip(bars, totals)]
            for bars in stack_bars
        ]

    plot_arg = {
        **kwargs,
    }

    @gen_plotter
    def plot(ax):
        draw = ax.bar if vert else ax.barh
        offset_name = "bottom" if vert else "left"

        # Running top of the stack; the first layer starts at zero, so
        # the offset for the second layer is the first layer itself.
        prev_top = stack_bars[0]
        artists = []
        for i, heights in enumerate(stack_bars):
            if i == 0:
                art = draw(position, heights, **plot_arg)
            else:
                art = draw(position, heights,
                           **{offset_name: prev_top}, **plot_arg)
                prev_top = [a + b for a, b in zip(prev_top, heights)]
            artists.append(art)

        if (legend is not None) and (legend is not False):
            ax.legend(
                stack_factor if legend_labels is None else legend_labels,
                **legend)

        if not show_factor_ticks:
            return artists

        # map() needs the function and the iterable as two arguments;
        # the previous single-argument call raised TypeError.
        xlabels = list(map(map_of_xlabel, x_factor))
        if vert:
            ax.set_xticks(position)
            ax.set_xticklabels(xlabels)
            ax.set_xlim([-0.5, len(xlabels) - 0.5])
        else:
            ax.set_yticks(position)
            ax.set_yticklabels(xlabels)
            ax.set_ylim([-0.5, len(xlabels) - 0.5])
        return artists

    return plot
def bar(
        data: DataSource,
        x,  # factor1 selector
        y: str,  # stack factor selector
        yagg,  # aggregate: (DataFrame, Hashable) -> Number
        *arg,
        color=None,
        xfactor=None,  # explicit factor list
        norm=False,
        vert=True,
        legend_labels=None,
        legend={},
        show_factor_ticks=True,
        map_of_xlabel=lambda x: x,
        **kwargs):
    """
    Plot bars.

    xfactor:
        List of factor values, or a function generating it from the
        dataframe.

    yagg:
        Function aggregating the y factor. Its spec is
        (pd.DataFrame, Hashable) -> Number, where the Hashable is one of
        the names given by ``y`` (a single name or a list of names to
        stack).

    norm:
        When true, every bar is divided by the total of its stack
        column (0 where the total is 0 or NaN).

    vert:
        True draws with ``ax.bar``, False with ``ax.barh``.

    legend, legend_labels:
        ``legend`` holds keyword arguments for ``ax.legend``; None or
        False disables the legend. ``legend_labels`` replaces the names
        in ``y`` as labels.

    show_factor_ticks, map_of_xlabel:
        When ticks are shown, each x factor value is converted to its
        tick label with ``map_of_xlabel``.

    ``*arg`` and ``color`` are accepted but not used here.
    """
    stack_factor = y if type(y) is list else [y]

    stack_bars = []
    for stack_name in stack_factor:
        x_factor_series, x_factor, position = Iget_factor(data, x, xfactor)
        x_group = data.groupby(
            pd.Categorical(x_factor_series,
                           ordered=True,
                           categories=x_factor))

        frames_by_factor = [
            data.loc[x_group.groups[level]] for level in x_factor
        ]

        # A NaN aggregate would turn the total into NaN when
        # normalising, so NaN results are replaced by 0.
        aggregated = [yagg(frame, stack_name) for frame in frames_by_factor]
        stack_bars.append(
            [0 if np.isnan(value) else value for value in aggregated])

    if norm:
        totals = [np.sum(column) for column in zip(*stack_bars)]

        def share(value, total):
            if (total == 0) or np.isnan(total):
                return 0
            ratio = value / total
            return 0 if np.isnan(ratio) else ratio

        stack_bars = [
            [share(value, total) for value, total in zip(bars, totals)]
            for bars in stack_bars
        ]

    plot_arg = {
        **kwargs,
    }

    @gen_plotter
    def plot(ax):
        draw = ax.bar if vert else ax.barh
        offset_name = "bottom" if vert else "left"

        prev_top = stack_bars[0]
        artists = []
        for i, heights in enumerate(stack_bars):
            if i == 0:
                art = draw(position, heights, **plot_arg)
            else:
                art = draw(position, heights,
                           **{offset_name: prev_top}, **plot_arg)
                prev_top = [a + b for a, b in zip(prev_top, heights)]
            artists.append(art)

        if (legend is not None) and (legend is not False):
            ax.legend(
                stack_factor if legend_labels is None else legend_labels,
                **legend)

        if not show_factor_ticks:
            return artists

        xlabels = list(map(map_of_xlabel, x_factor))
        if vert:
            ax.set_xticks(position)
            ax.set_xticklabels(xlabels)
            ax.set_xlim([-0.5, len(xlabels) - 0.5])
        else:
            ax.set_yticks(position)
            ax.set_yticklabels(xlabels)
            ax.set_ylim([-0.5, len(xlabels) - 0.5])
        return artists

    return plot