def _generate_docs_for_raw_config(
    raw_file_config: DirectIngestRawFileConfig,
) -> str:
    """Render one raw file config as a Markdown section and return it.

    The section consists of an H2 heading holding the file tag, the file
    description, and a table with one row per column (sorted by column name)
    giving the column's description and whether it is part of the primary key.
    """
    # Primary-key membership is decided case-insensitively.
    upper_primary_keys = [pk.upper() for pk in raw_file_config.primary_key_cols]

    rows = []
    for column in sorted(raw_file_config.columns, key=lambda col: col.name):
        in_primary_key = column.name.upper() in upper_primary_keys
        rows.append(
            [
                column.name,
                column.description or "<No documentation>",
                "YES" if in_primary_key else "",
            ]
        )

    table = MarkdownTableWriter(
        headers=["Column", "Column Description", "Part of Primary Key?"],
        value_matrix=rows,
        margin=1,
    ).dumps()

    heading = (
        f"## {raw_file_config.file_tag}\n\n{raw_file_config.file_description}\n\n"
    )
    return heading + table
def Create_mdTable(df):
    """Write the dataframe `df` as a Markdown table.

    The table is emitted through ``MarkdownTableWriter.write_table()`` and
    whatever that call returns is handed back to the caller.
    """
    md_writer = MarkdownTableWriter()
    md_writer.from_dataframe(df)
    outcome = md_writer.write_table()
    return outcome
def _generate_raw_file_table(
    self,
    config_paths_by_file_tag: Dict[str, str],
    file_tags_with_raw_file_configs: List[str],
    views_by_raw_file: Dict[str, List[str]],
) -> str:
    """Build the Markdown table of contents for a raw file specification.

    One row is produced per file tag (in sorted order). Tags that have a raw
    file config are rendered as in-page anchor links; the remaining columns
    list the referencing views and the last-update metadata of the config.
    """
    rows = []
    for file_tag in sorted(config_paths_by_file_tag):
        config_path = config_paths_by_file_tag[file_tag]

        if file_tag in file_tags_with_raw_file_configs:
            tag_cell = f"[{file_tag}](#{file_tag})"
        else:
            tag_cell = f"{file_tag}"

        rows.append(
            [
                tag_cell,
                ",<br />".join(views_by_raw_file[file_tag]),
                self._get_last_updated(config_path),
                self._get_updated_by(config_path),
            ]
        )

    column_titles = [
        "**Table**",
        "**Referencing Views**",
        "**Last Updated**",
        "**Updated By**",
    ]
    return MarkdownTableWriter(
        headers=column_titles, value_matrix=rows, margin=1
    ).dumps()
def _get_metrics_str_for_product(self, metric_keys: Set[DagKey]) -> str:
    """Builds the Metrics string for the product markdown file.

    Creates a table with one row per required metric (sorted by table id) and
    one column per Dataflow-enabled state (sorted by state code value). A cell
    holds "X" when that state's calculations include the metric.

    Args:
        metric_keys: The metric tables the product depends on.

    Returns:
        The metrics header followed by either the table or, when
        `metric_keys` is empty, a note that no Dataflow metrics are used.
    """
    metrics_header = (
        "##METRICS\n_All metrics required to support this product and"
        " whether or not each state regularly calculates the metric._"
        "\n\n** DISCLAIMER **\nThe presence of all required metrics"
        " for a state does not guarantee that this product is ready to"
        " launch in that state.\n\n"
    )

    if not metric_keys:
        return metrics_header + "*This product does not rely on Dataflow metrics.*\n"

    state_codes = sorted(
        self._get_dataflow_pipeline_enabled_states(),
        key=lambda code: code.value,
    )
    headers = ["**Metric**"] + [
        f"**{state_code.value}**" for state_code in state_codes
    ]

    # The set of metric names a state calculates does not depend on the row,
    # so build it once per state instead of once per table cell.
    metric_names_by_state = [
        {
            metric.name
            for metric in self.metric_calculations_by_state[
                str(state_code.get_state())
            ]
        }
        for state_code in state_codes
    ]

    table_matrix = []
    for metric_key in sorted(metric_keys, key=lambda dag_key: dag_key.table_id):
        table_id = metric_key.table_id
        metric_type_value = DATAFLOW_TABLES_TO_METRIC_TYPES[table_id].value
        generic_type = self.generic_types_by_metric_name[table_id].lower()
        metric_link = (
            f"[{metric_type_value}](../../metrics/{generic_type}/{table_id}.md)"
        )
        table_matrix.append(
            [metric_link]
            + [
                "X" if metric_type_value in state_metric_names else ""
                for state_metric_names in metric_names_by_state
            ]
        )

    writer = MarkdownTableWriter(
        headers=headers, value_matrix=table_matrix, margin=0
    )
    return metrics_header + writer.dumps()
def event_report(self):
    """Return a Markdown report with one table per non-empty event list.

    Event groups are taken from ``self._container.events`` and emitted in
    order of increasing attribute-name length. The keys of the first event in
    a group become the table headers; every event contributes exactly one
    cell per header so rows always line up with the header row.

    Returns:
        str: The concatenated Markdown tables ('' when there are no events).
    """
    # TODO: Use pandas dataframe's feature

    def _format_cell(value):
        """Convert one event value into something the table can display."""
        if isinstance(value, (np.ndarray, np.generic)):
            value = value.tolist()
        if isinstance(value, list):
            if all(isinstance(n, int) for n in value):
                return ', '.join(str(v) for v in value)
            if all(isinstance(n, float) for n in value):
                return ', '.join('{0:.5f}'.format(v) for v in value)
            # Mixed or nested lists used to be dropped, which shifted every
            # later cell in the row one column to the left.
            return ', '.join(str(v) for v in value)
        # ints and floats pass through unchanged; any other type (str, None,
        # ...) is kept as well instead of being silently skipped.
        return value

    reports = []
    ordered_events = sorted(
        self._container.events.items(), key=lambda item: len(item[0]))
    for attr, event_list in ordered_events:
        if len(event_list) == 0:
            continue

        keys = list(event_list[0].keys())
        writer = MarkdownTableWriter()
        # NOTE(review): pytablewriter names its caption attribute
        # `table_name`; confirm whether `title` has any effect here.
        writer.title = attr
        writer.headers = keys
        writer.value_matrix = [
            [_format_cell(event[k]) for k in keys] for event in event_list
        ]
        reports.append(writer.dumps())

    return ''.join(reports)
def debug(self):
    """Print a Markdown table lining up each matched source/target pair.

    For every pair returned by ``self._parallelize()`` the table shows the
    source slice, source indices and source texts on the left and the
    corresponding target columns on the right, one line of text per row.
    Consecutive pairs are separated by an empty row. Nothing is returned;
    the table is written with ``write_table()``.
    """
    parallel_matches = self._parallelize()

    writer = MarkdownTableWriter()
    writer.table_name = "debug"
    writer.headers = ["Src slice", "Index src", "Text src", "",
                      "Text tgt", "Index tgt", "Tgt slice"]
    writer.column_styles = [Style(align='center')] * 7

    rows = []
    for source_sequence, target_sequence in parallel_matches:
        source_sequence.context = self._source.context
        target_sequence.context = self._target.context

        src_slice = source_sequence.slice_representation()
        src_index = "\n".join([str(i) for i in source_sequence.iter_index()])
        src_text = "\n".join(
            [s.context.get_sequence_text(s) for s in source_sequence])
        arrow = '--->'
        tgt_text = "\n".join(
            [s.context.get_sequence_text(s) for s in target_sequence])
        tgt_index = "\n".join([str(i) for i in target_sequence.iter_index()])
        tgt_slice = target_sequence.slice_representation()

        # Blank separator between pairs. Adding it *before* every pair but
        # the first avoids popping from an empty list when there are no
        # matches (the old trailing `rows.pop()` raised IndexError then).
        if rows:
            rows.append([""] * 7)

        # Joining and re-splitting on '\n' gives every line of a multi-line
        # text its own row; single-valued columns are padded with None.
        for cells in zip_longest(
                [src_slice], src_index.split('\n'), src_text.split('\n'),
                [arrow],
                tgt_text.split('\n'), tgt_index.split('\n'), [tgt_slice]):
            rows.append(list(cells))

    writer.value_matrix = rows
    writer.write_table()
def compare_files_on_percentage_byte_overlap(title, files, keys=None):
    """Build a table writer comparing the ``<key>.dat`` files of several dirs.

    Args:
        title: Table name assigned to the returned writer.
        files: Directory paths; each is expected to contain ``<key>.dat`` for
            every key.
        keys: Section names to compare. When None they are derived from the
            names returned by ``get_dat_files(files[0])`` (text before the
            first '.').

    Returns:
        A MarkdownTableWriter (not yet written) with one row per key and per
        unordered pair of directories, holding ``average_overlap`` of the two
        files' bytes.
    """
    if keys is None:
        keys = [file.split(".")[0] for file in get_dat_files(files[0])]

    fnames = [file.split("/")[-1] for file in files]
    header = ["section", "file1", "file2", "percentage overlap"]

    matrix = []
    for key in keys:
        datas = []
        for file in files:
            # Close each handle right away instead of leaking it.
            with open(file + "/{}.dat".format(key), 'rb') as handle:
                datas.append(handle.read())

        # Every unordered pair (i < j), in the same order as before.
        for i in range(len(datas)):
            for j in range(i + 1, len(datas)):
                v = average_overlap(datas[i], datas[j])
                matrix.append([key, fnames[i], fnames[j], v])

    writer = MarkdownTableWriter()
    writer.table_name = title
    writer.value_matrix = matrix
    writer.headers = header
    return writer
def make_table(result_dict):
    """Generate a Markdown table of results.

    Each non-stderr metric of every task in ``result_dict["results"]`` becomes
    one row; the matching ``<metric>_stderr`` entry, when present, is shown
    after a "±". Task name and version are printed only on the first row of
    each task.
    """
    from pytablewriter import MarkdownTableWriter, LatexTableWriter

    column_titles = ["Task", "Version", "Metric", "Value", "", "Stderr"]

    md_writer = MarkdownTableWriter()
    latex_writer = LatexTableWriter()
    md_writer.headers = list(column_titles)
    latex_writer.headers = list(column_titles)

    rows = []
    for task, metrics in result_dict["results"].items():
        task_cell = task
        version_cell = result_dict["versions"][task]

        for metric, value in metrics.items():
            if metric.endswith("_stderr"):
                continue

            value_cell = "%.4f" % value
            stderr_key = metric + "_stderr"
            if stderr_key in metrics:
                tail = ["±", "%.4f" % metrics[stderr_key]]
            else:
                tail = ["", ""]
            rows.append([task_cell, version_cell, metric, value_cell] + tail)

            # Later rows of the same task leave these two columns blank.
            task_cell = ""
            version_cell = ""

    md_writer.value_matrix = rows
    latex_writer.value_matrix = rows

    # todo: make latex table look good
    # print(latex_writer.dumps())

    return md_writer.dumps()
def write(self, title, math, relative=None, reverse_speedup=False):
    """Print a Markdown table of the average and percentile results.

    Rows come from ``self.data``, keyed by ``(batch_size, beam_size)``; the
    values shown are those stored under `math`. When `relative` is given, a
    speedup column is interleaved after every data column, computed as
    ``value[math] / value[relative]`` (inverted when `reverse_speedup`).
    """
    column_titles = [f'**Avg ({self.unit})**']
    column_titles += [f'**{p}% ({self.unit})**' for p in self.percentiles]
    if relative:
        column_titles = interleave(
            column_titles, ['**Speedup**'] * len(column_titles))

    rows = []
    for key, results in self.data.items():
        batch_size, beam_size = key
        avg, res_percentiles = results[math]
        measurements = [avg, *res_percentiles]

        if relative:
            rel_avg, rel_res_percentiles = self.data[key][relative]
            baseline = [rel_avg, *rel_res_percentiles]
            speedup = [d / r for (r, d) in zip(baseline, measurements)]
            if reverse_speedup:
                speedup = [1 / s for s in speedup]
            measurements = interleave(measurements, speedup)

        rows.append([batch_size, beam_size] + measurements)

    writer = MarkdownTableWriter()
    writer.table_name = f'{title} - {math.upper()}'
    writer.headers = ['**Batch Size**', '**Beam Size**'] + column_titles
    writer.value_matrix = rows
    writer.write_table()
def create_section(section_list, name):
    """Render one README section as a Markdown heading plus table.

    Each entry of `section_list` is an indexable row: 0 = task, 1/2 = dataset
    name and link, 3/4 = SOTA name and link, 6 = SOTA accuracy, 7 = our
    accuracy, 9 = article link, 10 = notebook file name. Columns 5 and 8 are
    not rendered.

    Returns:
        tuple: (section markdown, number of rows in the section).
    """
    column_titles = [
        'Task', 'Dataset', 'SOTA', 'SOTA Acc', 'Our Acc', '📝', 'Notebook'
    ]
    colab_badge = (
        '[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]'
        '(https://colab.research.google.com/github/eugenesiow/practical-ml/blob/master/notebooks/{}'
        ' "Open in Colab")'
    )

    table_rows = []
    for entry in section_list:
        # Names are only turned into links when a link target is present.
        dataset_cell = (
            '[{}]({})'.format(entry[1], entry[2]) if entry[2] else entry[1]
        )
        sota_cell = (
            '[{}]({})'.format(entry[3], entry[4]) if entry[4] else entry[3]
        )
        table_rows.append([
            entry[0],
            dataset_cell,
            sota_cell,
            entry[6],
            entry[7],
            '[📝]({} "Article")'.format(entry[9]),
            colab_badge.format(entry[10]),
        ])

    table = MarkdownTableWriter(
        headers=column_titles,
        value_matrix=table_rows,
    ).dumps()

    heading = '## [↑](#-table-of-contents) {}\n\n'.format(name)
    return heading + table + '\n\n', len(table_rows)
def evaluate(self, metrics1=None, metrics2=None, metrics3=None, metrics4=None):
    """Print a "Report" table comparing the metrics of up to four methods.

    Every non-None argument is indexed as ``(method_name, metric_dict)``. The
    metric names of the first supplied method define the table rows; each
    method contributes one column.
    """
    from pytablewriter import MarkdownTableWriter

    method_names = []
    metric_dicts = []
    for entry in (metrics1, metrics2, metrics3, metrics4):
        if entry is None:
            continue
        method_names.append(entry[0])
        metric_dicts.append(entry[1])

    rows = []
    for metric_name in metric_dicts[0].keys():
        row = [metric_name]
        row.extend(per_method[metric_name] for per_method in metric_dicts)
        rows.append(row)

    report = MarkdownTableWriter(
        table_name="Report",
        headers=[""] + method_names,
        value_matrix=rows,
    )
    report.write_table()
def tab_comp(df_all, df_interpol, varname1, varname2, filename):
    """Compare paired variables and write the statistics as CSV and Markdown.

    For every pair ``(varname1[i], varname2[i])`` of columns in `df_interpol`
    the RMSE, bias (mean of ``x - y``), R2 and sample count N are computed
    over rows where both values are non-NaN, for three subsets: all rows,
    "night" (``sza > 110``) and "day" (``sza < 70``). Results are truncated
    to two decimals, saved to ``<filename>.csv``, printed as a Markdown table
    and written to ``<filename>.md``.

    Args:
        df_all: Unused; kept for interface compatibility.
        df_interpol: DataFrame holding the compared columns and an 'sza'
            column (presumably solar zenith angle in degrees - confirm).
        varname1: Column names of the first series; also the output columns.
        varname2: Column names of the second series, paired by position.
        filename: Output path without extension.
    """
    metrics = ['RMSE', 'bias', 'R2', 'N']
    times = ['all', 'night', 'day']

    df = pd.DataFrame(columns=varname1)
    # Row order: the four metrics for 'all', then 'night', then 'day'.
    df['metric'] = [metric for _ in times for metric in metrics]
    df['time'] = [time for time in times for _ in metrics]
    df.set_index(['metric', 'time'], inplace=True)

    sza = df_interpol['sza']
    subsets = (('all', None), ('night', sza > 110), ('day', sza < 70))

    for ref_name, cmp_name in zip(varname1, varname2):
        for label, mask in subsets:
            frame = df_interpol if mask is None else df_interpol.loc[mask]
            x = frame[ref_name].values
            y = frame[cmp_name].values
            valid = ~np.isnan(y) & ~np.isnan(x)
            x2 = x[valid]
            y2 = y[valid]

            df.loc[('R2', label), ref_name] = r2_score(x2, y2)
            df.loc[('bias', label), ref_name] = np.mean(x2 - y2)
            # mean_squared_error returns the MSE; take the square root so the
            # value matches the "RMSE" label it is stored under.
            df.loc[('RMSE', label), ref_name] = np.sqrt(
                mean_squared_error(x2, y2))
            df.loc[('N', label), ref_name] = len(x2)

    # Truncate (not round) every statistic to two decimals.
    df = df.applymap(lambda value: math.trunc(100 * value) / 100)
    df = df.reset_index()
    df.to_csv(filename + '.csv')

    writer = MarkdownTableWriter()
    writer.from_dataframe(df)
    writer.write_table()

    # change the output stream to a file
    with open(filename + '.md', "w") as f:
        writer.stream = f
        writer.write_table()
def entity_table():
    """Return the entity cross-validation report as a Markdown table.

    Reads 'results/DIETClassifier_report.json' and lists every entry, sorted
    by descending support, with its support, f1-score, precision and recall.
    Missing or falsy values are shown as "N/A".
    """
    metric_columns = ["support", "f1-score", "precision", "recall"]

    table = MarkdownTableWriter()
    table.table_name = "Entity Cross-Validation Results (5 folds)"

    with open('results/DIETClassifier_report.json', 'r') as f:
        report = json.load(f)

    table.headers = ["entity"] + metric_columns

    entities = sorted(
        report.keys(), key=lambda name: report[name]['support'], reverse=True)

    rows = []
    for entity in entities:
        row = [entity]
        for metric in metric_columns:
            value = report[entity].get(metric)
            row.append(value if value else "N/A")
        rows.append(row)

    table.value_matrix = rows
    return table.dumps()
def print(self):
    """Write the table to the writer's stream, then print the versions text.

    The first column is left-aligned and every other column centered.
    """
    table = MarkdownTableWriter()
    table.headers = self._make_headers()
    table.value_matrix = self._make_value_matrix()

    centered_columns = len(table.headers) - 1
    column_styles = [Style(align="left")]
    column_styles.extend(Style(align="center") for _ in range(centered_columns))
    table.styles = column_styles

    table.write_table()
    print("\n" + self._make_versions_text())
def main():
    """Summarise ../output/QandA.csv as a Markdown table and bar charts."""
    qa_df = pd.read_csv("../output/QandA.csv")

    # Overall and per-response-type summary statistics.
    df_description = qa_df.describe()
    group_type = qa_df.groupby('responsetype')
    group_type_description = group_type.describe()

    # Split the rows into answers and questions.
    is_answer = qa_df.responsetype == 'answer'
    is_question = qa_df.responsetype == 'question'
    answers = qa_df[is_answer]
    questions = qa_df[is_question]

    # Table meant to be pasted into the GitHub markdown.
    summary_table = MarkdownTableWriter()
    summary_table.from_dataframe(df_description, add_index_column=True)
    summary_table.write_table()

    # Bar charts: response types, then who/which company asked, who answered.
    barchart(group_type, 'Response Type', 'responses')
    barchart(
        questions.groupby('name'),
        'Who Asked Questions',
        'question_names',
        rotatation=90,
    )
    barchart(
        questions.groupby('company'),
        'Which Companies Asked Questions',
        'question_companies',
        rotatation=90,
    )
    barchart(answers.groupby('name'), 'Who Gave Answers', 'answer_names')
def create_writer(cls, table_name, headers, values):
    """Return a MarkdownTableWriter configured with the given table content.

    The writer gets `table_name`, `headers` and `values` (as its value
    matrix) and a cell margin of 1. Nothing is written here.
    """
    md_writer = MarkdownTableWriter()
    md_writer.margin = 1
    md_writer.table_name = table_name
    md_writer.headers = headers
    md_writer.value_matrix = values
    return md_writer
def get_data_fields_description(self):
    """Return the Markdown description of the data fields of every config.

    One table (field name, type, description) is rendered per config from
    ``self.configs_info``; type and comment of each field are looked up in
    ``self.last_class_info``. When all configs render to the same table it is
    emitted once under a heading listing every config name, otherwise each
    config gets its own heading and table.

    Returns:
        str: The Markdown text, or '' when there are no configs.

    Raises:
        Exception: Re-raises whatever the lookup of a field's "type" or
            "comment" raised (typically KeyError), after printing which field
            is missing its information.
    """
    output_parts = []
    for config_name, config_info in self.configs_info.items():
        headers = ["field name", "type", "description"]
        values = []
        for field_name in config_info["fields"]:
            field_info = self.last_class_info.get(field_name, {})
            try:
                field_type = field_info["type"]
                comment = field_info["comment"]
            except Exception:
                # Point at the offending field, then let the error propagate.
                print("MISSING FIELD INFORMATION", self.dataset_name,
                      config_name, field_name, field_info)
                raise
            values.append([field_name, field_type, comment])

        writer = MarkdownTableWriter(
            table_name=f"### {config_name}",
            headers=headers,
            value_matrix=values,
        )
        output_parts.append(self.get_markdown_string(writer))

    # Without configs there is no table to emit (indexing output_parts[0]
    # used to raise IndexError here).
    if not output_parts:
        return ""

    if all(part == output_parts[0] for part in output_parts):
        return "#### " + ", ".join(self.configs_info) + "\n\n" + output_parts[0]

    return "".join(
        f"#### {config_name}\n\n{part}"
        for config_name, part in zip(self.configs_info, output_parts)
    )
def generate_md_revision_sheet(filepath, result_path, lines, starting_item):
    """Write a Markdown revision sheet listing one reviewer comment per row.

    Args:
        filepath: Unused; kept for interface compatibility.
        result_path: Output file. Falls back to "revision_sheet.md" when empty
            or None.
        lines: Comment lines; blank lines are skipped and every literal
            ``\\newline`` is removed.
        starting_item: Number given to the first comment; later comments count
            up from it.

    The table is printed to the writer's default stream and also written to
    `result_path`. The raw value matrix is printed beforehand.
    """
    # `None + "revision_sheet.md"` used to raise TypeError; `or` covers both
    # None and the empty string.
    result_path = result_path or "revision_sheet.md"

    comments = [
        line.replace('\\newline', '') for line in lines if line.strip()
    ]
    matrix = [
        [starting_item + offset, comment, ""]
        for offset, comment in enumerate(comments)
    ]
    print(matrix)

    writer = MarkdownTableWriter()
    writer.headers = ["Nr.", "Comment", "How the comment is addressed"]
    writer.value_matrix = matrix
    writer.write_table()

    # Write the same table again, this time into the result file.
    with open(result_path, "w") as f:
        writer.stream = f
        writer.write_table()
def aggregated_config_splits(self):
    """Return one Markdown table of split sizes covering every config.

    The table has one row per config and one column per split. It is only
    built when all configs list the same splits in the same order as the
    first config; otherwise None is returned.
    """
    first_config = list(self.configs_info)[0]
    reference_splits = list(
        self.configs_info[first_config]["split_sizes"].keys())

    rows = []
    for config_name, config_info in self.configs_info.items():
        split_sizes = config_info["split_sizes"]
        if list(split_sizes.keys()) != reference_splits:
            # The splits are not the same -> no aggregated table
            return None
        rows.append(
            [config_name] + [split_sizes[split] for split in reference_splits])

    table = MarkdownTableWriter(
        table_name="### Configurations",
        headers=["name"] + reference_splits,
        value_matrix=rows,
    )
    return self.get_markdown_string(table)
def print_starred_info(starred_info_set, repo_depth_map, verbosity):
    """Show the star status of every package as a Markdown table in a pager.

    Rows are sorted by dependency depth, then package name. The "Depth"
    header is added at verbosity >= 1 and the "URL" header at verbosity >= 2.
    """
    ownership_known = [StarStatus.STARRED, StarStatus.NOT_STARRED]

    records = []
    for info in sorted(starred_info_set):
        # Ownership is only reported when the star status was determined.
        if info.star_status in ownership_known:
            owner_cell = info.is_owned
        else:
            owner_cell = _NA
        records.append([
            info.pypi_pkg_name,
            info.github_repo_id,
            _star_status_map[info.star_status],
            owner_cell,
            repo_depth_map[info.pypi_pkg_name.lower()],
            info.url,
        ])

    column_titles = ["Package", "Repository", "Starred", "Owner"]
    if verbosity is not None:
        if verbosity >= 1:
            column_titles += ["Depth"]
        if verbosity >= 2:
            column_titles += ["URL"]

    table = MarkdownTableWriter()
    table.headers = column_titles
    # Column 4 is the depth, column 0 the package name.
    table.value_matrix = sorted(records, key=itemgetter(4, 0))
    table.margin = 1
    table.register_trans_func(bool_to_checkmark)
    for centered in ("Starred", "Owner"):
        table.set_style(centered, Style(align="center"))

    pager(table.dumps())
def write(self, title, write_math):
    """Print a Markdown accuracy table for the requested precisions.

    One row is emitted per ``(batch_size, beam_size)`` key of ``self.data``.
    An accuracy column is included for each of 'fp32', 'tf32' and 'fp16' (in
    that order) that is contained in `write_math`.
    """
    header_by_precision = {
        'fp32': f'**Accuracy - FP32 ({self.unit})**',
        'tf32': f'**Accuracy - TF32 ({self.unit})**',
        'fp16': f'**Accuracy - FP16 ({self.unit})**',
    }
    selected = [
        precision for precision in header_by_precision
        if precision in write_math
    ]

    rows = []
    for (batch_size, beam_size), results in self.data.items():
        rows.append(
            [batch_size, beam_size]
            + [results[precision] for precision in selected])

    table = MarkdownTableWriter()
    table.table_name = f'{title}'
    table.headers = ['**Batch Size**', '**Beam Size**'] + [
        header_by_precision[precision] for precision in selected
    ]
    table.value_matrix = rows
    table.write_table()
def mk_table(datasets):
    """Return the Markdown overview table of the dataset components.

    `datasets` is a sequence of ``(dataset, weight)`` pairs. Each component
    row shows its size, its share of the total weighted size, the resulting
    number of epochs for 1.2e12 training characters and its mean document
    size. Rows are ordered by descending size and followed by a total row.
    """
    train_chars = 1.2e12
    total_weight = sum([weight * dataset.size() for dataset, weight in datasets])

    rows = []
    for dataset, weight in datasets:
        size = dataset.size()
        share = size * weight / total_weight
        rows.append([
            dataset.name(),
            size,
            '{:.2%}'.format(share),
            train_chars / size * share,
            humanbytes(size / dataset.num_docs()),
        ])

    rows = sorted(rows, key=lambda row: row[1], reverse=True)

    total_size = sum([row[1] for row in rows])
    total_docs = sum(dataset.num_docs() for dataset, _ in datasets)
    rows.append(
        ['**Total**', total_size, "", "", humanbytes(total_size / total_docs)])

    # Sizes stay numeric until here so sorting and summing work on raw bytes.
    for row in rows:
        row[1] = humanbytes(row[1])

    table = MarkdownTableWriter()
    table.table_name = "The Pile™"
    table.headers = [
        "Component", "Size", "Weight", "Epochs (@1.2TB)", "Mean Document Size"
    ]
    table.value_matrix = rows
    return table.dumps()
def gen_group(group, parameters, output):
    """Write the Markdown documentation of one parameter group to `output`.

    Emits an anchored H2 heading (display name, falling back to the group
    name), the optional group description and a Name/Description/Default
    table of the parameters whose "group" equals the group's name.

    Returns:
        int: Always 0.
    """
    parameter_fields = ["name", "description", "default"]
    group_name = group.get("name")
    display_name = group.get("displayName", group_name)

    output.write(f'## <a name="{group_name}"></a> {display_name} \n')
    if "description" in group:
        output.write(f'{group.get("description")}\n')
    output.write('\n\n')

    rows = []
    for parameter in parameters:
        if parameter.get("group", "") != group_name:
            continue
        rows.append([parameter.get(field) for field in parameter_fields])

    table = MarkdownTableWriter()
    # The first column (parameter name) is rendered bold.
    table.styles = [
        Style(align="left", font_weight="bold"),
        Style(align="left"),
        Style(align="left"),
    ]
    table.headers = [field.capitalize() for field in parameter_fields]
    table.margin = 1
    table.value_matrix = rows
    table.stream = output
    table.write_table()

    return 0
def _get_fields(fields: List[sqlalchemy.Column]) -> str:
    """Return a Markdown table of an entity's fields.

    Each row holds the field name, its comment and its type. Types exposing
    an ``enums`` attribute are listed as "ENUM:" followed by the values;
    otherwise the upper-cased Python type name is shown.

    Returns "No Fields" when `fields` is None and "<No columns>" when it is
    empty.

    Raises:
        ValueError: If any field has no comment.
    """
    if fields is None:
        return "No Fields"
    if not fields:
        return "<No columns>"

    rows = []
    for field in fields:
        if field.comment is None:
            raise ValueError(
                f"Every entity field must have an associated comment. "
                f"Field {field.name} has no comment."
            )

        if hasattr(field.type, "enums"):
            enum_values = "<br />".join([f"{e}" for e in field.type.enums])
            type_cell = f"ENUM: <br />{enum_values}"
        else:
            type_cell = field.type.python_type.__name__.upper()

        rows.append([field.name, field.comment, type_cell])

    return MarkdownTableWriter(
        headers=["Entity Field", "Entity Description", "Entity Type"],
        value_matrix=rows,
        margin=0,
    ).dumps()
def generate_md_table(data, headers):
    """Return `data` as a Markdown table with bold, centered columns.

    `headers` supplies the column titles and `data` the value matrix.
    """
    bold_centered = Style(align="center", font_weight="bold")

    table = MarkdownTableWriter()
    table.headers = headers
    # The same style object is reused for every column.
    table.column_styles = [bold_centered] * len(headers)
    table.value_matrix = data
    return table.dumps()
def _repr_markdown_(self):
    """Return the Markdown representation of this group as a table.

    The rows come from ``self._row_values``. When the group contains other
    groups, a blank row and a total row are appended.
    """
    is_supergroup = self.__contains_groups()

    rows = list(self._row_values(is_supergroup))
    if is_supergroup:
        total = str(self.total())
        rows.append(["", "", "", ""])
        rows.append(["", f"**_Total {self.name}_**", f"**_{total}_**", ""])

    column_titles = ["Pos.", "Bezeichnung", "Betrag", "Anmerkung"]
    alignments = ("right", "left", "right", "left")

    table = MarkdownTableWriter(
        headers=column_titles,
        column_styles=[TableStyle(align=alignment) for alignment in alignments],
        value_matrix=rows,
        margin=1,
    )
    return table.dumps()
def intent_table():
    """Return the intent cross-validation report as a Markdown table.

    Reads 'results/intent_report.json', drops the 'accuracy' entry and lists
    the remaining classes by descending support with their support, f1-score
    and the intents they were confused with. Missing or falsy values are
    shown as "N/A".
    """
    metric_columns = ["support", "f1-score", "confused_with"]

    table = MarkdownTableWriter()
    table.table_name = "Intent Cross-Validation Results (5 folds)"

    with open('results/intent_report.json', 'r') as f:
        report = json.load(f)

    table.headers = ["class"] + metric_columns

    intents = list(report.keys())
    intents.remove('accuracy')
    intents = sorted(
        intents, key=lambda name: report[name]['support'], reverse=True)

    def render(intent, column):
        """Format one report value for display."""
        value = report[intent].get(column)
        if not value:
            return "N/A"
        if column == "confused_with":
            return ", ".join(
                [f"{other}({count})" for other, count in value.items()])
        return value

    table.value_matrix = [
        [intent] + [render(intent, column) for column in metric_columns]
        for intent in intents
    ]
    return table.dumps()
def game_after_survival_training_no_interrupt(repeat_times):
    """Run the game `repeat_times` times and log the results as a table.

    Each run is started through ``curses.wrapper`` with 12000 moves and
    returns three values; the first is logged under 'Moves' and the third
    under 'Lives Consumed', together with their ratio. The logged Markdown
    table has one row per statistic, one column per run and a final column
    with the average over all runs.

    Args:
        repeat_times: Number of games to play.
    """
    move_counts = 12000
    data = [['Lives Consumed', 'Moves', 'Ratio']]
    for _ in range(repeat_times):
        mp, _sc, lil = curses.wrapper(
            draw_menu_after_survival_no_interrupt, move_counts)
        data.append([lil, mp, float(lil) / float(mp)])

    # Header row + one row per run, then the column means as a last row.
    table = pd.DataFrame(data)
    averages = pd.DataFrame(data[1:]).mean(axis=0)
    # DataFrame.append was removed in pandas 2.0; concat is the supported
    # way to add the averages as an extra row.
    table = pd.concat([table, averages.to_frame().T], ignore_index=True)

    # Transpose so that every statistic becomes one table row.
    value_matrix = np.array(np.transpose(table)).tolist()

    writer = MarkdownTableWriter()
    writer.table_name = "collecting data after survival training"
    writer.headers = [" "] + \
        [str(i + 1) for i in range(repeat_times)] + ["average"]
    writer.value_matrix = value_matrix
    table_output = writer.dumps()

    logger = logging.getLogger('no_interrupt')
    no_fire_stat = 'no_fires status: ' + \
        str(nf_global_survival_training) + '\n'
    enemy_freq_stat = 'enemy freq status: ' + \
        str(enemy_freq_sur_train) + '\n'
    ot_str = no_fire_stat + enemy_freq_stat + \
        'After survival training: \n' + table_output
    logger.info(ot_str)
def topics_to_md(model, topics_dict):
    """Print `topics_dict` as a Markdown table named after `model`.

    The dict is converted to a DataFrame first and the DataFrame index is
    included as the first column.
    """
    topics_frame = pd.DataFrame(topics_dict)

    table = MarkdownTableWriter()
    table.table_name = model
    table.from_dataframe(topics_frame, add_index_column=True)
    table.write_table()
def markdown_output(xheader, yheader, matrix, table_name=None):
    """Print `matrix` as a Markdown table with row and column labels.

    `xheader` labels the columns (after an empty corner cell) and
    ``yheader[i]`` is prepended to the i-th row of `matrix`.

    Returns:
        The MarkdownTableWriter that was used to write the table.
    """
    labelled_rows = []
    for row_index, line in enumerate(matrix):
        labelled_rows.append([yheader[row_index], *line])

    table = MarkdownTableWriter()
    table.table_name = table_name
    table.headers = ['', *xheader]
    table.value_matrix = labelled_rows
    table.write_table()
    return table