def compare_files_on_percentage_byte_overlap(title, files, keys=None):
    """Build a Markdown table comparing ``.dat`` sections across directories.

    For every section key, ``<dir>/<key>.dat`` is read in binary mode from each
    entry of ``files`` and every unordered pair of directories is passed to
    ``average_overlap``.

    Args:
        title: Table name assigned to the returned writer.
        files: Directory paths, each expected to contain the ``.dat`` files.
        keys: Section names (file stems) to compare. When ``None`` they are
            derived from the names returned by ``get_dat_files(files[0])``.

    Returns:
        A ``MarkdownTableWriter`` whose rows are
        ``[section, file1, file2, <result of average_overlap>]``.
    """
    if keys is None:
        keys = [name.split(".")[0] for name in get_dat_files(files[0])]
    fnames = [path.split("/")[-1] for path in files]
    header = ["section", "file1", "file2", "percentage overlap"]

    matrix = []
    for key in keys:
        datas = []
        for path in files:
            # Use a context manager so every handle is closed right away
            # instead of lingering until garbage collection.
            with open(path + "/{}.dat".format(key), 'rb') as handle:
                datas.append(handle.read())
        # Visit each unordered pair (i < j) exactly once.
        for i, first in enumerate(datas):
            for j in range(i + 1, len(datas)):
                overlap = average_overlap(first, datas[j])
                matrix.append([key, fnames[i], fnames[j], overlap])

    writer = MarkdownTableWriter()
    writer.table_name = title
    writer.value_matrix = matrix
    writer.headers = header
    return writer
def make_table(result_dict):
    """Render evaluation results as a Markdown table string.

    ``result_dict["results"]`` maps each task to its metric dict and
    ``result_dict["versions"]`` maps each task to its version. Metrics whose
    name ends in ``_stderr`` get no row of their own; their value is shown in
    the "Stderr" column of the metric they belong to. The task name and
    version are printed only on the first emitted row of each task.
    """
    from pytablewriter import MarkdownTableWriter, LatexTableWriter

    md_writer = MarkdownTableWriter()
    latex_writer = LatexTableWriter()
    md_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]
    latex_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]

    rows = []
    for task, metrics in result_dict["results"].items():
        task_label = task
        version_label = result_dict["versions"][task]
        for metric, value in metrics.items():
            if metric.endswith("_stderr"):
                continue

            value_text = "%.4f" % value
            stderr_key = metric + "_stderr"
            if stderr_key in metrics:
                tail = ["±", "%.4f" % metrics[stderr_key]]
            else:
                tail = ["", ""]
            rows.append([task_label, version_label, metric, value_text] + tail)

            # Blank the labels so later rows of the same task stay uncluttered.
            task_label = ""
            version_label = ""

    md_writer.value_matrix = rows
    latex_writer.value_matrix = rows

    # todo: make latex table look good
    # print(latex_writer.dumps())

    return md_writer.dumps()
def _repr_markdown_(self):
    """Output for Markdown.

    Renders the rows from ``_row_values`` as a four-column table. When this
    object contains groups, an empty spacer row and a bold-italic total row
    are appended.
    """
    has_groups = self.__contains_groups()
    table_rows = list(self._row_values(has_groups))

    if has_groups:
        total_text = str(self.total())
        spacer_row = ["", "", "", ""]
        total_row = ["", f"**_Total {self.name}_**", f"**_{total_text}_**", ""]
        table_rows.extend([spacer_row, total_row])

    column_headers = ["Pos.", "Bezeichnung", "Betrag", "Anmerkung"]
    alignments = ("right", "left", "right", "left")
    writer = MarkdownTableWriter(
        headers=column_headers,
        column_styles=[TableStyle(align=side) for side in alignments],
        value_matrix=table_rows,
        margin=1,
    )
    return writer.dumps()
def _get_fields(fields: List[sqlalchemy.Column]) -> str: """Returns a table of the entity's fields and their corresponding descriptions.""" if fields is None: return "No Fields" if not fields: return "<No columns>" table_matrix = [] for field in fields: if field.comment is None: raise ValueError( f"Every entity field must have an associated comment. " f"Field {field.name} has no comment.") field_values = [ field.name, field.comment, f"ENUM: <br />{'<br />'.join([f'{e}' for e in field.type.enums])}" if hasattr(field.type, "enums") else field.type.python_type.__name__.upper(), ] table_matrix.append(field_values) writer = MarkdownTableWriter( headers=[ "Entity Field", "Entity Description", "Entity Type", ], value_matrix=table_matrix, margin=0, ) return writer.dumps()
def _get_metrics_str_for_product(self, metric_keys: Set[DagKey]) -> str: """Builds the Metrics string for the product markdown file. Creates a table of necessary metric types and whether a state calculates those metrics""" metrics_header = ( "##METRICS\n_All metrics required to support this product and" " whether or not each state regularly calculates the metric._" "\n\n** DISCLAIMER **\nThe presence of all required metrics" " for a state does not guarantee that this product is ready to" " launch in that state.\n\n") if not metric_keys: return (metrics_header + "*This product does not rely on Dataflow metrics.*\n") state_codes = sorted(self._get_dataflow_pipeline_enabled_states(), key=lambda code: code.value) headers = ["**Metric**"] + [ f"**{state_code.value}**" for state_code in state_codes ] table_matrix = [[ f"[{DATAFLOW_TABLES_TO_METRIC_TYPES[metric_key.table_id].value}](../../metrics/{self.generic_types_by_metric_name[metric_key.table_id].lower()}/{metric_key.table_id}.md)" ] + [ "X" if DATAFLOW_TABLES_TO_METRIC_TYPES[metric_key.table_id].value in [ metric.name for metric in self.metric_calculations_by_state[ str(state_code.get_state())] ] else "" for state_code in state_codes ] for metric_key in sorted(metric_keys, key=lambda dag_key: dag_key.table_id)] writer = MarkdownTableWriter(headers=headers, value_matrix=table_matrix, margin=0) return metrics_header + writer.dumps()
def _generate_docs_for_raw_config(
    raw_file_config: DirectIngestRawFileConfig,
) -> str:
    """Generates documentation for the given raw file config and returns it as a string.

    The output is a heading with the file tag, the file description, and a
    table of the columns sorted by name. Primary-key membership is matched
    case-insensitively.
    """
    columns_by_name = sorted(raw_file_config.columns, key=lambda col: col.name)
    upper_primary_keys = [
        key.upper() for key in raw_file_config.primary_key_cols
    ]

    documentation = (
        f"## {raw_file_config.file_tag}\n\n{raw_file_config.file_description}\n\n"
    )

    rows = []
    for column in columns_by_name:
        description_cell = column.description or "<No documentation>"
        if column.name.upper() in upper_primary_keys:
            primary_key_cell = "YES"
        else:
            primary_key_cell = ""
        rows.append([column.name, description_cell, primary_key_cell])

    writer = MarkdownTableWriter(
        headers=["Column", "Column Description", "Part of Primary Key?"],
        value_matrix=rows,
        margin=1,
    )
    return documentation + writer.dumps()
def Create_mdTable(df):
    """Write ``df`` as a Markdown-style table.

    The dataframe is loaded into a ``MarkdownTableWriter`` and written with
    ``write_table()``; whatever that call returns is passed back to the
    caller.
    """
    md_writer = MarkdownTableWriter()
    md_writer.from_dataframe(df)
    outcome = md_writer.write_table()
    return outcome
def _generate_raw_file_table(
    self,
    config_paths_by_file_tag: Dict[str, str],
    file_tags_with_raw_file_configs: List[str],
    views_by_raw_file: Dict[str, List[str]],
) -> str:
    """Generates a Markdown-formatted table of contents to be included in a raw file specification.

    One row is emitted per file tag, in sorted order. Tags that have a raw
    file config are rendered as in-page anchor links, the others as plain
    text.
    """
    rows = []
    for file_tag in sorted(config_paths_by_file_tag):
        if file_tag in file_tags_with_raw_file_configs:
            tag_cell = f"[{file_tag}](#{file_tag})"
        else:
            tag_cell = f"{file_tag}"

        views_cell = ",<br />".join(views_by_raw_file[file_tag])
        config_path = config_paths_by_file_tag[file_tag]
        rows.append([
            tag_cell,
            views_cell,
            self._get_last_updated(config_path),
            self._get_updated_by(config_path),
        ])

    writer = MarkdownTableWriter(
        headers=[
            "**Table**",
            "**Referencing Views**",
            "**Last Updated**",
            "**Updated By**",
        ],
        value_matrix=rows,
        margin=1,
    )
    return writer.dumps()
def evaluate(self, metrics1=None, metrics2=None, metrics3=None, metrics4=None):
    """Summarizes the metrics into a table.

    Each non-None argument is indexed as ``(method_name, metric_dict)``.
    The table, named "Report", has one column per method and one row per
    metric name of the first provided metric dict. It is written with
    ``write_table()``; nothing is returned.
    """
    from pytablewriter import MarkdownTableWriter

    provided = [
        entry for entry in (metrics1, metrics2, metrics3, metrics4)
        if entry is not None
    ]
    method_names = [entry[0] for entry in provided]
    metric_dicts = [entry[1] for entry in provided]

    # The first metric dict decides which metrics are reported.
    rows = [
        [metric_name] + [metric_dict[metric_name] for metric_dict in metric_dicts]
        for metric_name in metric_dicts[0].keys()
    ]

    writer = MarkdownTableWriter(
        table_name="Report",
        headers=[""] + method_names,
        value_matrix=rows,
    )
    writer.write_table()
def create_section(section_list, name):
    """Build one Markdown section (heading plus table) and count its rows.

    Each row of ``section_list`` is indexed positionally: 0 task, 1/2 dataset
    text and link, 3/4 SOTA text and link, 6 SOTA accuracy, 7 our accuracy,
    9 article link, 10 notebook file name. Indices 5 and 8 are not rendered.

    Returns:
        A ``(section_markdown, row_count)`` tuple.
    """
    header = [
        'Task', 'Dataset', 'SOTA', 'SOTA Acc', 'Our Acc', '📝', 'Notebook'
    ]
    colab_badge = (
        '[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)]'
        '(https://colab.research.google.com/github/eugenesiow/practical-ml/blob/master/notebooks/{}'
        ' "Open in Colab")'
    )

    rows = []
    for row in section_list:
        # Dataset and SOTA become links only when a URL is present.
        dataset_cell = '[{}]({})'.format(row[1], row[2]) if row[2] else row[1]
        sota_cell = '[{}]({})'.format(row[3], row[4]) if row[4] else row[3]
        rows.append([
            row[0],
            dataset_cell,
            sota_cell,
            row[6],
            row[7],
            '[📝]({} "Article")'.format(row[9]),
            colab_badge.format(row[10]),
        ])

    writer = MarkdownTableWriter(
        headers=header,
        value_matrix=rows,
    )
    heading = '## [↑](#-table-of-contents) {}\n\n'.format(name)
    section_str = heading + writer.dumps() + '\n\n'
    return section_str, len(rows)
def generate_markdown_table(input_params_yaml, output_markdown_file_path):
    """Generate Markdown documentation for a YAML parameter definition.

    The YAML file must provide the top-level keys ``groups`` and
    ``parameters``. A "Groups" overview table is written first, followed by
    one section per group (via ``gen_group``) and a final section for
    parameters that belong to no group.

    Args:
        input_params_yaml: Path of the YAML file to read.
        output_markdown_file_path: Path of the Markdown file to write. When
            falsy, nothing is written and an error is logged.

    Returns:
        ``1`` when reading the input or writing the output fails, ``0``
        otherwise (including the case where no output path was given).
    """
    try:
        with open(input_params_yaml, "r") as params_file:
            yaml_data = yaml.safe_load(params_file)
        groups = yaml_data["groups"]
        parameters = yaml_data["parameters"]

        group_table = MarkdownTableWriter()
        group_table.table_name = "Groups"
        group_table.styles = [
            Style(align="left"),
            Style(align="left"),
        ]
        group_table.headers = ["Group", "Description"]
        group_table.margin = 1
        group_table.value_matrix = [gen_group_header(group) for group in groups]

        # Pseudo group with an empty name: collects every parameter that has
        # no "group" entry.
        empty_group = {
            'name': '',
            'displayName': 'Ungrouped Parameters',
            'description': 'All parameters that are not assigned to a specific group.'
        }

        if not output_markdown_file_path:
            logging.error("Required markdown output")
            return 0

        try:
            with open(output_markdown_file_path, "w") as output:
                group_table.stream = output
                group_table.write_table()
                for group in groups:
                    gen_group(group, parameters, output)
                gen_group(empty_group, parameters, output)
        except Exception:
            # Top-level boundary: report the failure with its traceback.
            # Passing the exception as an extra positional argument without a
            # %-placeholder would make logging fail to format the record.
            logging.exception("Failed to output Markdown to %s",
                              output_markdown_file_path)
            return 1
        return 0
    except Exception:
        logging.exception("Failed to generate Markdown from %s",
                          input_params_yaml)
        return 1
def tab_comp(df_all, df_interpol, varname1, varname2, filename):
    """Compare paired variables and export the statistics as CSV and Markdown.

    For every pair ``(varname1[i], varname2[i])`` of columns in
    ``df_interpol`` the RMSE, bias (mean of first minus second), R2 and
    sample count N are computed over rows where both values are non-NaN.
    This is done for three subsets: all rows, "night" (``sza > 110``) and
    "day" (``sza < 70``). Values are truncated to two decimals.

    Args:
        df_all: Unused; kept for interface compatibility.
        df_interpol: DataFrame holding the compared columns and an ``sza``
            column (presumably the solar zenith angle in degrees - confirm
            with the caller).
        varname1: Column names of the first variable of each pair; they also
            name the result columns.
        varname2: Column names of the second variable of each pair.
        filename: Output path without extension; ``.csv`` and ``.md`` files
            are written.

    The Markdown table is additionally written to the writer's default
    stream before it is written to the ``.md`` file.
    """
    metric_names = ['RMSE', 'bias', 'R2', 'N']
    period_names = ['all', 'night', 'day']

    df = pd.DataFrame(columns=varname1)
    df['metric'] = metric_names * len(period_names)
    df['time'] = [
        period for period in period_names for _ in metric_names
    ]
    df.set_index(['metric', 'time'], inplace=True)

    sza = df_interpol['sza']
    # ``None`` means "no filtering"; the others are boolean row masks.
    period_masks = {'all': None, 'night': sza > 110, 'day': sza < 70}

    for idx, name1 in enumerate(varname1):
        name2 = varname2[idx]
        for period in period_names:
            mask = period_masks[period]
            subset = df_interpol if mask is None else df_interpol.loc[mask]
            x = subset[name1].values
            y = subset[name2].values

            # Keep only rows where both variables are available.
            valid = ~np.isnan(y) & ~np.isnan(x)
            x2 = x[valid]
            y2 = y[valid]

            df.loc[('R2', period), name1] = r2_score(x2, y2)
            df.loc[('bias', period), name1] = np.mean(x2 - y2)
            # mean_squared_error yields the MSE; take the square root so the
            # value matches its "RMSE" label.
            df.loc[('RMSE', period), name1] = np.sqrt(
                mean_squared_error(x2, y2))
            df.loc[('N', period), name1] = len(x2)

    def _truncate(value):
        """Truncate (not round) to two decimals."""
        return math.trunc(100 * value) / 100

    df = df.applymap(_truncate)
    df = df.reset_index()
    df.to_csv(filename + '.csv')

    writer = MarkdownTableWriter()
    writer.from_dataframe(df)
    writer.write_table()

    # change the output stream to a file
    with open(filename + '.md', "w") as f:
        writer.stream = f
        writer.write_table()
def gen_group(group, parameters, output):
    """Write one parameter group as a Markdown section to ``output``.

    The section consists of an anchored heading, the group description when
    the group has one, and a table (name, description, default) of all
    parameters whose "group" entry equals the group's name. Parameters
    without a "group" entry count as belonging to the group named "".

    Returns:
        Always ``0``.
    """
    parameter_fields = ["name", "description", "default"]

    display_name = group.get("displayName", group.get("name"))
    output.write(f'## <a name="{group.get("name")}"></a> {display_name} \n')
    if "description" in group:
        output.write(f'{group.get("description")}\n')
    output.write('\n\n')

    rows = []
    for parameter in parameters:
        if parameter.get("group", "") != group.get("name"):
            continue
        rows.append([parameter.get(field) for field in parameter_fields])

    param_table = MarkdownTableWriter()
    param_table.styles = [
        Style(align="left", font_weight="bold"),
        Style(align="left"),
        Style(align="left"),
    ]
    param_table.headers = [field.capitalize() for field in parameter_fields]
    param_table.margin = 1
    param_table.value_matrix = rows
    param_table.stream = output
    param_table.write_table()
    return 0
def generate_md_revision_sheet(filepath, result_path, lines, starting_item):
    """Write a numbered Markdown revision sheet for reviewer comments.

    Blank lines are skipped and every ``\\newline`` marker is removed from
    the remaining comments. The table is written to the writer's default
    stream and then to ``result_path``; the row matrix is also printed.

    Args:
        filepath: Unused; kept for interface compatibility.
        result_path: Output file path. When falsy (``None`` or ``""``) the
            sheet is written to ``revision_sheet.md``.
        lines: Iterable of comment strings.
        starting_item: Number assigned to the first comment; following
            comments count up from it.
    """
    writer = MarkdownTableWriter()

    # Fall back to the default file name. Concatenating onto a missing path
    # would raise TypeError when result_path is None.
    result_path = result_path or "revision_sheet.md"

    writer.headers = ["Nr.", "Comment", "How the comment is addressed"]

    comment_list = [
        line.replace('\\newline', '') for line in lines if line.strip()
    ]
    matrix = [
        [starting_item + offset, comment, ""]
        for offset, comment in enumerate(comment_list)
    ]
    print(matrix)

    writer.value_matrix = matrix
    writer.write_table()
    with open(result_path, "w") as f:
        writer.stream = f
        writer.write_table()
    return
def print_nodes_mark_down(node_type, nodes):
    """Print the rows from ``filter_nodes_data(node_type, nodes)`` as a Markdown table."""
    column_names = [
        'ID', 'Link to GitHub', "Created date", "Status", 'Title',
        'Assigned to'
    ]
    table = MarkdownTableWriter()
    table.headers = column_names
    table.value_matrix = filter_nodes_data(node_type, nodes)
    rendered = table.dumps()
    print(rendered)
def print_md_table(settings):
    """Write a Markdown table describing ``settings``, then print an empty line.

    ``settings`` maps each setting name to a dict with the keys "default",
    "context" and "help". The "Multiple" column reads "yes" when the dict
    has a "multiple" key and "no" otherwise. Names and non-empty defaults
    are wrapped in backticks.
    """
    rows = []
    for name, data in settings.items():
        name_cell = "`" + name + "`"
        if data["default"] == "":
            default_cell = ""
        else:
            default_cell = "`" + data["default"] + "`"
        context_cell = data["context"]
        multiple_cell = "yes" if "multiple" in data else "no"
        rows.append(
            [name_cell, default_cell, context_cell, multiple_cell, data["help"]])

    writer = MarkdownTableWriter(
        headers=["Setting", "Default", "Context", "Multiple", "Description"],
        value_matrix=rows)
    writer.write_table()
    print("")
def debug(self):
    """Write the aligned source/target sequences as a Markdown table named "debug".

    Every pair returned by ``self._parallelize()`` contributes one table row
    per text line; the slice representations and the arrow appear only on
    the first row of a pair. Pairs are separated by an empty row. As a side
    effect each sequence's ``context`` is set to the context of
    ``self._source`` / ``self._target``.
    """
    parallel_matches = self._parallelize()

    writer = MarkdownTableWriter()
    writer.table_name = "debug"
    writer.headers = ["Src slice", "Index src", "Text src", "", "Text tgt",
                      "Index tgt", "Tgt slice"]
    writer.column_styles = [Style(align='center')] * 7

    rows = []
    for source_sequence, target_sequence in parallel_matches:
        source_sequence.context = self._source.context
        target_sequence.context = self._target.context

        src_slice = source_sequence.slice_representation()
        src_indexes = "\n".join([str(i) for i in source_sequence.iter_index()])
        src_texts = "\n".join(
            [s.context.get_sequence_text(s) for s in source_sequence])
        arrow = '--->'
        tgt_texts = "\n".join(
            [s.context.get_sequence_text(s) for s in target_sequence])
        tgt_indexes = "\n".join([str(i) for i in target_sequence.iter_index()])
        tgt_slice = target_sequence.slice_representation()

        # Join-then-split also breaks up texts that contain newlines, so each
        # physical line gets its own row. zip_longest pads the single-valued
        # columns with None on the following rows.
        for cells in zip_longest(
                [src_slice], src_indexes.split('\n'), src_texts.split('\n'),
                [arrow],
                tgt_texts.split('\n'), tgt_indexes.split('\n'), [tgt_slice]):
            rows.append(list(cells))
        rows.append([""] * 7)

    # Drop the trailing separator row. Guarded because there is nothing to
    # pop when no matches were found.
    if rows:
        rows.pop()

    writer.value_matrix = rows
    writer.write_table()
def _generate_docs_for_raw_config(
    raw_file_config: DirectIngestRawFileConfig,
) -> str:
    """Generates documentation for the given raw file config and returns it as a string.

    The output is a heading with the file tag, the file description, and a
    table with one row per column in the config's own column order.
    Primary-key membership is matched case-insensitively.
    """
    upper_primary_keys = [key.upper() for key in raw_file_config.primary_key_cols]

    def _primary_key_cell(column_name: str) -> str:
        if column_name.upper() in upper_primary_keys:
            return "YES"
        return ""

    def _known_values_cell(
        known_values: Optional[List[ColumnEnumValueInfo]],
    ) -> str:
        # None means the column has no known-values list at all; an empty
        # list means it has one that is not documented.
        if known_values is None:
            return "N/A"
        if not known_values:
            return "<No documentation>"
        entries = []
        for enum in known_values:
            value = enum.value
            description = enum.description if enum.description else 'Unknown'
            entries.append(f"<b>{value}: </b> {description}")
        return ", <br/>".join(entries)

    documentation = (
        f"## {raw_file_config.file_tag}\n\n{raw_file_config.file_description}\n\n"
    )

    rows = []
    for column in raw_file_config.columns:
        rows.append([
            column.name,
            column.description or "<No documentation>",
            _primary_key_cell(column.name),
            _known_values_cell(column.known_values),
        ])

    writer = MarkdownTableWriter(
        headers=[
            "Column",
            "Column Description",
            "Part of Primary Key?",
            "Distinct Values",
        ],
        value_matrix=rows,
        # Margin values other than 0 have nondeterministic spacing. Do not change.
        margin=0,
    )
    return documentation + writer.dumps()
def write_to_table(log_dicts: dict):
    """Write a "Statistics" Markdown table with one row per entry of ``log_dicts``.

    ``log_dicts`` maps a solver/mode name to a dict of statistics. The keys
    of the first dict become the column headers (after a leading "solver"
    column), and every column is type-hinted as a string. An empty
    ``log_dicts`` trips the assertion.
    """
    headers = None
    if log_dicts:
        first_log = next(iter(log_dicts.values()))
        headers = ['solver'] + list(first_log.keys())

    rows = [[mode] + list(log_dict.values())
            for mode, log_dict in log_dicts.items()]

    assert headers is not None
    writer = MarkdownTableWriter(table_name="Statistics",
                                 headers=headers,
                                 value_matrix=rows,
                                 type_hints=[typehint.String] * len(headers))
    writer.write_table()
def aggregated_config_splits(self):
    """Try to build an aggregated markdown table with sizes for each split for each config.

    The table is only built when every config lists exactly the same splits,
    in the same order, as the first config; otherwise ``None`` is returned.
    """
    reference_config = list(self.configs_info.keys())[0]
    reference_splits = list(
        self.configs_info[reference_config]["split_sizes"].keys())

    rows = []
    for config_name, config_info in self.configs_info.items():
        split_sizes = config_info["split_sizes"]
        if list(split_sizes.keys()) != reference_splits:
            # The splits are not the same -> no aggregated table
            return None
        rows.append([config_name] +
                    [split_sizes[split] for split in reference_splits])

    writer = MarkdownTableWriter(table_name="### Configurations",
                                 headers=["name"] + reference_splits,
                                 value_matrix=rows)
    return self.get_markdown_string(writer)
def get_data_fields_description(self):
    """Describe the data fields of every config as Markdown tables.

    For each config a table (field name, type, description) is rendered from
    ``self.last_class_info``. When all rendered tables are identical, one
    table is emitted under a heading listing all config names; otherwise each
    config gets its own heading and table. Missing "type"/"comment"
    information for a field is reported on stdout and the error is re-raised.
    """
    rendered_tables = []
    for config_name, config_info in self.configs_info.items():
        rows = []
        for field_name, _description in config_info["fields"].items():
            field_info = self.last_class_info.get(field_name, {})
            try:
                field_type = field_info["type"]
                comment = field_info["comment"]
            except BaseException:
                # Say which field is incomplete, then let the error propagate.
                print("MISSING FIELD INFORMATION", self.dataset_name,
                      config_name, field_name, field_info)
                raise
            rows.append([field_name, field_type, comment])

        writer = MarkdownTableWriter(
            table_name=f"### {config_name}",
            headers=["field name", "type", "description"],
            value_matrix=rows,
        )
        rendered_tables.append(self.get_markdown_string(writer))

    if all(table == rendered_tables[0] for table in rendered_tables):
        heading = "#### " + ", ".join(list(self.configs_info.keys())) + "\n\n"
        return heading + rendered_tables[0]

    sections = [
        f"#### {config_name}\n\n" + table
        for config_name, table in zip(self.configs_info, rendered_tables)
    ]
    return "".join(sections)
def construct_table(gists: List[Dict[Any, Any]],
                    writer: MarkdownTableWriter) -> MarkdownTableWriter:
    '''
    Fill ``writer.value_matrix`` with one ``[link, files]`` row per gist.

    The link text is the gist description, or ``url`` when the description is
    empty. File names are wrapped in backticks and separated by ``<br>``.
    The same writer is returned.
    '''
    rows = []
    for gist in gists:
        # every file name of the gist, rendered as inline code
        file_names = ['`{}`'.format(name) for name in gist.get('files').keys()]
        # one HTML line break between files (no effect for zero or one file)
        files_cell = '<br>'.join(file_names)

        description = gist.get('description')
        gist_url = gist.get('html_url')
        if description:
            link_cell = '[{}]({})'.format(description, gist_url)
        else:
            link_cell = '[url]({})'.format(gist_url)
        rows.append([link_cell, files_cell])

    writer.value_matrix = rows
    return writer
def main():
    """Evaluate a language model on the requested tasks and report the results.

    Seeds ``random`` and NumPy from the parsed arguments, optionally wraps the
    model in a caching layer, runs the evaluator, prints the results as JSON
    (also writing them to ``args.output_path`` when given) and finally prints
    a Task/Metric/Value Markdown table.
    """
    args = parse_args()
    random.seed(args.seed)
    np.random.seed(args.seed)

    lm = models.get_model(args.model).create_from_arg_string(args.model_args)

    if args.limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )

    if not args.no_cache:
        # Turn the argument string into a file-name friendly suffix.
        args_suffix = args.model_args.replace('=', '-').replace(',', '_')
        cache_path = 'lm_cache/' + args.model + '_' + args_suffix + '.db'
        lm = base.CachingLM(lm, cache_path)

    task_names = (tasks.ALL_TASKS if args.tasks == "all_tasks"
                  else args.tasks.split(","))
    task_dict = tasks.get_task_dict(task_names)

    results = evaluator.evaluate(lm, task_dict, args.provide_description,
                                 args.num_fewshot, args.limit)

    dumped = json.dumps(results, indent=2)
    print(dumped)
    if args.output_path:
        with open(args.output_path, "w") as f:
            f.write(dumped)

    # MAKE TABLE
    from pytablewriter import MarkdownTableWriter

    rows = []
    for task, metrics in results.items():
        task_label = task
        for metric, value in metrics.items():
            rows.append([task_label, metric, '%.4f' % value])
            # Only the first row of a task carries the task name.
            task_label = ""

    writer = MarkdownTableWriter()
    writer.headers = ["Task", "Metric", "Value"]
    writer.value_matrix = rows
    print(writer.dumps())
def main():
    """Evaluate one task once per few-shot description and print each result.

    Uses a fixed seed of 42. The settings (``model``, ``model_args``,
    ``limit``, ``no_cache``, ``task``, ``fewshot_descriptions``,
    ``num_fewshot``) are read from the enclosing scope. For every description
    the results are printed as JSON and as a Task/Metric/Value Markdown
    table.
    """
    random.seed(42)
    np.random.seed(42)

    lm = models.get_model(model).create_from_arg_string(model_args)

    if limit:
        print(
            "WARNING: --limit SHOULD ONLY BE USED FOR TESTING. REAL METRICS SHOULD NOT BE COMPUTED USING LIMIT."
        )

    if not no_cache:
        # Turn the argument string into a file-name friendly suffix.
        args_suffix = model_args.replace('=', '-').replace(',', '_')
        lm = base.CachingLM(lm, 'lm_cache/' + model + '_' + args_suffix + '.db')

    task_dict = tasks.get_task_dict([task])

    for desc in fewshot_descriptions:
        custom_task_dict = {}
        for task_name, task_obj in task_dict.items():
            custom_task_dict[task_name] = CustomDescTask(task_obj, desc)

        results = evaluator.evaluate(lm, custom_task_dict, True, num_fewshot,
                                     limit)

        dumped = json.dumps(results, indent=2)
        print('Description:', desc)
        print(dumped)

        # MAKE TABLE
        from pytablewriter import MarkdownTableWriter

        rows = []
        for task_name, metrics in results.items():
            task_label = task_name
            for metric, value in metrics.items():
                rows.append([task_label, metric, '%.4f' % value])
                # Only the first row of a task carries the task name.
                task_label = ""

        writer = MarkdownTableWriter()
        writer.headers = ["Task", "Metric", "Value"]
        writer.value_matrix = rows
        print(writer.dumps())
def to_markdown(header, table, problem_count):
    """Render ``table`` as a Markdown table string.

    The table name is ``conf.table_title`` followed by ``conf.table_total``
    formatted with ``count=problem_count``. The table is written into an
    in-memory stream whose contents are returned.
    """
    buffer = six.StringIO()

    writer = MarkdownTableWriter()
    writer.table_name = (
        conf.table_title + conf.table_total.format(count=problem_count))
    writer.headers = header
    writer.value_matrix = table
    writer.margin = 1
    writer.stream = buffer
    writer.write_table()

    return writer.stream.getvalue()
def print(self):
    """Write the table (first column left-aligned, the rest centered), then print the versions text."""
    writer = MarkdownTableWriter()
    writer.headers = self._make_headers()
    writer.value_matrix = self._make_value_matrix()

    column_styles = [Style(align="left")]
    for _ in range(len(writer.headers) - 1):
        column_styles.append(Style(align="center"))
    writer.styles = column_styles

    writer.write_table()
    print("\n" + self._make_versions_text())
def write(self, title, math, relative=None, reverse_speedup=False):
    """Write a Markdown table of average and percentile results for ``math``.

    ``self.data`` maps ``(batch_size, beam_size)`` to per-precision results,
    each an ``(average, percentiles)`` pair. When ``relative`` names another
    precision, a "Speedup" column follows every data column. It holds this
    precision's value divided by the relative one, or the reciprocal when
    ``reverse_speedup`` is set.
    """
    writer = MarkdownTableWriter()
    writer.table_name = f'{title} - {math.upper()}'

    key_columns = ['**Batch Size**', '**Beam Size**']
    stat_columns = [f'**Avg ({self.unit})**']
    stat_columns += [f'**{p}% ({self.unit})**' for p in self.percentiles]
    if relative:
        stat_columns = interleave(stat_columns,
                                  ['**Speedup**'] * len(stat_columns))
    writer.headers = key_columns + stat_columns

    rows = []
    for (batch_size, beam_size), results in self.data.items():
        avg, res_percentiles = results[math]
        stats = [avg, *res_percentiles]

        if relative:
            rel_avg, rel_res_percentiles = results[relative]
            baseline = [rel_avg, *rel_res_percentiles]
            speedup = [value / base for (base, value) in zip(baseline, stats)]
            if reverse_speedup:
                speedup = [1 / s for s in speedup]
            stats = interleave(stats, speedup)

        rows.append([batch_size, beam_size] + stats)

    writer.value_matrix = rows
    writer.write_table()
def write(self, title, write_math):
    """Write a Markdown accuracy table with one column per requested precision.

    Columns are emitted in the fixed order fp32, tf32, fp16, restricted to
    the precisions contained in ``write_math``. ``self.data`` maps
    ``(batch_size, beam_size)`` to a per-precision dict of values.
    """
    precision_labels = (
        ('fp32', 'FP32'),
        ('tf32', 'TF32'),
        ('fp16', 'FP16'),
    )
    selected = [(key, label) for key, label in precision_labels
                if key in write_math]

    writer = MarkdownTableWriter()
    writer.table_name = f'{title}'
    writer.headers = ['**Batch Size**', '**Beam Size**'] + [
        f'**Accuracy - {label} ({self.unit})**' for _, label in selected
    ]

    rows = []
    for (batch_size, beam_size), values in self.data.items():
        rows.append([batch_size, beam_size] +
                    [values[key] for key, _ in selected])

    writer.value_matrix = rows
    writer.write_table()
def get_table_string(authors, dates, titles, topics):
    """
    Returns the "Completed Articles" table rendered as a Markdown string.

    The four inputs are consumed in lockstep, one table row per article.

    :param authors: author of each article
    :param dates: year of each article (shown in the "Year" column)
    :param titles: title of each article
    :param topics: for each article, an iterable of topic strings; they are
        joined with ", "
    :return: the Markdown table as a string
    """
    rows = []
    for author, year, title, article_topics in zip(authors, dates, titles,
                                                   topics):
        rows.append([author, title, year, ", ".join(article_topics)])

    writer = MarkdownTableWriter(
        table_name="Completed Articles",
        headers=["Author", "Title", "Year", "Topics"],
        value_matrix=rows,
        margin=1,  # add a whitespace for both sides of each cell
    )
    return writer.dumps()
def prepare_table(user: str, total: int,
                  user_urls: Callable[..., Any]) -> MarkdownTableWriter:
    '''
    Create the table writer and fill in its title and headers.

    The title combines, in this order:
        - user Gist link
        - total badge
        - build status badge
    all obtained from ``user_urls``. ``user`` is not used here.
    '''
    gist_link = user_urls(key='USER_GIST')
    total_badge = user_urls(key='TOTAL_BADGE', total=total)
    build_badge = user_urls(key='BUILD_BADGE')

    writer = MarkdownTableWriter()
    writer.table_name = '''[My Github Gists]({})<br>{}{}'''.format(
        gist_link, total_badge, build_badge)
    writer.headers = ['description', 'files']
    return writer