def make_numbers_timewise_filtered_dataset_metrics(self, dataset: str = "large", filter: str = "beta"):
    """Emit the ``num-methods`` macro of every time bucket of a filtered dataset.

    Reads ``time-wise-{filter}-filtered-{dataset}-dataset-stats.json`` from
    ``Macros.results_dir / "metrics"`` and saves the resulting macros to
    ``numbers-time-wise-{filter}-filtered-{dataset}-dataset-metrics.tex``
    under ``self.tables_dir``.

    :param dataset: dataset label used in the file names and macro names.
    :param filter: filter label used in the file names and macro names.
    """
    file = latex.File(
        self.tables_dir
        / f"numbers-time-wise-{filter}-filtered-{dataset}-dataset-metrics.tex"
    )
    metrics = IOUtils.load(
        Macros.results_dir
        / "metrics"
        / f"time-wise-{filter}-filtered-{dataset}-dataset-stats.json",
        IOUtils.Format.json,
    )

    for bucket, bucket_stats in metrics.items():
        for stat_name, stat_value in bucket_stats.items():
            if stat_name != "num-methods":
                # TODO: change back -- every other statistic is currently
                # skipped; the disabled branch emitted it as
                #   latex.Macro(f"{dataset}-{filter}-{t}-{k}", "{:.1f}".format(v))
                continue
            file.append_macro(
                latex.Macro(
                    f"{dataset}-{filter}-{bucket}-{stat_name}",
                    f"{stat_value}",
                )
            )

    file.save()
    return
def make_numbers_dataset_metrics(self):
    """Write one macro file of dataset metrics per task in ``Macros.tasks``.

    For every task, ``{task}-dataset.json`` is emitted with the ``ds-``
    macro prefix, followed by ``{task}-raw-dataset.json`` with the
    ``raw-ds-`` prefix. Both land in ``numbers-{task}-dataset-metrics.tex``.
    """
    for task in Macros.tasks:
        file = latex.File(self.tables_dir / f"numbers-{task}-dataset-metrics.tex")

        # (macro prefix, metrics file) pairs, in the order they are emitted.
        sources = (
            ("ds", f"{task}-dataset.json"),
            ("raw-ds", f"{task}-raw-dataset.json"),
        )
        for macro_prefix, json_name in sources:
            loaded = IOUtils.load(
                Macros.results_dir / "metrics" / json_name,
                IOUtils.Format.json,
            )
            for key, value in loaded.items():
                # Exact-type check: only plain ints get the integer format,
                # everything else is rendered with two decimals.
                number_format = ",d" if type(value) is int else ",.2f"
                file.append_macro(
                    latex.Macro(
                        f"{macro_prefix}-{task}-{key}",
                        f"{value:{number_format}}",
                    )
                )

        file.save()
    return
def make_numbers_timewise_dataset_metrics(self, dataset: str = "large"):
    """Emit one macro per (time bucket, statistic) of a time-wise dataset.

    Reads ``time-wise-{dataset}-dataset-stats.json`` from
    ``Macros.results_dir / "metrics"`` and saves macros named
    ``{dataset}-{bucket}-{statistic}`` to
    ``numbers-time-wise-{dataset}-dataset-metrics.tex``.

    :param dataset: dataset label used in the file names and macro names.
    """
    file = latex.File(
        self.tables_dir / f"numbers-time-wise-{dataset}-dataset-metrics.tex"
    )
    metrics = IOUtils.load(
        Macros.results_dir / "metrics" / f"time-wise-{dataset}-dataset-stats.json",
        IOUtils.Format.json,
    )

    for bucket, bucket_stats in metrics.items():
        for stat_name, stat_value in bucket_stats.items():
            file.append_macro(
                latex.Macro(f"{dataset}-{bucket}-{stat_name}", f"{stat_value}")
            )

    file.save()
    return
def make_numbers_model_results(self, model: str):
    """Write the statistics of one model's results as LaTeX macros.

    Loads ``results-stat-{model}.json`` from ``Macros.results_dir / "metrics"``
    and walks its four nesting levels (experiment -> test set -> metric ->
    statistic). Each leaf becomes a macro named
    ``{exp}-{test_set}-{metric}-{model}-{stat}``, saved to
    ``numbers-{model}-results.tex`` under ``self.tables_dir``.

    Missing numbers (a float NaN or the literal string ``"NaN"``) are rendered
    as ``\\Fix{NaN}``; every other value is formatted as ``,.2f``.

    :param model: model name used in the input/output file names and macro names.
    """
    # Function-scope import keeps this method self-contained.
    import math

    file = latex.File(self.tables_dir / f"numbers-{model}-results.tex")
    stat_results = IOUtils.load(
        Macros.results_dir / "metrics" / f"results-stat-{model}.json"
    )

    for exp, exp_stat_results in stat_results.items():
        for test_set, set_stat_results in exp_stat_results.items():
            for metric, metric_stat_results in set_stat_results.items():
                for stat, number in metric_stat_results.items():
                    macro_name = f"{exp}-{test_set}-{metric}-{model}-{stat}"

                    # NaN never compares equal to anything (itself included),
                    # so an ``==`` test cannot detect it; math.isnan can.
                    is_missing = number == "NaN" or (
                        isinstance(number, float) and math.isnan(number)
                    )
                    if is_missing:
                        macro_value = r"\Fix{NaN}"
                    else:
                        macro_value = f"{number:,.2f}"

                    file.append_macro(latex.Macro(macro_name, macro_value))

    file.save()
    return
def make_table_timewise_dataset_metrics(self, dataset: str = "large"):
    """Write the per-year dataset statistics table (2013 through 2020).

    The table has one row for each of ``num-methods``, ``num-projs`` and
    ``delta``; each cell uses the macro ``{dataset}-{year}_Jan_1-{row}``.
    Output goes to ``table-time-wise-{dataset}-dataset-metrics.tex``.

    :param dataset: dataset label used in the file name, caption and macro names.
    """
    file = latex.File(
        self.tables_dir / (f"table-time-wise-{dataset}-dataset-metrics.tex")
    )
    years = range(2013, 2021)

    # Header
    header_lines = (
        r"\begin{table*}",
        r"\begin{small}",
        r"\begin{center}",
        r"\caption{" + (r"Dataset statistics " + dataset) + "}",
        r"\begin{tabular}{l | r r r r r r r r}",
        r"\toprule",
        # Column titles: " &2013 & 2014 & ... & 2020 \\"
        r" &" + " & ".join(str(year) for year in years) + r" \\",
        r"\midrule",
    )
    for line in header_lines:
        file.append(line)

    # Body: one row per statistic, one cell per year.
    for row_label in ("num-methods", "num-projs", "delta"):
        file.append(f"{row_label}")
        for year in years:
            cell_macro = latex.Macro(f"{dataset}-{year}_Jan_1-{row_label}")
            file.append(" & " + cell_macro.use())
        file.append(r"\\")

    # Footer
    footer_lines = (
        r"\bottomrule",
        r"\end{tabular}",
        r"\end{center}",
        r"\end{small}",
        r"\vspace{\TVDatasetMetrics}",
        r"\end{table*}",
    )
    for line in footer_lines:
        file.append(line)

    file.save()
    return
def make_table_timewise_filtered_dataset_metrics(self, dataset: str = "large", filter: str = "beta"):
    """Write the time-wise statistics table(s) of a filtered dataset.

    With ``filter == "beta"`` a single one-column table (``num-methods``) is
    written. For any other filter two tables are written, one for ``method``
    and one for ``comment`` token-length statistics. Rows cover the yearly
    intervals 2013-2014 through 2019-2020.

    :param dataset: dataset label used in the file names and macro names.
    :param filter: filter label used in the file names (and, for ``"beta"``,
        in the macro names).
    """
    years = range(2013, 2020)
    # (macro time key, row label) for every yearly interval.
    intervals = [
        (f"{year}_Jan_1-{year + 1}_Jan_1", f"{year}-{year + 1}") for year in years
    ]

    def start_table(file_name, caption, tabular_begin, column_titles):
        # Create the file and write everything up to (and including) \midrule.
        table_file = latex.File(self.tables_dir / (file_name))
        table_file.append(r"\begin{table*}")
        table_file.append(r"\begin{small}")
        table_file.append(r"\begin{center}")
        table_file.append(r"\caption{" + caption + "}")
        table_file.append(tabular_begin)
        table_file.append(r"\toprule")
        table_file.append(r" ")
        for title in column_titles:
            table_file.append(r" &")
            table_file.append(f"{title} ")
        table_file.append(r" \\")
        table_file.append(r"\midrule")
        return table_file

    def finish_table(table_file):
        # Footer
        table_file.append(r"\bottomrule")
        table_file.append(r"\end{tabular}")
        table_file.append(r"\end{center}")
        table_file.append(r"\end{small}")
        table_file.append(r"\vspace{\TVDatasetMetrics}")
        table_file.append(r"\end{table*}")
        table_file.save()

    if filter == "beta":
        file = start_table(
            f"table-time-wise-{filter}-filtered-{dataset}-dataset-metrics.tex",
            "Method naming statistics after filtering",
            r"\begin{tabular}{l | c }",
            ["num-methods"],
        )
        for time_key, row_label in intervals:
            file.append(f"{row_label}")
            file.append(
                " & "
                + latex.Macro(f"{dataset}-{filter}-{time_key}-num-methods").use()
            )
            file.append(r"\\")
        finish_table(file)
        return

    # Per table kind: (column titles, token-statistic macro suffixes).
    layouts = [
        (
            "method",
            [
                "num-methods", "len-avg", "len-mode", "len-median",
                "len<100", "len<150", "len<200",
            ],
            ["avg", "mode", "median", "less-100", "less-150", "less-200"],
        ),
        (
            "comment",
            [
                "num-methods", "len-avg", "len-mode", "len-median",
                "len<20", "len<30", "len<50",
            ],
            ["avg", "mode", "median", "less-20", "less-30", "less-50"],
        ),
    ]
    for item, column_titles, token_stats in layouts:
        file = start_table(
            f"table-time-wise-{filter}-filtered-{item}-{dataset}-dataset-metrics.tex",
            f"{item} statistics after filtering",
            r"\begin{tabular}{l | c c c c c c c}",
            column_titles,
        )
        for time_key, row_label in intervals:
            file.append(f"{row_label}")
            # Unlike the "beta" table, these macro names carry no filter part.
            file.append(
                " & " + latex.Macro(f"{dataset}-{time_key}-num-methods").use()
            )
            for token_stat in token_stats:
                file.append(
                    " & "
                    + latex.Macro(
                        f"{dataset}-{time_key}-{item}-tokens-{token_stat}"
                    ).use()
                )
            file.append(r"\\")
        finish_table(file)
    return
def make_table_dataset_metrics(self, version: str):
    """Write the dataset-metrics table of every task in ``Macros.tasks``.

    :param version: ``"main"`` (columns: all / 2020 / 2019-2020, in a
        ``table`` environment) or ``"split"`` (train/val columns for the
        mixedproj, crossproj and evo setups plus a common test column, in a
        ``table*`` environment).
    :raises ValueError: if ``version`` is neither ``"main"`` nor ``"split"``
        (checked once per task, so never raised when there are no tasks).
    """
    for task in Macros.tasks:
        is_main = version == "main"
        is_split = (not is_main) and version == "split"
        if not (is_main or is_split):
            raise ValueError(f"Invalid version {version}")

        if is_main:
            file = latex.File(self.tables_dir / f"table-{task}-dataset-metrics-main.tex")
        else:
            file = latex.File(self.tables_dir / f"table-{task}-dataset-metrics-split.tex")

        # Metric key -> leading cell(s) of its table row, in row order.
        # NOTE(review): the "len-name-le-3" / "len-name-le-5" rows use the
        # header macros "...-le2" / "...-le3" -- confirm this is intended.
        metric_2_th = collections.OrderedDict([
            ("num-proj", r"\multicolumn{2}{c|}{\UseMacro{TH-ds-num-project}}"),
            ("num-data", r"\multicolumn{2}{c|}{\UseMacro{TH-ds-num-data}}"),
            ("len-meth-AVG", r"& \UseMacro{TH-ds-len-method-avg}"),
            ("len-meth-MODE", r"& \UseMacro{TH-ds-len-method-mode}"),
            ("len-meth-MEDIAN", r"& \UseMacro{TH-ds-len-method-median}"),
            ("len-meth-le-100", r"& \UseMacro{TH-ds-len-method-le100}"),
            ("len-meth-le-150", r"& \UseMacro{TH-ds-len-method-le150}"),
            ("len-meth-le-200", r"\multirow{-6}{*}{\UseMacro{TH-ds-len-method}} & \UseMacro{TH-ds-len-method-le200}"),
            ("len-com-AVG", r"& \UseMacro{TH-ds-len-comment-avg}"),
            ("len-com-MODE", r"& \UseMacro{TH-ds-len-comment-mode}"),
            ("len-com-MEDIAN", r"& \UseMacro{TH-ds-len-comment-median}"),
            ("len-com-le-20", r"& \UseMacro{TH-ds-len-comment-le20}"),
            ("len-com-le-30", r"& \UseMacro{TH-ds-len-comment-le30}"),
            ("len-com-le-50", r"\multirow{-6}{*}{\UseMacro{TH-ds-len-comment}} & \UseMacro{TH-ds-len-comment-le50}"),
            ("len-name-AVG", r"& \UseMacro{TH-ds-len-name-avg}"),
            ("len-name-MODE", r"& \UseMacro{TH-ds-len-name-mode}"),
            ("len-name-MEDIAN", r"& \UseMacro{TH-ds-len-name-median}"),
            ("len-name-le-3", r"& \UseMacro{TH-ds-len-name-le2}"),
            ("len-name-le-5", r"& \UseMacro{TH-ds-len-name-le3}"),
            ("len-name-le-6", r"\multirow{-6}{*}{\UseMacro{TH-ds-len-name}} & \UseMacro{TH-ds-len-name-le6}"),
        ])
        # Rows followed by a \midrule.
        sep_after_rows = ("num-data", "len-meth-le-200", "len-com-le-50")

        # Column key -> whether its macros come from the raw ("raw-ds-") set.
        dt_2_is_raw = collections.OrderedDict()
        if is_main:
            dt_2_is_raw["all"] = True
            dt_2_is_raw["2020"] = False
            dt_2_is_raw["2019-2020"] = False
            sep_after_cols = []
        else:
            for exp in ("mixedproj", "crossproj", "evo"):
                for dt in (Macros.train, Macros.val):
                    dt_2_is_raw[f"{exp}-2020-{dt}"] = False
            dt_2_is_raw[f"2020-{Macros.test_common}"] = False
            # Columns followed by an empty spacer column.
            sep_after_cols = [
                f"mixedproj-2020-{Macros.val}",
                f"crossproj-2020-{Macros.val}",
            ]

        environment = "table*" if is_split else "table"
        table_name = "DatasetMetricsMain" if is_main else "DatasetMetricsSplit"

        # Header
        file.append(r"\begin{" + environment + "}")
        file.append(r"\begin{small}")
        file.append(r"\begin{center}")
        file.append(r"\caption{\TC" + table_name + "}")
        if is_main:
            file.append(
                r"\begin{tabular}{ l@{\hspace{2pt}}|@{\hspace{2pt}}c@{\hspace{2pt}} | r r r}"
            )
        else:
            file.append(
                r"\begin{tabular}{ l@{\hspace{2pt}}|@{\hspace{2pt}}c@{\hspace{2pt}} | rr @{\hspace{5pt}}c@{\hspace{5pt}} rr @{\hspace{5pt}}c@{\hspace{5pt}} rr r}"
            )
        file.append(r"\toprule")

        if is_main:
            # Line 1
            file.append(r"\multicolumn{2}{c|}{} & & & \\")
            # Line 2
            file.append(
                r"\multicolumn{2}{c|}{\multirow{-2}{*}{\THDSStat}} & \multirow{-2}{*}{\UseMacro{TH-ds-all}} & \multirow{-2}{*}{\UseMacro{TH-ds-2020}} & \multirow{-2}{*}{\UseMacro{TH-ds-2019-2020}} \\"
            )
        else:
            # Line 1
            file.append(
                r"\multicolumn{2}{c|}{}"
                r" & \multicolumn{2}{c}{\UseMacro{TH-ds-mixedproj}} &"
                r" & \multicolumn{2}{c}{\UseMacro{TH-ds-crossproj}} &"
                r" & \multicolumn{2}{c}{\UseMacro{TH-ds-evo}}"
                r" & \\\cline{3-4}\cline{6-7}\cline{9-10}"
            )
            # Line 2
            file.append(
                r"\multicolumn{2}{c|}{\multirow{-2}{*}{\THDSStat}}"
                r" & \UseMacro{TH-ds-mixedproj-train} & \UseMacro{TH-ds-mixedproj-val} &"
                r" & \UseMacro{TH-ds-crossproj-train} & \UseMacro{TH-ds-crossproj-val} &"
                r" & \UseMacro{TH-ds-evo-train} & \UseMacro{TH-ds-evo-val}"
                r" & \multirow{-2}{*}{\UseMacro{TH-ds-test}} \\"
            )
        file.append(r"\midrule")

        def cell_macro_name(metric, dt, is_raw):
            # "num-proj" cells ignore the raw flag and only distinguish the
            # crossproj train/val columns and the common test column.
            if metric == "num-proj":
                if dt == f"crossproj-2020-{Macros.train}":
                    return f"ds-{task}-num-proj_{Macros.train}"
                if dt == f"crossproj-2020-{Macros.val}":
                    return f"ds-{task}-num-proj_{Macros.val}"
                if dt == f"2020-{Macros.test_common}":
                    return f"ds-{task}-num-proj_{Macros.test}"
                return f"ds-{task}-num-proj"
            if is_raw:
                return f"raw-ds-{task}-{metric}_{dt}"
            return f"ds-{task}-{metric}_{dt}"

        # Body
        for metric, row_th in metric_2_th.items():
            file.append(row_th)
            for dt, is_raw in dt_2_is_raw.items():
                macro_name = cell_macro_name(metric, dt, is_raw)
                file.append(" & " + latex.Macro(macro_name).use())
                if dt in sep_after_cols:
                    file.append(" & ")
            file.append(r"\\")
            if metric in sep_after_rows:
                file.append(r"\midrule")

        # Footer
        file.append(r"\bottomrule")
        file.append(r"\end{tabular}")
        file.append(r"\end{center}")
        file.append(r"\end{small}")
        file.append(r"\vspace{\TV" + table_name + "}")
        file.append(r"\end{" + environment + "}")

        file.save()
    return
def make_table_methd_name_results(self, task="Method-naming"):
    """Write the per-model precision/recall/f1 results table.

    Rows are the yearly training intervals 2013-2014 through 2017-2018,
    followed by a ``latest-mixed`` and a ``latest-cross-project`` row. Every
    cell uses a macro whose name starts with the lower-cased model name.
    Output goes to ``table-{task}-models-results.tex``.

    :param task: label used in the output file name and in the caption.
    """
    models = ["Bi-LSTM", "no-split-Bi-LSTM", "Code2Seq"]
    metrics = ["precision", "recall", "f1"]
    file = latex.File(self.tables_dir / f"table-{task}-models-results.tex")

    # evo results
    years = range(13, 18)

    def emit_row(label, key_middle):
        # Row label, then one cell per (model, metric), then the row end.
        file.append(label)
        for model in models:
            model_key = model.lower()
            for metric in metrics:
                key = f"{model_key}-{key_middle}-{metric}"
                file.append(" & " + latex.Macro(key).use())
        file.append(r"\\")

    # Header
    file.append(r"\begin{table*}")
    file.append(r"\begin{small}")
    file.append(r"\begin{center}")
    file.append(r"\caption{" + f"{task} results" + "}")
    # One "|c" column per (model, metric): \begin{tabular}{l|c|c|...}
    file.append(r"\begin{tabular}{l" + "|c" * (len(models) * 3) + "}")
    file.append(r"\toprule")
    file.append(r" \multirow{2}{*}{Time-Metrics}")
    for model in models:
        file.append(r"& \multicolumn{3}{c}" + "{" + f"{model}" + "}")
    file.append(r"\\")
    for _ in models:
        for metric in metrics:
            file.append(f"& {metric}")
    file.append(r"\\")
    file.append(r"\midrule")

    # Body
    for year in years:
        emit_row(f"20{year}-20{year + 1}-train", f"{year}{year + 1}-train")
    emit_row("latest-mixed", "latest")
    emit_row("latest-cross-project", "cross-proj-latest")

    # Footer
    for line in (
        r"\bottomrule",
        r"\end{tabular}",
        r"\end{center}",
        r"\end{small}",
        r"\vspace{\TVDatasetMetrics}",
        r"\end{table*}",
    ):
        file.append(line)

    file.save()
    return
def make_table_models_results(self, task: str):
    """Write the model-comparison results table for one task.

    Each cell shows the ``{exp}-test_common-{metric}-{model}-AVG`` macro.
    Cells listed in ``sign-test/{task}.json`` additionally get superscript
    symbols: the i-th listed pair is tagged with ``self.SYMBOLS[i]`` on both
    of its (exp, model, metric) members.

    :param task: ``"ComGen"`` or ``"MethNam"``; selects models and metrics.
    :raises ValueError: for any other task.
    """
    known_tasks = (
        ("ComGen", ["Seq2seq", "Seq2seqAtt", "DeepCom"], ["bleu", "xmatch"]),
        (
            "MethNam",
            ["Bi-LSTM", "no-split-Bi-LSTM", "Code2Seq"],
            ["f1", "precision", "recall", "xmatch"],
        ),
    )
    for known_task, task_models, task_metrics in known_tasks:
        if task == known_task:
            models, metrics = task_models, task_metrics
            break
    else:
        raise ValueError(f"Invalid task {task}")

    exps = ["mixedproj-2020", "crossproj-2020", "evo-2020"]

    # Load stat sign test results
    no_diff_pairs = IOUtils.load(
        Macros.results_dir / "metrics" / "sign-test" / f"{task}.json"
    )
    symbols_by_cell = collections.defaultdict(list)
    for pair_index, (first_cell, second_cell, _) in enumerate(no_diff_pairs):
        mark = self.SYMBOLS[pair_index]
        for cell in (first_cell, second_cell):
            symbols_by_cell[tuple(cell)].append(mark)

    file = latex.File(self.tables_dir / f"table-{task}-models-results.tex")
    table_name = f"Results{task}"

    # Header
    file.append(r"\begin{table*}")
    file.append(r"\begin{small}")
    file.append(r"\begin{center}")
    file.append(r"\caption{" + (r"\TC" + table_name) + "}")
    file.append(
        r"\begin{tabular}{l" + ("|" + "r" * len(metrics)) * len(exps) + "}"
    )
    file.append(r"\toprule")

    # Line 1: one multicolumn title per experiment; all but the last get a
    # trailing vertical rule.
    alignments = ["c|"] * (len(exps) - 1) + ["c"]
    for exp, alignment in zip(exps, alignments):
        file.append(
            r" & \multicolumn{"
            + f"{len(metrics)}"
            + r"}{"
            + alignment
            + r"}{\UseMacro{TH-exp-"
            + exp
            + r"}}"
        )
    file.append(r"\\")

    # Line 2
    file.append(r"\multirow{-2}{*}{\THModel} ")
    for _ in exps:
        for metric in metrics:
            file.append(r" & \UseMacro{TH-metric-" + metric + r"}")
    file.append(r"\\")
    file.append(r"\midrule")

    # Body: one row per model.
    for model in models:
        file.append(r"\UseMacro{TH-model-" + model + r"}")
        for exp in exps:
            for metric in metrics:
                marks = symbols_by_cell.get((exp, model, metric), ())
                suffix = "$^{" + "".join(marks) + "}$" if marks else ""
                file.append(
                    r" & "
                    + latex.Macro(f"{exp}-test_common-{metric}-{model}-AVG").use()
                    + suffix
                )
        file.append(r"\\")

    # Footer
    file.append(r"\bottomrule")
    file.append(r"\end{tabular}")
    file.append(r"\end{center}")
    file.append(r"\end{small}")
    file.append(r"\vspace{\TV" + table_name + r"}")
    file.append(r"\end{table*}")

    file.save()
    return