def main(args):
    output = u""
    if args.reportlist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.reportlist()
    elif args.metriclist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.metriclist(args.language)
    elif args.rulelist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.rulelist(args.language)
    elif args.validate:
        output = validate(args.files)
    elif args.xml:
        output = pdf2xml(args)
    elif args.report != "":  # "is not" compares identity, not string equality
        output = report(args)

    # Write output
    if args.outfile != "":
        with open(args.outfile, "w") as f:
            f.write(output.encode("utf8"))
            f.write(u"\n".encode("utf8"))
    else:
        sys.stdout.write(output.encode("utf8"))
        sys.stdout.write(u"\n".encode("utf8"))
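# A minimal sketch of the argument parser that main(args) appears to expect.
# The flag names are derived from the attributes read above (plus args.latex,
# which the report classes below consult); defaults and help texts are
# assumptions, not taken from the original source.
def build_arg_parser():
    import argparse
    parser = argparse.ArgumentParser(description="Text analyzer CLI (sketch)")
    parser.add_argument("files", nargs="*", help="input PDF/XML files")
    parser.add_argument("--language", default="de")
    parser.add_argument("--reportlist", action="store_true")
    parser.add_argument("--metriclist", action="store_true")
    parser.add_argument("--rulelist", action="store_true")
    parser.add_argument("--validate", action="store_true")
    parser.add_argument("--xml", action="store_true")
    parser.add_argument("--report", default="")
    parser.add_argument("--outfile", default="")
    parser.add_argument("--latex", action="store_true")
    return parser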
def evaluate(self, node): A = Analyzer.instance() corp = A.get(corpus="TIGER") tagger = corp.tagger(True) tagged_words = tagger.tag(node.words()) pres_verbs = 0 total_verbs = 0 for w in tagged_words: if w[1] and w[1].startswith("V"): #if w[1].startswith(u"VVFIN") or\ # w[1].startswith(u"VAFIN") or\ # w[1].startswith(u"VVINF") or\ # w[1].startswith(u"VVIZU"): # beinhaltet noch vergangenheit! # pres_verbs += 1 total_verbs += 1 tense = tenses(w[0]) if tense is not []: tense = [t[0] for t in tense] past_count = 0 present_count = 0 for t in tense: if t == "past": past_count += 1 elif t == "present": present_count += 1 if present_count > past_count: pres_verbs += 1 #print w if total_verbs > 0: return float(pres_verbs) / total_verbs return 0.0
def execute(self, docs, args): if len(docs) < 1: return u"" output = [] for doc in docs: output.append(u"# Dokumentbericht") output.append(u"") output.append(u"## Metriken") output.append(u"") for metric_ID in sorted((_METRIC_EXPECTATIONS.keys())): output.append(self._execute_metric(metric_ID, doc)) output.append(u"") output.append(u"## Regeln") output.append(u"") rule_IDs = RULE_NAMES A = Analyzer.instance() rules = [ A.get(rule=ID) for ID in rule_IDs if A.get(rule=ID) is not None ] rule_messages = eval_doc(doc, rules) if len(rule_messages) == 0: output.append(u"Es liegen keine Regelverletzungen vor!") else: for m in rule_messages: output.append(m) return u"\n".join(output)
def evaluate(self, node):
    # Average word length in characters; no corpus or tokenizer needed.
    words = node.words()
    word_count = len(words)
    char_count = sum(len(w) for w in words)
    if word_count > 0:
        return char_count / float(word_count)
    return 0.0
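# A quick way to sanity-check this metric without the full pipeline: any
# object exposing a words() method will do, since this metric (unlike the
# others) touches neither corpus nor tokenizer. FakeNode is a hypothetical
# stand-in, not part of the original code base.
class FakeNode(object):
    def __init__(self, words):
        self._words = words

    def words(self):
        return self._words

# "ein" (3) + "kurzer" (6) + "Satz" (4) = 13 characters over 3 words:
#   metric.evaluate(FakeNode([u"ein", u"kurzer", u"Satz"]))  # -> 13.0 / 3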
def evaluate(self, node): A = Analyzer.instance() corp = A.get(corpus="TIGER") sents = node.sents(tokenizer=corp.sent_tokenizer()) summ = 0 for s in sents: s = [w for w in s if w not in NO_WORDS] summ += len(s) if len(sents) > 0: return float(summ) / len(sents) return 0.0
def evaluate(self, node):
    words = [w for w in node.words() if w not in NO_WORDS]
    A = Analyzer.instance()
    corp = A.get(corpus="TIGER")
    sents = node.sents(tokenizer=corp.sent_tokenizer())
    char_count = float(sum(len(w) for w in words))
    word_count = float(len(words))
    sent_count = float(len(sents))
    if word_count > 0.0 and sent_count > 0.0:
        return (word_count / sent_count) + 9 * (char_count / word_count)
    return 0.0
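# Worked example of the score above: a text with 600 characters in 100 words
# spread over 5 sentences yields
#   (100 / 5) + 9 * (600 / 100) = 20 + 54 = 74
# i.e. longer sentences and longer words both push the score up.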
def evaluate(self, node): words = node.words() A = Analyzer.instance() corp = A.get(corpus="TIGER") sents_count = len(node.sents(tokenizer=corp.sent_tokenizer())) count = 0 for w in words: low = w.lower() if low in self.IMPERSONAL: count += 1 if sents_count > 0: return float(count) / sents_count return 0.0
def evaluate(self, node): A = Analyzer.instance() corp = A.get(corpus="TIGER") sents = node.sents(tokenizer=corp.sent_tokenizer()) sent_len_diff = 0 last_sent = None for s in sents: s = [w for w in s if w not in NO_WORDS] if last_sent is not None: sent_len_diff += abs(len(last_sent) - len(s)) last_sent = s if len(sents) > 1: return sent_len_diff / float(len(sents) - 1) return 0.0
def evaluate(self, node): A = Analyzer.instance() corp = A.get(corpus="TIGER") tagger = corp.tagger(True) words = node.words() words_no_no_words = [w for w in words if w not in NO_WORDS] tagged_words = tagger.tag(words) word_count = len(words_no_no_words) count = 0 for w in tagged_words: if w[1] and "ADV-MO" == w[1]: count += 1 if word_count > 0: return float(count) / word_count return 0.0
def evaluate(self, node): A = Analyzer.instance() corp = A.get(corpus="TIGER") fillers = list() if corp: fillers = corp.fillers() words = node.words() words_no_no_words = [w for w in words if w not in NO_WORDS] filler_count = 0 for w in words: if w in fillers: filler_count += 1 if len(words_no_no_words) > 0: return float(filler_count) / len(words_no_no_words) return 0.0
def evaluate(self, node): words = node.words() A = Analyzer.instance() corp = A.get(corpus="TIGER") sents_count = len(node.sents(tokenizer=corp.sent_tokenizer())) tagger = corp.tagger(True) tagged_words = tagger.tag(words) count = 0 if len(tagged_words) > 0: for w in tagged_words: if w[1] and w[1].startswith("V"): lemm = lemma(w[0]) if lemm in self.VERBS: count += 1 return float(count) / sents_count return 0.0
def evaluate(self, node):
    words = node.words()
    words_no_no_words = [w for w in words if w not in NO_WORDS]
    A = Analyzer.instance()
    corp = A.get(corpus="TIGER")
    tagger = corp.tagger(True)
    tagged_words = tagger.tag(words)
    unique_words = set()
    if len(tagged_words) > 0 and len(words_no_no_words) > 0:
        for w in tagged_words:
            if w[0] not in NO_WORDS:
                if w[1] and w[1].startswith("V"):
                    # Verbs are reduced to their lemma before counting.
                    unique_words.add(lemma(w[0]))
                else:
                    unique_words.add(w[0])
        return float(len(unique_words)) / len(words_no_no_words)
    return 0.0
def execute(self, docs, args):
    output = list()
    metric_names = METRIC_NAMES
    A = Analyzer.instance()
    metrics = [A.get(metric=m) for m in metric_names]
    metrics = [m for m in metrics if m is not None]
    corp = A.get(corpus=u"TIGER")
    results = list()
    for m in metrics:
        results.append([m.evaluate(d) for d in docs])
    stats = [mean_stdev(r, ROUND) for r in results]
    if args.latex:
        output.append(u"\\begin{tabular}{l|l l|r}")
        output.append(u"  Metric & mean & stdev & TIGER \\\\")
        output.append(u"  \\hline")
    else:
        output.append(u"# Bericht \"%s\"" % self.ID)
        output.append(u"")
        output.append(u" * MEAN:  der Mittelwert über alle Dokumente")
        output.append(u" * STDEV: die dazugehörige Standardabweichung")
        output.append(u" * TIGER: Metrikwert für die deutsche Sprachreferenz,")
        output.append(u"          den TIGER-Corpus")
        output.append(u"")
        output.append(u"%s | MEAN  | STDEV | TIGER" %
                      u"METRIC".ljust(METRIC_COL_WIDTH))
        output.append(u"%s-+-------+-------+------" %
                      u"".ljust(METRIC_COL_WIDTH, u"-"))
    for i in range(len(metrics)):
        # Execute metrics on the reference corpus
        val = round(metrics[i].evaluate(corp), ROUND)
        if args.latex:
            output.append(u"  %s & %s & %s & %s \\\\" %
                          (metric_names[i].ljust(METRIC_COL_WIDTH),
                           stats[i][0], stats[i][1], val))
        else:
            output.append(u"%s | %05.2f | %05.2f | %05.2f" %
                          (metric_names[i].ljust(METRIC_COL_WIDTH),
                           stats[i][0], stats[i][1], val))
    if args.latex:
        output.append(u"\\end{tabular}")
    return u"\n".join(output)
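# mean_stdev() is used above but not defined in this section. A minimal
# sketch matching its usage: it takes a list of values plus a rounding
# precision and returns a (mean, stdev) pair. Whether the original uses
# sample or population standard deviation is an assumption here.
import math

def mean_stdev(values, digits):
    if not values:
        return (0.0, 0.0)
    mean = sum(values) / float(len(values))
    variance = sum((v - mean) ** 2 for v in values) / float(len(values))
    return (round(mean, digits), round(math.sqrt(variance), digits))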
def _execute_metric(self, metric_ID, node): A = Analyzer.instance() metric = A.get(metric=metric_ID) val = metric.evaluate(node) expect = _METRIC_EXPECTATIONS.get(metric_ID, None) output = u"" if expect is not None: val_str = str(round(val, ROUND)) if (expect.low is not None) and (expect.high is not None): output = u" * %s %s (erwartet: zw. %.2f und %.2f)" % (metric_ID, val_str, expect.low, expect.high) elif expect.low is not None: output = u" * %s %s (erwartet: min. %.2f)" % (metric_ID, val_str, expect.low) elif expect.high is not None: output = u" * %s %s (erwartet: max. %.2f)" % (metric_ID, val_str, expect.high) if (expect.low is not None) and val < expect.low: output += u"\n %s" % expect.msg_toolow elif (expect.high is not None) and val > expect.high: output += u"\n %s" % expect.msg_toohigh else: output += u"\n %s" % expect.msg_ok return output
def _execute_metric(self, metric_ID, node): A = Analyzer.instance() metric = A.get(metric=metric_ID) val = metric.evaluate(node) expect = _METRIC_EXPECTATIONS.get(metric_ID, None) output = "" if expect is not None: val_str = str(round(val, ROUND)) if (expect.low is not None) and (expect.high is not None): output = " * %s %s (erwartet: zw. %.2f und %.2f)" % (metric_ID, val_str, expect.low, expect.high) elif expect.low is not None: output = " * %s %s (erwartet: min. %.2f)" % (metric_ID, val_str, expect.low) elif expect.high is not None: output = " * %s %s (erwartet: max. %.2f)" % (metric_ID, val_str, expect.high) if (expect.low is not None) and val < expect.low: output += "\n %s" % expect.msg_toolow elif (expect.high is not None) and val > expect.high: output += "\n %s" % expect.msg_toohigh else: output += "\n %s" % expect.msg_ok return output
def report(args, output=u""): # Convert files to Documents dc = DocumentConverter() docs = list() for f in args.files: if op.isfile(f): if f.lower().endswith(PDF_SUFFIX): doc = PDF2document(f) docs.append(doc) elif f.lower().endswith(XML_SUFFIX): docs.extend(dc.to_Documents(f)) # Fetch and execute report load_language(args.language) analyzer = Analyzer.instance() rep = analyzer.get(report=args.report) if rep: output += rep.execute(docs, args) pass else: output += 'No report named "%s" available!' % args.report return output
def execute(self, docs, args): if len(docs) < 1: return u"" output = list() output.append(u"# Dokumentbericht") output.append(u"") output.append(u"## Metriken") output.append(u"") doc = docs[0] for metric_ID in sorted((_METRIC_EXPECTATIONS.keys())): output.append(self._execute_metric(metric_ID, doc)) output.append(u"") output.append(u"## Regeln") output.append(u"") rule_IDs = RULE_NAMES A = Analyzer.instance() rules = [A.get(rule=ID) for ID in rule_IDs if A.get(rule=ID) is not None] rule_messages = eval_doc(doc, rules) if len(rule_messages) == 0: output.append(u"Es liegen keine Regelverletzungen vor!") else: for m in rule_messages: output.append(m) return u"\n".join(output)
def execute(self, docs, args):
    output = list()
    if len(docs) < 2 or len(docs) % 2 != 0:
        output.append(u"Error: Need an even number of documents (at least 2)"
                      u" for the document comparison report!")
    else:
        metric_names = METRIC_NAMES
        A = Analyzer.instance()
        metrics = [A.get(metric=m) for m in metric_names]
        metrics = [m for m in metrics if m is not None]
        if len(docs) == 2:
            output.append(u"# Bericht \"%s\"" % self.ID)
            output.append(u"")
            output.append(u" * PROGRESS: Vorher- --> Nachher-Wert.")
            output.append(u"     (+) ... Erhöhung")
            output.append(u"     (-) ... Verringerung")
            output.append(u"     (=) ... gleichbleibend")
            output.append(u"")
            output.append(u"%s | PROGRESS" %
                          u"METRIC".ljust(METRIC_COL_WIDTH))
            output.append(u"%s-+---------------------" %
                          u"".ljust(METRIC_COL_WIDTH, u"-"))
            for m in metrics:
                vals = [m.evaluate(doc) for doc in docs]
                progress = u"="
                if vals[0] > vals[1]:
                    progress = u"-"
                elif vals[0] < vals[1]:
                    progress = u"+"
                output.append(u"%s | %05.2f --> %05.2f (%s)" %
                              (m.ID.ljust(METRIC_COL_WIDTH),
                               vals[0], vals[1], progress))
        else:
            # Documents form before/after pairs: docs[i] pairs with docs[i + half].
            half = len(docs) // 2
            if args.latex:
                output.append(u"\\begin{tabular}{l|l l|l l|r}")
                output.append(u"\\multirow{2}{*}{\\textbf{Metrik}} & \\multicolumn{2}{|c|}{\\textbf{Erhöhung}} & \\multicolumn{2}{|c|}{\\textbf{Verringerung}} & \\textbf{gleichbleibend} \\\\")
                output.append(u" & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{c}{$\\#$} \\\\")
                output.append(u"  \\hline")
            else:
                output.append(u"# Bericht \"%s\"" % self.ID)
                output.append(u"")
                output.append(u" * +:      Anzahl an Metrikerhöhungen")
                output.append(u" * DELTA+: Durchschnittliche Erhöhung um diesen Wert")
                output.append(u" * -:      Anzahl an Metrikverringerungen")
                output.append(u" * DELTA-: Durchschnittliche Verringerung um diesen Wert")
                output.append(u" * =:      Anzahl an Dokumentpaaren, bei denen der")
                output.append(u"           Metrikwert gleich geblieben ist")
                output.append(u"")
                output.append(u"%s | +  | DELTA+ | -  | DELTA- | = " %
                              u"METRIC".ljust(METRIC_COL_WIDTH))
                output.append(u"%s-+----+--------+----+--------+----" %
                              u"".ljust(METRIC_COL_WIDTH, u"-"))
            for m in metrics:
                results = list()
                for i in range(half):
                    results.append((m.evaluate(docs[i]),
                                    m.evaluate(docs[i + half])))
                counts = [0, 0, 0]  # increased, decreased, unchanged
                avg_diffs = [0.0, 0.0]
                for r in results:
                    if r[0] > r[1]:
                        counts[1] += 1
                        avg_diffs[1] += r[0] - r[1]
                    elif r[0] < r[1]:
                        counts[0] += 1
                        avg_diffs[0] += r[1] - r[0]
                    else:
                        counts[2] += 1
                if counts[0] > 0:
                    avg_diffs[0] = round(avg_diffs[0] / float(counts[0]),
                                         ROUND + 1)
                if counts[1] > 0:
                    avg_diffs[1] = round(avg_diffs[1] / float(counts[1]),
                                         ROUND + 1)
                if args.latex:
                    output.append(u"  %s & %s & %s & %s & %s & %s \\\\" %
                                  (m.ID, counts[0], avg_diffs[0],
                                   counts[1], avg_diffs[1], counts[2]))
                else:
                    output.append(u"%s | %02d | %06.3f | %02d | %06.3f | %02d" %
                                  (m.ID.ljust(METRIC_COL_WIDTH),
                                   counts[0], avg_diffs[0],
                                   counts[1], avg_diffs[1], counts[2]))
            if args.latex:
                output.append(u"\\end{tabular}")
    return u"\n".join(output)
def execute(self, docs, args):
    output = []
    metric_names = METRIC_NAMES
    A = Analyzer.instance()
    metrics = [A.get(metric=m) for m in metric_names]
    metrics = [m for m in metrics if m is not None]
    results = list()
    for m in metrics:
        results.append([m.evaluate(d) for d in docs])
    exceedances = self.compute_exceedances(metric_names, results)
    exceedances_transposed = list(map(list, zip(*exceedances)))

    # Metric matrix output
    doc_numbers = range(1, len(docs) + 1)
    if args.latex:
        tabular_format_str = u"".join(u" r" for d in docs)
        output.append(u"\\begin{tabular}{l|%s}" % tabular_format_str)
        docs_header_str = u"".join(map(u"& doc%02d ".__mod__, doc_numbers))
        output.append(u"  Metrik %s\\\\" % docs_header_str)
        output.append(u"  \\hline")
        for i in range(len(metrics)):
            value_str = u""
            for doc_nr in range(len(results[i])):
                # Exceeded values are emphasized in the LaTeX table.
                if exceedances[i][doc_nr] == 1:
                    value_str += u"& \\emph{%.2f} " % results[i][doc_nr]
                else:
                    value_str += u"& %.2f " % results[i][doc_nr]
            output.append(u"  %s %s\\\\" %
                          (metric_names[i].ljust(METRIC_COL_WIDTH),
                           value_str))
    else:
        output.append(u"# Bericht \"%s\"" % self.ID)
        output.append(u"")
        docs_header_str = u"".join(map(u"| doc%02d ".__mod__, doc_numbers))
        output.append(u"%s%s" % (u"METRIC".ljust(METRIC_COL_WIDTH),
                                 docs_header_str))
        dash_length = max(len(docs_header_str) - 2, 0)
        output.append(u"%s+%s" % (u"".ljust(METRIC_COL_WIDTH, u"-"),
                                  u"".ljust(dash_length, u"-")))
        for i in range(len(metrics)):
            value_str = u"".join(map(u"| %05.2f ".__mod__, results[i]))
            output.append(u"%s%s" %
                          (metric_names[i].ljust(METRIC_COL_WIDTH),
                           value_str))

    # Exceedances/shortfalls
    exceedances_counts = map(sum, exceedances_transposed)
    if args.latex:
        output.append(u"  \\hline")
        exceedances_str = u"".join(map(u"& %d ".__mod__, exceedances_counts))
        output.append(u"  %s %s\\\\" %
                      (u"Überschreitungen".ljust(METRIC_COL_WIDTH),
                       exceedances_str))
    else:
        output.append(u"%s+%s" % (u"".ljust(METRIC_COL_WIDTH, u"-"),
                                  u"".ljust(dash_length, u"-")))
        exceedances_str = u"".join(map(u"| %02d ".__mod__, exceedances_counts))
        output.append(u"%s%s" %
                      (u"Transgressions".ljust(METRIC_COL_WIDTH),
                       exceedances_str))

    # Rule violations
    rule_IDs = RULE_NAMES
    rules = [A.get(rule=ID) for ID in rule_IDs
             if A.get(rule=ID) is not None]
    violated_rule_counts = [len(eval_doc(doc, rules)) for doc in docs]
    if args.latex:
        violated_rule_counts_str = u"".join(
            map(u"& %d ".__mod__, violated_rule_counts))
        output.append(u"  %s %s\\\\" %
                      (u"Regelverletzungen".ljust(METRIC_COL_WIDTH),
                       violated_rule_counts_str))
        output.append(u"\\end{tabular}")
    else:
        violated_rule_counts_str = u"".join(
            map(u"| %02d ".__mod__, violated_rule_counts))
        output.append(u"%s%s" %
                      (u"Violated rules".ljust(METRIC_COL_WIDTH),
                       violated_rule_counts_str))
    return u"\n".join(output)
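# compute_exceedances() is called above but not shown. From its usage --
# exceedances[i][doc_nr] == 1 marks a value to highlight, and the flags are
# summed per document -- a plausible sketch checks each result against the
# _METRIC_EXPECTATIONS bounds; the exact criterion in the original may differ.
def compute_exceedances(self, metric_names, results):
    exceedances = []
    for name, row in zip(metric_names, results):
        expect = _METRIC_EXPECTATIONS.get(name, None)
        flags = []
        for val in row:
            out_of_bounds = expect is not None and (
                (expect.low is not None and val < expect.low) or
                (expect.high is not None and val > expect.high))
            flags.append(1 if out_of_bounds else 0)
        exceedances.append(flags)
    return exceedances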