Beispiel #1
0
def main(args):
    """Dispatch the command-line request described by *args*.

    Exactly one action is executed (report list, metric list, rule list,
    validation, PDF-to-XML conversion, or a named report); the resulting
    text is written UTF-8 encoded to args.outfile, or to stdout when no
    outfile is given.
    """
    output = u""

    if args.reportlist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.reportlist()

    elif args.metriclist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.metriclist(args.language)

    elif args.rulelist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.rulelist(args.language)

    elif args.validate:
        output = validate(args.files)

    elif args.xml:
        output = pdf2xml(args)

    # The original compared with `is not ""`, which tests object identity
    # against a literal and only works by accident of string interning.
    elif args.report != "":
        output = report(args)

    # Write output (encoded byte strings; this code targets Python 2,
    # where text-mode files accept them -- TODO confirm before porting).
    if args.outfile != "":
        with open(args.outfile, "w") as f:
            f.write(output.encode("utf8"))
            f.write(u"\n".encode("utf8"))
    else:
        sys.stdout.write(output.encode("utf8"))
        sys.stdout.write(u"\n".encode("utf8"))
Beispiel #2
0
 def evaluate(self, node):
     """Return the fraction of verbs in *node* that are (predominantly)
     present tense, or 0.0 when the node contains no verbs.

     Each verb's tense candidates come from pattern.de's tenses(); a verb
     counts as "present" when more candidates say present than past.
     """
     A = Analyzer.instance()
     corp = A.get(corpus="TIGER")
     tagger = corp.tagger(True)
     tagged_words = tagger.tag(node.words())
     pres_verbs = 0
     total_verbs = 0
     for w in tagged_words:
         # STTS verb tags all start with "V".
         if w[1] and w[1].startswith("V"):
             total_verbs += 1
             tense = tenses(w[0])
             # The original tested `tense is not []`, which is always
             # True (identity against a fresh list); test truthiness.
             if tense:
                 labels = [t[0] for t in tense]
                 past_count = labels.count("past")
                 present_count = labels.count("present")
                 if present_count > past_count:
                     pres_verbs += 1
     if total_verbs > 0:
         return float(pres_verbs) / total_verbs
     return 0.0
Beispiel #3
0
 def execute(self, docs, args):
     """Render a document report (metrics section plus rule violations)
     for every document in *docs*; returns the joined report text."""
     if not docs:
         return u""
     lines = []
     for doc in docs:
         lines.extend([u"# Dokumentbericht", u"", u"## Metriken", u""])
         # One formatted line per metric with a known expectation.
         for metric_ID in sorted(_METRIC_EXPECTATIONS.keys()):
             lines.append(self._execute_metric(metric_ID, doc))
         lines.extend([u"", u"## Regeln", u""])
         analyzer = Analyzer.instance()
         rules = []
         for rule_ID in RULE_NAMES:
             rule = analyzer.get(rule=rule_ID)
             if rule is not None:
                 rules.append(rule)
         messages = eval_doc(doc, rules)
         if messages:
             lines.extend(messages)
         else:
             lines.append(u"Es liegen keine Regelverletzungen vor!")
     return u"\n".join(lines)
Beispiel #4
0
 def evaluate(self, node):
     """Return the average word length (characters per word) over all
     words in *node*, or 0.0 when the node contains no words."""
     # The original fetched an unused Analyzer instance and computed an
     # unused word_count via reduce(); both were dead code.
     words = node.words()
     if not words:
         return 0.0
     total_chars = sum(len(w) for w in words)
     return total_chars / float(len(words))
Beispiel #5
0
 def evaluate(self, node):
     """Return the average number of content tokens (words not in
     NO_WORDS) per sentence, or 0.0 when there are no sentences."""
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     sentences = node.sents(tokenizer=tiger.sent_tokenizer())
     total_tokens = sum(
         len([w for w in sent if w not in NO_WORDS]) for sent in sentences)
     if len(sentences) == 0:
         return 0.0
     return float(total_tokens) / len(sentences)
Beispiel #6
0
 def evaluate(self, node):
     """Compute a readability score from mean sentence length and mean
     word length: words/sentence + 9 * chars/word.  Returns 0.0 when
     there are no words or no sentences."""
     tokens = [w for w in node.words() if w not in NO_WORDS]
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     sentences = node.sents(tokenizer=tiger.sent_tokenizer())
     n_chars = float(sum(len(t) for t in tokens))
     n_words = float(len(tokens))
     n_sents = float(len(sentences))
     if n_words <= 0.0 or n_sents <= 0.0:
         return 0.0
     return (n_words / n_sents) + 9 * (n_chars / n_words)
Beispiel #7
0
 def evaluate(self, node):
     """Return the number of impersonal expressions (case-insensitive
     membership in self.IMPERSONAL) per sentence; 0.0 if no sentences."""
     words = node.words()
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     n_sents = len(node.sents(tokenizer=tiger.sent_tokenizer()))
     hits = sum(1 for w in words if w.lower() in self.IMPERSONAL)
     if n_sents == 0:
         return 0.0
     return float(hits) / n_sents
Beispiel #8
0
 def evaluate(self, node):
     """Return the mean absolute difference in content-token count
     between consecutive sentences; 0.0 with fewer than two sentences."""
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     sentences = node.sents(tokenizer=tiger.sent_tokenizer())
     # Strip NO_WORDS tokens before measuring each sentence's length.
     cleaned = [[w for w in sent if w not in NO_WORDS] for sent in sentences]
     diff_total = sum(
         abs(len(prev) - len(cur)) for prev, cur in zip(cleaned, cleaned[1:]))
     if len(sentences) > 1:
         return diff_total / float(len(sentences) - 1)
     return 0.0
Beispiel #9
0
 def evaluate(self, node):
     """Return the fraction of modifier adverbs (tag "ADV-MO") among the
     content tokens (words not in NO_WORDS); 0.0 when there are none."""
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     tagger = tiger.tagger(True)
     words = node.words()
     content_words = [w for w in words if w not in NO_WORDS]
     tagged = tagger.tag(words)
     adv_hits = sum(1 for w in tagged if w[1] and w[1] == "ADV-MO")
     if not content_words:
         return 0.0
     return float(adv_hits) / len(content_words)
Beispiel #10
0
 def evaluate(self, node):
     """Return the ratio of filler-word occurrences to content tokens.

     Fillers are counted over ALL words, but the denominator is the
     number of words not in NO_WORDS -- this asymmetry mirrors the
     original code (presumably fillers never occur in NO_WORDS; TODO
     confirm).  Returns 0.0 when there are no content tokens.
     """
     A = Analyzer.instance()
     corp = A.get(corpus="TIGER")
     # Build a set once so per-word membership tests are O(1) instead of
     # scanning the filler list for every word.
     fillers = set(corp.fillers()) if corp else set()
     words = node.words()
     content_words = [w for w in words if w not in NO_WORDS]
     if not content_words:
         return 0.0
     filler_count = sum(1 for w in words if w in fillers)
     return float(filler_count) / len(content_words)
Beispiel #11
0
def main(args):
    """Dispatch the command-line request described by *args*.

    Exactly one action is executed (report list, metric list, rule list,
    validation, PDF-to-XML conversion, or a named report); the resulting
    text is written UTF-8 encoded to args.outfile, or to stdout when no
    outfile is given.
    """
    output = u""

    if args.reportlist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.reportlist()

    elif args.metriclist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.metriclist(args.language)

    elif args.rulelist:
        load_language(args.language, True)
        analyzer = Analyzer.instance(args.language)
        output = analyzer.rulelist(args.language)

    elif args.validate:
        output = validate(args.files)

    elif args.xml:
        output = pdf2xml(args)

    # The original compared with `is not ""`, which tests object identity
    # against a literal and only works by accident of string interning.
    elif args.report != "":
        output = report(args)

    # Write output (encoded byte strings; this code targets Python 2,
    # where text-mode files accept them -- TODO confirm before porting).
    if args.outfile != "":
        with open(args.outfile, "w") as f:
            f.write(output.encode("utf8"))
            f.write(u"\n".encode("utf8"))
    else:
        sys.stdout.write(output.encode("utf8"))
        sys.stdout.write(u"\n".encode("utf8"))
Beispiel #12
0
 def evaluate(self, node):
     """Return the number of occurrences of verbs from self.VERBS
     (matched by lemma) per sentence; 0.0 when there is nothing to count.

     The original divided by sents_count whenever any tagged word
     existed, which could raise ZeroDivisionError if the tokenizer
     returned no sentences; the division is now guarded.
     """
     words = node.words()
     A = Analyzer.instance()
     corp = A.get(corpus="TIGER")
     sents_count = len(node.sents(tokenizer=corp.sent_tokenizer()))
     tagger = corp.tagger(True)
     tagged_words = tagger.tag(words)
     count = 0
     if len(tagged_words) > 0 and sents_count > 0:
         for w in tagged_words:
             # STTS verb tags all start with "V".
             if w[1] and w[1].startswith("V"):
                 lemm = lemma(w[0])
                 if lemm in self.VERBS:
                     count += 1
         return float(count) / sents_count
     return 0.0
Beispiel #13
0
 def evaluate(self, node):
     """Return a lexical-diversity ratio: the number of unique tokens
     (verbs reduced to their lemma) divided by the number of content
     tokens (words not in NO_WORDS); 0.0 when either set is empty."""
     words = node.words()
     content_words = [w for w in words if w not in NO_WORDS]
     analyzer = Analyzer.instance()
     tiger = analyzer.get(corpus="TIGER")
     tagger = tiger.tagger(True)
     tagged = tagger.tag(words)
     if not tagged or not content_words:
         return 0.0
     vocabulary = set()
     for w in tagged:
         token, tag = w[0], w[1]
         if token in NO_WORDS:
             continue
         # Normalize verbs to their lemma so inflections collapse.
         if tag and tag.startswith("V"):
             vocabulary.add(lemma(token))
         else:
             vocabulary.add(token)
     return float(len(vocabulary)) / len(content_words)
Beispiel #14
0
 def execute(self, docs, args):
     """Render the corpus-comparison report: per-metric mean and stdev
     over *docs* next to the metric's value on the TIGER reference
     corpus.  Emits a LaTeX tabular when args.latex is set, otherwise a
     plain-text table.
     """
     output = []
     A = Analyzer.instance()
     # Resolve metrics and drop unavailable ones while keeping names and
     # instances aligned: the original filtered only the instance list,
     # so one missing metric shifted every following name/value pairing.
     metric_names = []
     metrics = []
     for name in METRIC_NAMES:
         metric = A.get(metric=name)
         if metric is not None:
             metric_names.append(name)
             metrics.append(metric)
     corp = A.get(corpus=u"TIGER")
     results = [[m.evaluate(d) for d in docs] for m in metrics]
     stats = [mean_stdev(r, ROUND) for r in results]
     if args.latex:
         output.append(u"\\begin{tabular}{l|l l|r}")
         output.append(u"    Metric & mean & stdev & TIGER \\\\")
         output.append(u"    \\hline")
     else:
         output.append(u"# Bericht \"%s\"" % self.ID)
         output.append(u"")
         output.append(u" * MEAN:  der Mittelwert über alle Dokumente")
         output.append(u" * STDEV: die dazugehörige Standardabweichung")
         output.append(
             u" * TIGER: Metrikwert für die deutsche Sprachereferenz,")
         output.append(u"          den TIGER-Corpus")
         output.append(u"")
         output.append(u"%s | MEAN  | STDEV | TIGER" %
                       u"METRIC".ljust(METRIC_COL_WIDTH))
         output.append(u"%s-+-------+-------+------" %
                       u"".ljust(METRIC_COL_WIDTH, u"-"))
     for i in range(len(metrics)):
         # Evaluate the metric on the reference corpus for the last column.
         ref_val = round(metrics[i].evaluate(corp), ROUND)
         if args.latex:
             output.append(u"    %s & %s & %s & %s \\\\" %
                           (metric_names[i].ljust(METRIC_COL_WIDTH),
                            stats[i][0], stats[i][1], ref_val))
         else:
             output.append(u"%s | %05.2f | %05.2f | %05.2f" %
                           (metric_names[i].ljust(METRIC_COL_WIDTH),
                            stats[i][0], stats[i][1], ref_val))
     if args.latex:
         output.append(u"\\end{tabular}")
     return u"\n".join(output)
Beispiel #15
0
 def _execute_metric(self, metric_ID, node):
     """Evaluate one metric on *node* and format a result line showing
     the value, its expected range, and an assessment message."""
     analyzer = Analyzer.instance()
     value = analyzer.get(metric=metric_ID).evaluate(node)
     expect = _METRIC_EXPECTATIONS.get(metric_ID, None)
     if expect is None:
         return u""
     value_str = str(round(value, ROUND))
     has_low = expect.low is not None
     has_high = expect.high is not None
     # First line: the value plus whichever bounds are defined.
     if has_low and has_high:
         result = u" * %s %s (erwartet: zw. %.2f und %.2f)" % (metric_ID, value_str, expect.low, expect.high)
     elif has_low:
         result = u" * %s %s (erwartet: min. %.2f)" % (metric_ID, value_str, expect.low)
     elif has_high:
         result = u" * %s %s (erwartet: max. %.2f)" % (metric_ID, value_str, expect.high)
     else:
         result = u""
     # Second line: too-low / too-high / ok assessment.
     if has_low and value < expect.low:
         result += u"\n     %s" % expect.msg_toolow
     elif has_high and value > expect.high:
         result += u"\n     %s" % expect.msg_toohigh
     else:
         result += u"\n     %s" % expect.msg_ok
     return result
Beispiel #16
0
 def _execute_metric(self, metric_ID, node):
     """Evaluate one metric on *node* and format a result line showing
     the value, its expected range, and an assessment message."""
     analyzer = Analyzer.instance()
     value = analyzer.get(metric=metric_ID).evaluate(node)
     expectation = _METRIC_EXPECTATIONS.get(metric_ID, None)
     if expectation is None:
         return ""
     value_text = str(round(value, ROUND))
     low, high = expectation.low, expectation.high
     # First line: the value plus whichever bounds are defined.
     if low is not None and high is not None:
         line = " * %s %s (erwartet: zw. %.2f und %.2f)" % (metric_ID, value_text, low, high)
     elif low is not None:
         line = " * %s %s (erwartet: min. %.2f)" % (metric_ID, value_text, low)
     elif high is not None:
         line = " * %s %s (erwartet: max. %.2f)" % (metric_ID, value_text, high)
     else:
         line = ""
     # Second line: too-low / too-high / ok assessment.
     if low is not None and value < low:
         line += "\n     %s" % expectation.msg_toolow
     elif high is not None and value > high:
         line += "\n     %s" % expectation.msg_toohigh
     else:
         line += "\n     %s" % expectation.msg_ok
     return line
Beispiel #17
0
def report(args, output=u""):
    """Convert args.files (PDF or XML) into Documents, run the report
    named by args.report on them, and return the report text appended
    to *output*."""
    # Convert files to Documents
    dc = DocumentConverter()
    docs = list()
    for f in args.files:
        if op.isfile(f):
            if f.lower().endswith(PDF_SUFFIX):
                docs.append(PDF2document(f))
            elif f.lower().endswith(XML_SUFFIX):
                docs.extend(dc.to_Documents(f))

    # Fetch and execute report (the original had a redundant `pass` here)
    load_language(args.language)
    analyzer = Analyzer.instance()
    rep = analyzer.get(report=args.report)
    if rep:
        output += rep.execute(docs, args)
    else:
        output += 'No report named "%s" available!' % args.report
    return output
Beispiel #18
0
def report(args, output=u""):
    """Convert args.files (PDF or XML) into Documents, run the report
    named by args.report on them, and return the report text appended
    to *output*."""
    # Convert files to Documents
    dc = DocumentConverter()
    docs = list()
    for f in args.files:
        if op.isfile(f):
            if f.lower().endswith(PDF_SUFFIX):
                docs.append(PDF2document(f))
            elif f.lower().endswith(XML_SUFFIX):
                docs.extend(dc.to_Documents(f))

    # Fetch and execute report (the original had a redundant `pass` here)
    load_language(args.language)
    analyzer = Analyzer.instance()
    rep = analyzer.get(report=args.report)
    if rep:
        output += rep.execute(docs, args)
    else:
        output += 'No report named "%s" available!' % args.report
    return output
Beispiel #19
0
 def execute(self, docs, args):
     """Render the corpus-comparison report: per-metric mean and stdev
     over *docs* next to the metric's value on the TIGER reference
     corpus.  Emits a LaTeX tabular when args.latex is set, otherwise a
     plain-text table.
     """
     output = []
     A = Analyzer.instance()
     # Resolve metrics and drop unavailable ones while keeping names and
     # instances aligned: the original filtered only the instance list,
     # so one missing metric shifted every following name/value pairing.
     metric_names = []
     metrics = []
     for name in METRIC_NAMES:
         metric = A.get(metric=name)
         if metric is not None:
             metric_names.append(name)
             metrics.append(metric)
     corp = A.get(corpus=u"TIGER")
     results = [[m.evaluate(d) for d in docs] for m in metrics]
     stats = [mean_stdev(r, ROUND) for r in results]
     if args.latex:
         output.append(u"\\begin{tabular}{l|l l|r}")
         output.append(u"    Metric & mean & stdev & TIGER \\\\")
         output.append(u"    \\hline")
     else:
         output.append(u"# Bericht \"%s\"" % self.ID)
         output.append(u"")
         output.append(u" * MEAN:  der Mittelwert über alle Dokumente")
         output.append(u" * STDEV: die dazugehörige Standardabweichung")
         output.append(u" * TIGER: Metrikwert für die deutsche Sprachereferenz,")
         output.append(u"          den TIGER-Corpus")
         output.append(u"")
         output.append(u"%s | MEAN  | STDEV | TIGER" % u"METRIC".ljust(METRIC_COL_WIDTH))
         output.append(u"%s-+-------+-------+------" % u"".ljust(METRIC_COL_WIDTH, u"-"))
     for i in range(len(metrics)):
         # Evaluate the metric on the reference corpus for the last column.
         ref_val = round(metrics[i].evaluate(corp), ROUND)
         if args.latex:
             output.append(u"    %s & %s & %s & %s \\\\" % (metric_names[i].ljust(METRIC_COL_WIDTH), stats[i][0], stats[i][1], ref_val))
         else:
             output.append(u"%s | %05.2f | %05.2f | %05.2f" % (metric_names[i].ljust(METRIC_COL_WIDTH), stats[i][0], stats[i][1], ref_val))
     if args.latex:
         output.append(u"\\end{tabular}")
     return u"\n".join(output)
Beispiel #20
0
 def execute(self, docs, args):
     """Render the document report (metrics plus rule violations) for
     the first document in *docs*; returns u"" when *docs* is empty."""
     if len(docs) < 1:
         return u""
     doc = docs[0]
     parts = [u"# Dokumentbericht", u"", u"## Metriken", u""]
     # One formatted line per metric with a known expectation.
     for metric_ID in sorted(_METRIC_EXPECTATIONS.keys()):
         parts.append(self._execute_metric(metric_ID, doc))
     parts += [u"", u"## Regeln", u""]
     analyzer = Analyzer.instance()
     rules = []
     for rule_ID in RULE_NAMES:
         rule = analyzer.get(rule=rule_ID)
         if rule is not None:
             rules.append(rule)
     messages = eval_doc(doc, rules)
     if messages:
         parts.extend(messages)
     else:
         parts.append(u"Es liegen keine Regelverletzungen vor!")
     return u"\n".join(parts)
Beispiel #21
0
    def execute(self, docs, args):
        """Render the document comparison report.

        With exactly two documents, emits a before/after progress table;
        with 2n documents, compares the pairs (docs[i], docs[i + n]) and
        aggregates increase/decrease counts and average deltas per
        metric.  Requires an even number of documents (at least 2).
        Emits a LaTeX tabular when args.latex is set, otherwise text.
        """
        output = list()
        if len(docs) < 2 or len(docs) % 2 != 0:
            output.append(
                u"Error: Need an even number of documents (at least 2) for the document comparison report!"
            )
        else:
            metric_names = METRIC_NAMES
            A = Analyzer.instance()
            # Resolve metric instances; unavailable metrics are dropped.
            metrics = [A.get(metric=m) for m in metric_names]
            metrics = [m for m in metrics if m != None]
            if len(docs) == 2:
                # Two documents: simple before --> after progress listing.
                output.append(u"# Bericht \"%s\"" % self.ID)
                output.append(u"")
                output.append(u" * PROGRESS: Vorher- --> Nachher-Wert.")
                output.append(u"             (+) ... Erhöhung         ")
                output.append(u"             (-) ... Verringerung     ")
                output.append(u"             (=) ... gleichbleibend   ")
                output.append(u"")
                output.append(u"%s | PROGRESS" %
                              u"METRIC".ljust(METRIC_COL_WIDTH))
                output.append(u"%s-+---------------------" %
                              u"".ljust(METRIC_COL_WIDTH, u"-"))
                for m in metrics:
                    vals = [m.evaluate(doc) for doc in docs]
                    progress = u"="
                    if vals[0] > vals[1]:
                        progress = u"-"
                    elif vals[0] < vals[1]:
                        progress = u"+"
                    output.append(u"%s | %05.2f --> %05.2f  (%s)" %
                                  (m.ID.ljust(METRIC_COL_WIDTH), vals[0],
                                   vals[1], progress))

            else:
                # NOTE(review): Python-2 integer division; under Python 3
                # this yields a float and range(half) below would raise.
                half = len(docs) / 2
                if args.latex:
                    output.append(u"\\begin{tabular}{l|l l|l l|r}")
                    output.append(
                        u"\\multirow{2}{*}{\\textbf{Metrik}} & \\multicolumn{2}{|c|}{\\textbf{Erhöhung}} & \\multicolumn{2}{|c|}{\\textbf{Verringerung}} & \\textbf{gleichbleibend} \\\\"
                    )
                    output.append(
                        u"                                 & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{c}{$\\#$} \\\\"
                    )
                    output.append(u"    \\hline")
                else:
                    output.append(u"# Bericht \"%s\"" % self.ID)
                    output.append(u"")
                    output.append(u" * +:      Anzahl an Metrikerhöhungen")
                    output.append(
                        u" * DELTA+: Durchschnittliche Erhöhung um diesen Wert"
                    )
                    output.append(u" * -:      Anzahl an Metrikverringerungen")
                    output.append(
                        u" * DELTA-: Durchschnittliche Verringerung um diesen Wert"
                    )
                    output.append(
                        u" * =:      Anzahl an Dokumentpaaren, bei denen der")
                    output.append(
                        u"           Metrikwert gleich geblieben ist")
                    output.append(u"")
                    output.append(u"%s | +  | DELTA+ | -  | DELTA- | =  " %
                                  u"METRIC".ljust(METRIC_COL_WIDTH))
                    output.append(u"%s-+----+--------+----+--------+----" %
                                  u"".ljust(METRIC_COL_WIDTH, u"-"))
                for m in metrics:
                    # Pair document i of the first half with document
                    # i + half of the second half.
                    results = list()
                    for i in range(half):
                        results.append(
                            (m.evaluate(docs[i]), m.evaluate(docs[i + half])))
                    counts = [0, 0, 0]  # greater, less, equal
                    avg_diffs = [0.0, 0.0]  # summed increases / decreases
                    for r in results:
                        if r[0] > r[1]:
                            counts[1] += 1
                            avg_diffs[1] += r[0] - r[1]
                        elif r[0] < r[1]:
                            counts[0] += 1
                            avg_diffs[0] += r[1] - r[0]
                        else:
                            counts[2] += 1
                    # Turn summed deltas into averages (guarded against
                    # division by zero).
                    if counts[0] > 0:
                        avg_diffs[0] /= float(counts[0])
                        avg_diffs[0] = round(avg_diffs[0], ROUND + 1)
                    if counts[1] > 0:
                        avg_diffs[1] /= float(counts[1])
                        avg_diffs[1] = round(avg_diffs[1], ROUND + 1)
                    if args.latex:
                        output.append(u"    %s & %s & %s & %s & %s & %s \\\\" %
                                      (m.ID, counts[0], avg_diffs[0],
                                       counts[1], avg_diffs[1], counts[2]))
                    else:
                        output.append(
                            u"%s | %02d | %06.3f | %02d | %06.3f | %02d" %
                            (m.ID.ljust(METRIC_COL_WIDTH), counts[0],
                             avg_diffs[0], counts[1], avg_diffs[1], counts[2]))
                if args.latex:
                    output.append(u"\\end{tabular}")
        return u"\n".join(output)
Beispiel #22
0
    def execute(self, docs, args):
        """Render the metric matrix report: one row per metric and one
        column per document, followed by per-document counts of
        expectation exceedances and of rule violations.  Emits a LaTeX
        tabular when args.latex is set, otherwise a plain-text table.
        """
        output = []
        metric_names = METRIC_NAMES
        A = Analyzer.instance()
        # NOTE(review): metrics is filtered for None below while
        # metric_names is not, so if any metric is unavailable,
        # metric_names[i] drifts out of step with metrics[i]/results[i]
        # and compute_exceedances receives mismatched lists -- verify.
        metrics = [A.get(metric=m) for m in metric_names]
        metrics = [m for m in metrics if m != None]
        corp = A.get(corpus=u"TIGER")
        results = list()
        for m in metrics:
            results.append([m.evaluate(d) for d in docs])

        # exceedances[i][j] == 1 when metric i is outside its expected
        # range on document j (per compute_exceedances).
        exceedances = self.compute_exceedances(metric_names, results)
        exceedances_transposed = list(map(list, zip(*exceedances)))

        # Metric matrix output
        doc_numbers = range(1, len(docs) + 1)
        if args.latex:
            tabular_format_str = [u" r" for d in docs]
            tabular_format_str = u"".join(tabular_format_str)
            output.append(u"\\begin{tabular}{l|%s}" % tabular_format_str)
            docs_header_str = map(u"& doc%02d ".__mod__, doc_numbers)
            docs_header_str = u"".join(docs_header_str)
            output.append(u"    Metrik %s\\\\" % docs_header_str)
            output.append(u"    \\hline")
            for i in range(len(metrics)):
                value_str = u""
                for doc_nr in range(len(results[i])):
                    # Emphasize values outside the expected range.
                    # NOTE(review): u"\emph" relies on "\e" not being an
                    # escape sequence; u"\\emph" would be explicit.
                    if exceedances[i][doc_nr] == 1:
                        value_str = value_str + u"& \emph{%.2f} " % results[i][
                            doc_nr]
                    else:
                        value_str = value_str + u"& %.2f " % results[i][doc_nr]
                output.append(
                    u"    %s %s\\\\" %
                    (metric_names[i].ljust(METRIC_COL_WIDTH), value_str))
        else:
            output.append(u"# Bericht \"%s\"" % self.ID)
            output.append(u"")
            docs_header_str = map(u"| doc%02d ".__mod__, doc_numbers)
            docs_header_str = u"".join(docs_header_str)
            output.append(u"%s%s" %
                          (u"METRIC".ljust(METRIC_COL_WIDTH), docs_header_str))
            dash_length = len(docs_header_str) - 2
            if dash_length < 0:
                dash_length = 0
            output.append(u"%s+%s" % (u"".ljust(
                METRIC_COL_WIDTH, u"-"), u"".ljust(dash_length, u"-")))
            for i in range(len(metrics)):
                value_str = map(u"| %05.2f ".__mod__, results[i])
                value_str = u"".join(value_str)
                output.append(
                    u"%s%s" %
                    (metric_names[i].ljust(METRIC_COL_WIDTH), value_str))

        # Exceedances/shortfalls
        exceedances_counts = map(sum, exceedances_transposed)
        if args.latex:
            output.append(u"    \\hline")
            exceedances_str = map(u"& %d ".__mod__, exceedances_counts)
            exceedances_str = u"".join(exceedances_str)
            output.append(
                u"    %s %s\\\\" %
                (u"Überschreitungen".ljust(METRIC_COL_WIDTH), exceedances_str))
        else:
            output.append(u"%s+%s" % (u"".ljust(
                METRIC_COL_WIDTH, u"-"), u"".ljust(dash_length, u"-")))
            exceedances_str = map(u"|    %02d ".__mod__, exceedances_counts)
            exceedances_str = u"".join(exceedances_str)
            output.append(
                u"%s%s" %
                (u"Transgressions".ljust(METRIC_COL_WIDTH), exceedances_str))

        # Rule violations
        rule_IDs = RULE_NAMES
        rules = [
            A.get(rule=ID) for ID in rule_IDs if A.get(rule=ID) is not None
        ]
        violated_rule_counts = [len(eval_doc(doc, rules)) for doc in docs]

        if args.latex:
            violated_rule_counts_str = map(u"& %d ".__mod__,
                                           violated_rule_counts)
            violated_rule_counts_str = u"".join(violated_rule_counts_str)
            output.append(u"    %s %s\\\\" %
                          (u"Regelverletzungen".ljust(METRIC_COL_WIDTH),
                           violated_rule_counts_str))
            output.append(u"\\end{tabular}")
        else:
            violated_rule_counts_str = map(u"|    %02d ".__mod__,
                                           violated_rule_counts)
            violated_rule_counts_str = u"".join(violated_rule_counts_str)
            output.append(u"%s%s" % (u"Violated rules".ljust(METRIC_COL_WIDTH),
                                     violated_rule_counts_str))

        return u"\n".join(output)
Beispiel #23
0
    def execute(self, docs, args):
        """Render the metric matrix report: one row per metric and one
        column per document, followed by per-document counts of
        expectation exceedances and of rule violations.  Emits a LaTeX
        tabular when args.latex is set, otherwise a plain-text table.
        """
        output = []
        A = Analyzer.instance()
        # Resolve metrics, dropping unavailable ones while keeping the
        # name list aligned with the instance list: the original filtered
        # only the instances, which misaligned metric_names[i] with
        # metrics[i]/results[i] (and compute_exceedances' inputs).
        # An unused `corp` lookup was also removed.
        metric_names = []
        metrics = []
        for name in METRIC_NAMES:
            metric = A.get(metric=name)
            if metric is not None:
                metric_names.append(name)
                metrics.append(metric)
        results = [[m.evaluate(d) for d in docs] for m in metrics]

        # exceedances[i][j] == 1 when metric i is outside its expected
        # range on document j (per compute_exceedances).
        exceedances = self.compute_exceedances(metric_names, results)
        exceedances_transposed = list(map(list, zip(*exceedances)))

        # Metric matrix output
        doc_numbers = range(1, len(docs) + 1)
        if args.latex:
            tabular_format_str = u"".join(u" r" for d in docs)
            output.append(u"\\begin{tabular}{l|%s}" % tabular_format_str)
            docs_header_str = u"".join(map(u"& doc%02d ".__mod__, doc_numbers))
            output.append(u"    Metrik %s\\\\" % docs_header_str)
            output.append(u"    \\hline")
            for i in range(len(metrics)):
                value_str = u""
                for doc_nr in range(len(results[i])):
                    # Emphasize values outside the expected range.  The
                    # original wrote u"\emph", relying on "\e" not being
                    # an escape sequence; "\\emph" is explicit and avoids
                    # the invalid-escape DeprecationWarning.
                    if exceedances[i][doc_nr] == 1:
                        value_str = value_str + u"& \\emph{%.2f} " % results[i][doc_nr]
                    else:
                        value_str = value_str + u"& %.2f " % results[i][doc_nr]
                output.append(u"    %s %s\\\\" % (metric_names[i].ljust(METRIC_COL_WIDTH), value_str))
        else:
            output.append(u"# Bericht \"%s\"" % self.ID)
            output.append(u"")
            docs_header_str = u"".join(map(u"| doc%02d ".__mod__, doc_numbers))
            output.append(u"%s%s" % (u"METRIC".ljust(METRIC_COL_WIDTH), docs_header_str))
            dash_length = len(docs_header_str) - 2
            if dash_length < 0:
                dash_length = 0
            output.append(u"%s+%s" % (u"".ljust(METRIC_COL_WIDTH, u"-"), u"".ljust(dash_length, u"-")))
            for i in range(len(metrics)):
                value_str = u"".join(map(u"| %05.2f ".__mod__, results[i]))
                output.append(u"%s%s" % (metric_names[i].ljust(METRIC_COL_WIDTH), value_str))

        # Exceedances/shortfalls: per-document count of metrics outside
        # their expected range.
        exceedances_counts = map(sum, exceedances_transposed)
        if args.latex:
            output.append(u"    \\hline")
            exceedances_str = u"".join(map(u"& %d ".__mod__, exceedances_counts))
            output.append(u"    %s %s\\\\" % (u"Überschreitungen".ljust(METRIC_COL_WIDTH), exceedances_str))
        else:
            output.append(u"%s+%s" % (u"".ljust(METRIC_COL_WIDTH, u"-"), u"".ljust(dash_length, u"-")))
            exceedances_str = u"".join(map(u"|    %02d ".__mod__, exceedances_counts))
            output.append(u"%s%s" % (u"Transgressions".ljust(METRIC_COL_WIDTH), exceedances_str))

        # Rule violations per document
        rules = [A.get(rule=ID) for ID in RULE_NAMES if A.get(rule=ID) is not None]
        violated_rule_counts = [len(eval_doc(doc, rules)) for doc in docs]

        if args.latex:
            violated_rule_counts_str = u"".join(map(u"& %d ".__mod__, violated_rule_counts))
            output.append(u"    %s %s\\\\" % (u"Regelverletzungen".ljust(METRIC_COL_WIDTH), violated_rule_counts_str))
            output.append(u"\\end{tabular}")
        else:
            violated_rule_counts_str = u"".join(map(u"|    %02d ".__mod__, violated_rule_counts))
            output.append(u"%s%s" % (u"Violated rules".ljust(METRIC_COL_WIDTH), violated_rule_counts_str))

        return u"\n".join(output)
Beispiel #24
0
    def execute(self, docs, args):
        """Render the document comparison report.

        With exactly two documents, emits a before/after progress table.
        With 2n documents, the list is split in halves and each pair
        (docs[i], docs[i + n]) is compared; increase/decrease counts and
        average deltas are aggregated per metric.  Requires an even
        number of documents (at least 2).  Emits a LaTeX tabular when
        args.latex is set, otherwise a plain-text table.
        """
        output = list()
        if len(docs) < 2 or len(docs) % 2 != 0:
            output.append(u"Error: Need an even number of documents (at least 2) for the document comparison report!")
        else:
            # Resolve metric instances; unavailable metrics are dropped.
            A = Analyzer.instance()
            metrics = [A.get(metric=m) for m in METRIC_NAMES]
            metrics = [m for m in metrics if m is not None]
            if len(docs) == 2:
                # Two documents: simple before --> after progress listing.
                output.append(u"# Bericht \"%s\"" % self.ID)
                output.append(u"")
                output.append(u" * PROGRESS: Vorher- --> Nachher-Wert.")
                output.append(u"             (+) ... Erhöhung         ")
                output.append(u"             (-) ... Verringerung     ")
                output.append(u"             (=) ... gleichbleibend   ")
                output.append(u"")
                output.append(u"%s | PROGRESS" % u"METRIC".ljust(METRIC_COL_WIDTH))
                output.append(u"%s-+---------------------" % u"".ljust(METRIC_COL_WIDTH, u"-"))
                for m in metrics:
                    vals = [m.evaluate(doc) for doc in docs]
                    progress = u"="
                    if vals[0] > vals[1]:
                        progress = u"-"
                    elif vals[0] < vals[1]:
                        progress = u"+"
                    output.append(u"%s | %05.2f --> %05.2f  (%s)" % (m.ID.ljust(METRIC_COL_WIDTH), vals[0], vals[1], progress))

            else:
                # Floor division: the original `len(docs) / 2` yields a
                # float under Python 3 and breaks range(half); `//` is
                # identical for ints on both Python 2 and 3.
                half = len(docs) // 2
                if args.latex:
                    output.append(u"\\begin{tabular}{l|l l|l l|r}")
                    output.append(u"\\multirow{2}{*}{\\textbf{Metrik}} & \\multicolumn{2}{|c|}{\\textbf{Erhöhung}} & \\multicolumn{2}{|c|}{\\textbf{Verringerung}} & \\textbf{gleichbleibend} \\\\")
                    output.append(u"                                 & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{|c}{$\\#$} & \\multicolumn{1}{c|}{$\\Delta$} & \\multicolumn{1}{c}{$\\#$} \\\\")
                    output.append(u"    \\hline")
                else:
                    output.append(u"# Bericht \"%s\"" % self.ID)
                    output.append(u"")
                    output.append(u" * +:      Anzahl an Metrikerhöhungen")
                    output.append(u" * DELTA+: Durchschnittliche Erhöhung um diesen Wert")
                    output.append(u" * -:      Anzahl an Metrikverringerungen")
                    output.append(u" * DELTA-: Durchschnittliche Verringerung um diesen Wert")
                    output.append(u" * =:      Anzahl an Dokumentpaaren, bei denen der")
                    output.append(u"           Metrikwert gleich geblieben ist")
                    output.append(u"")
                    output.append(u"%s | +  | DELTA+ | -  | DELTA- | =  " % u"METRIC".ljust(METRIC_COL_WIDTH))
                    output.append(u"%s-+----+--------+----+--------+----" % u"".ljust(METRIC_COL_WIDTH, u"-"))
                for m in metrics:
                    # Pair document i with document i + half.
                    results = list()
                    for i in range(half):
                        results.append((m.evaluate(docs[i]), m.evaluate(docs[i + half])))
                    counts = [0, 0, 0] # greater, less, equal
                    avg_diffs = [0.0, 0.0] # summed increases / decreases
                    for r in results:
                        if r[0] > r[1]:
                            counts[1] += 1
                            avg_diffs[1] += r[0] - r[1]
                        elif r[0] < r[1]:
                            counts[0] += 1
                            avg_diffs[0] += r[1] - r[0]
                        else:
                            counts[2] += 1
                    # Turn summed deltas into averages (guard against /0).
                    if counts[0] > 0:
                        avg_diffs[0] /= float(counts[0])
                        avg_diffs[0] = round(avg_diffs[0], ROUND + 1)
                    if counts[1] > 0:
                        avg_diffs[1] /= float(counts[1])
                        avg_diffs[1] = round(avg_diffs[1], ROUND + 1)
                    if args.latex:
                        output.append(u"    %s & %s & %s & %s & %s & %s \\\\" % (m.ID, counts[0], avg_diffs[0], counts[1], avg_diffs[1], counts[2]))
                    else:
                        output.append(u"%s | %02d | %06.3f | %02d | %06.3f | %02d" % (m.ID.ljust(METRIC_COL_WIDTH), counts[0], avg_diffs[0], counts[1], avg_diffs[1], counts[2]))
                if args.latex:
                    output.append(u"\\end{tabular}")
        return u"\n".join(output)