Ejemplo n.º 1
0
def load_input(input_file):
    """Load the first tab-separated field of every line as one string.

    The file content is obtained through ``read``, stripped of surrounding
    whitespace and split into lines.  If the first line begins with ``#`` it
    is dropped.  Only the text before the first tab of each remaining line is
    kept.

    :param input_file: path handed to ``read``
    :return: the first fields of all kept lines, joined by single spaces
    """
    lines = read(input_file).strip().split("\n")
    # startswith() is safe when the first line is empty (empty or
    # whitespace-only file); indexing with lines[0][0] raised IndexError.
    if lines[0].startswith("#"):
        lines = lines[1:]
    first_fields = [line.split("\t")[0] for line in lines]
    return u" ".join(first_fields)
Ejemplo n.º 2
0
def raw_to_corpus():
    """Convert ``raw/acts.json`` (next to this module) into a corpus.

    The JSON document is parsed, every entry is passed through
    ``transform_post``, entries rejected by ``filter_post`` are dropped, the
    remaining ones are turned into rows with ``get_row`` and the rows are
    handed to ``convert_to_corpus``.
    """
    raw_path = join(dirname(__file__), "raw", "acts.json")
    entries = json.loads(read(raw_path))
    # Each stage runs over the whole list before the next one starts.
    transformed = [transform_post(entry) for entry in entries]
    kept = [entry for entry in transformed if filter_post(entry)]
    corpus_rows = [get_row(entry) for entry in kept]
    convert_to_corpus(corpus_rows)
Ejemplo n.º 3
0
import json
from os.path import join
from pprint import pprint

from sklearn.metrics import confusion_matrix
from underthesea.util.file_io import read


def convert_cm_to_log(cm, labels, line=5):
    """Render a confusion matrix as a list of fixed-width text lines.

    :param cm: matrix object exposing ``tolist()``; row ``i`` is labelled
        with ``labels[i]``
    :param labels: labels, in the same order as the rows and columns of ``cm``
    :param line: width of every column, in characters
    :return: list of strings: a header line holding the labels, followed by
        one formatted line per matrix row
    """
    rows = cm.tolist()
    # Header: blank space above the row-label column, then the labels
    # right-aligned over their columns.
    header = " " * (line + 1) + " ".join(
        "%*s" % (line, label) for label in labels)
    formatted = [header]
    for index, row in enumerate(rows):
        # Row label left-aligned, counts right-aligned. Both parts are lists,
        # so the concatenation also works where map() returns an iterator.
        cells = ["%-*s" % (line, labels[index])]
        cells += ["%*d" % (line, value) for value in row]
        formatted.append(" ".join(cells))
    # Return the formatted lines; previously they were built and then
    # discarded in favour of the raw matrix rows.
    return formatted


# Build and print a confusion-matrix report from a stored evaluation result.
# Alternative run: join("logs", "20171006_153955", "result.json")
results = json.loads(read(join("logs", "20171006_161437", "result.json")))
# NOTE(review): looks like a leftover debug marker - confirm before removing.
print(0)
actual = results["actual"]
expected = results["expected"]
# Sorted so the row/column order of the report is the same on every run;
# plain set iteration order is arbitrary.
labels = sorted(set(expected).union(set(actual)))
# `labels` is passed by keyword: newer scikit-learn releases make it
# keyword-only and reject the positional form.
cm = confusion_matrix(expected, actual, labels=labels)
cm = convert_cm_to_log(cm, labels)
pprint(cm, indent=2)
Ejemplo n.º 4
0
def load_output(filename):
    """Parse a tab-separated file into a list of tuples.

    The content returned by ``read`` is stripped and split into lines; each
    line becomes a tuple of its tab-separated fields.

    :param filename: path handed to ``read``
    :return: list with one tuple of strings per line
    """
    raw_rows = read(filename).strip().split("\n")
    return [tuple(row.split("\t")) for row in raw_rows]
Ejemplo n.º 5
0
 def words(self):
     """Return the word list, loading it from ``self.data_file`` when needed.

     While ``self.words_data`` is falsy, the file is read through ``read``,
     stripped, split into lines and stored in ``self.words_data``.
     """
     if self.words_data:
         return self.words_data
     self.words_data = read(self.data_file).strip().split("\n")
     return self.words_data
Ejemplo n.º 6
0
def load_output(input_file):
    """Return the stripped text of a file without a leading ``#`` line.

    The content returned by ``read`` is stripped and split into lines.  If
    the first line begins with ``#`` it is dropped; the remaining lines are
    joined back with newlines.

    :param input_file: path handed to ``read``
    :return: the remaining text as a single string
    """
    lines = read(input_file).strip().split("\n")
    # startswith() is safe when the first line is empty (empty or
    # whitespace-only file); indexing with lines[0][0] raised IndexError.
    if lines[0].startswith("#"):
        lines = lines[1:]
    return "\n".join(lines)
Ejemplo n.º 7
0
def load_input(input_file):
    """Return the first line of a file.

    :param input_file: path handed to ``read``
    :return: the text before the first newline of the content
    """
    first_line = read(input_file).split("\n")[0]
    return first_line
Ejemplo n.º 8
0
def load_output(output_file):
    """Return the stripped content of a file as a list of lines.

    :param output_file: path handed to ``read``
    :return: list of strings, one per line
    """
    content = read(output_file)
    lines = content.strip().split("\n")
    return lines