Example #1
import datetime
import os

import yaml


def evaluate(model, pre_wordscount, post_wordscount, lang, correction, trace,
             crash, max_pred):
    # `test`, `results`, `fread`, `NMask`, `_evaluate` and `write_ans` are
    # module-level names defined elsewhere in the original project.
    size = f"# Size of model in memory: {model.sizemo} Mo\n\n"
    files = os.listdir(test)
    stats = {}
    if trace:
        tracetxt = "(trace on)"
    else:
        tracetxt = ""
    # Evaluate the model on every test file
    print(f">>> Evaluating {tracetxt}: ", end="")
    for file in files:
        if file.startswith("t-") and file.endswith(lang):
            test_type = file.split("-")[-1].split(".")[0]
            testfile = f"{test}{file}"
            print(test_type, end=" ")
            (content, encoding) = fread(testfile)
            reader = NMask(content, pre_wordscount, post_wordscount)
            stats[test_type] = _evaluate(model, reader, correction[test_type],
                                         trace, max_pred)
            if trace:
                write_ans(testfile, test_type, lang, encoding)

    now = datetime.datetime.now()
    # Write the results to a file (one file per model)
    modelname = model.__class__.__name__
    with open(
            f"{results}{modelname}_{lang}_{pre_wordscount}c{post_wordscount}_{max_pred}p.txt",
            "w") as result:
        desc = f"# Results for {modelname} in {lang} with {max_pred} predictions and this context: {'_ '*pre_wordscount}<unk/> {'_ '*post_wordscount}\n"
        date = f"\n# {now.day}/{now.month}/{now.year}  {now.hour}:{now.minute}:{now.second}"
        result.write(desc + size + yaml.dump(stats, default_flow_style=False) +
                     date)
    print()
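Examples #1 and #5 call a helper fread that is not shown on this page. Example #2 inlines what appears to be the same logic, so a minimal sketch of fread under that assumption could look like this (the encoding cascade and the (content, encoding) return shape are inferred from Example #2, not taken from the original source):

def fread(path):
    # Sketch of the unshown `fread` helper, assuming it mirrors the inline
    # decoding cascade of Example #2: try utf-8, then utf-16, then iso-8859-1.
    # UnicodeDecodeError is a subclass of UnicodeError, so one except covers both.
    for encoding in ("utf-8", "utf-16", "iso-8859-1"):
        try:
            with open(path, "r", encoding=encoding) as f:
                return (f.read(), encoding)
        except UnicodeError:
            continue
    raise UnicodeError(f"Could not decode {path}")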
Example #2
import datetime
import os

import yaml


def evaluate(model, pre_wordscount, post_wordscount, lang, correction, trace,
             max_pred):
    files = os.listdir(test)
    stats = {}
    if trace:
        tracetxt = "(trace on)"
    else:
        tracetxt = ""
    # Evaluate the model on every test file
    print(f">>> Evaluating {tracetxt}: ", end="")
    for file in files:
        if file.startswith("t-") and file.endswith(lang):
            test_type = file.split("-")[-1].split(".")[0]
            testfile = f"{test}{file}"
            print(test_type, end=" ")
            try:
                with open(testfile, "r", encoding="utf-8") as f:
                    reader = NMask(f.read(), pre_wordscount, post_wordscount)
                    encoding = "utf-8"
            except UnicodeDecodeError:
                try:
                    with open(testfile, "r", encoding="utf-16") as f:
                        reader = NMask(f.read(), pre_wordscount,
                                       post_wordscount)
                        encoding = "utf-16"
                except UnicodeError:
                    with open(testfile, "r", encoding="iso-8859-1") as f:
                        reader = NMask(f.read(), pre_wordscount,
                                       post_wordscount)
                        encoding = "iso-8859-1"

            stats[test_type] = _evaluate(model, reader, correction[test_type],
                                         trace, max_pred)
            if trace:
                write_ans(testfile, test_type, lang, encoding)
    now = datetime.datetime.now()
    # Write the results to a file (one file per model)
    modelname = model.__class__.__name__
    with open(
            f"{results}{modelname}_{lang}_{pre_wordscount}c{post_wordscount}_{max_pred}p.txt",
            "w") as result:
        desc = f"# Results for {modelname} in {lang} with {max_pred} predictions and this context: {'_ '*pre_wordscount}<unk/> {'_ '*post_wordscount}\n\n"
        result.write(
            desc + yaml.dump(stats) +
            f"\n{now.day}/{now.month}/{now.year}  {now.hour}:{now.minute}:{now.second}"
        )
    print()
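The result file name packs the whole configuration into one string. With hypothetical values (chosen only for illustration, not taken from the project), the f-string expands like this:

# Hypothetical values, for illustration only:
results, modelname, lang = "results/", "Bigram", "en"
pre_wordscount, post_wordscount, max_pred = 2, 1, 3
print(f"{results}{modelname}_{lang}_{pre_wordscount}c{post_wordscount}_{max_pred}p.txt")
# -> results/Bigram_en_2c1_3p.txt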
Example #3
	def train(self, filename):
		with open(filename, "r", encoding="utf-8") as file:
			print(f">>> Reading {filename}")
			data = file.read()
		reader = NMask(data, self.pre_words, self.post_words)
		tenth = int(len(data)/10)
		currtenth = tenth
		while reader.e < len(data):
			if reader.e > currtenth:
				print(".", end="", flush=True)
				currtenth += tenth
			self.add_ngram(reader)
			reader.next_token()
		# We must add the last N-Gram
		self.add_ngram(reader)
		print()
		print("Done")
Example #4
def train(self, filename):
    with open(filename, "r", encoding="utf-8") as file:
        print(f">>> Reading {filename}")
        data = file.read()
    reader = NMask(data, self.pre_words, self.post_words)
    tenth = int(len(data) / 10)
    currtenth = tenth
    while reader.e < len(data):
        if reader.e > currtenth:
            print(".", end="", flush=True)
            currtenth += tenth
        self.add_ngram(reader)
        reader.next_token()
    # We must add the last N-Gram
    self.add_ngram(reader)
    print("Done")
    print("Computing the size of the model ... ", end="")
    self.sizemo = self.getsize() / 1000000
    print(f"{self.sizemo} Mo")
Example #5
def evaluatecrash(model, pre_wordscount, post_wordscount, lang, correction):
    # `fread`, `test`, `NMask` and `_evaluatecrash` are module-level names;
    # "u05" is the test_type embedded in the fixed test-file name (cf. Example #1).
    (content, encoding) = fread(f"{test}t-unk-europarl-v7.fi-en-u05.{lang}")
    reader = NMask(content, pre_wordscount, post_wordscount)
    _evaluatecrash(model, reader, correction["u05"])
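Example #4 computes self.sizemo from a getsize() method that is not shown on this page. A common way to approximate the deep memory footprint of a nested Python structure is a recursive sys.getsizeof walk; the following is a sketch under that assumption, not the original implementation:

import sys

def getsize(obj, seen=None):
    # Approximate deep size in bytes of a nested container.
    # Sketch only; the original getsize() is not shown in these examples.
    seen = set() if seen is None else seen
    if id(obj) in seen:  # avoid double-counting shared objects
        return 0
    seen.add(id(obj))
    size = sys.getsizeof(obj)
    if isinstance(obj, dict):
        size += sum(getsize(k, seen) + getsize(v, seen)
                    for k, v in obj.items())
    elif isinstance(obj, (list, tuple, set)):
        size += sum(getsize(item, seen) for item in obj)
    return size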