def __init__(self):
    """Initialise the German Wiktionary reference and load noun categories.

    Delegates to ``Reference.__init__`` with three fixed positional
    arguments (presumably display name, API URL and pickle file name --
    confirm against ``Reference``), then loads the JSON file
    ``reference/kategorien.json`` into ``self.category_dict``.

    The JSON path is relative, so it is resolved against the current
    working directory of the process.
    """
    site_name = 'German Wiktionary'
    api_url = 'http://de.wiktionary.org/w/api.php'
    pickle_name = 'wiktionary.pickle'
    Reference.__init__(self, site_name, api_url, pickle_name)
    # TODO(PM) Add a text interface to login

    # Open dictionary of noun categories
    with open('reference/kategorien.json', 'r') as category_file:
        self.category_dict = json.load(category_file)
def __init__(self, reference_tokens, negative_value=0.0):
    """
    Initialise the reference and remember ``negative_value``.

    @param reference_tokens the reference translation that hypotheses shall be
                            scored against. Must be an iterable of tokens (any type).
    @param negative_value value stored on the instance as ``negative_value``
                          (default 0.0); how it is used is decided by the
                          methods that read it, not by this initialiser.
    """
    # The base class takes care of storing the reference tokens.
    Reference.__init__(self, reference_tokens)

    self.negative_value = negative_value
def __init__(self, reference_tokens, n=4):
    """
    Initialise the reference and precompute its length and n-grams.

    @param reference_tokens the reference translation that hypotheses shall be
                            scored against. Must be an iterable of tokens (any type).
    @param n maximum n-gram order to consider.
    """
    Reference.__init__(self, reference_tokens)
    self.n = n

    # Preprocess the reference once: its length and its n-grams.
    # NOTE(review): relies on Reference.__init__ having set
    # self._reference_tokens -- confirm against the base class.
    tokens = self._reference_tokens
    self._reference_length = len(tokens)
    self._reference_ngrams = self._get_ngrams(tokens, self.n)
def __init__(self, reference_tokens, n=6, beta=1):
    """
    Initialise the reference and precompute its character n-grams.

    @param reference_tokens the reference translation that hypotheses shall be
                            scored against. The tokens are joined with single
                            spaces, so they must be strings.
    @param n maximum character n-gram order to consider.
    @param beta algorithm parameter beta (interpolation weight, needs to be > 0).
    @raise ValueError if beta is zero or negative.
    """
    if beta <= 0:
        raise ValueError("Value of beta needs to be larger than zero!")

    Reference.__init__(self, reference_tokens)

    # Both attributes carry the same maximum order.
    self.n = self.max_order = n
    # Only the square of beta is kept on the instance.
    self.beta_squared = beta ** 2

    # The paper specifies that whitespace is ignored, but for a training
    # objective it is perhaps better to leave it in. According to the
    # paper, it makes no difference in practice for scoring.
    joined_tokens = " ".join(reference_tokens)
    self._reference_string = joined_tokens.strip()

    # Get n-grams from the reference string.
    self._reference_ngrams = self._get_ngrams(self._reference_string, self.n)
def __init__(self, reference_tokens, additional_flags=''):
    """
    Prepare everything needed to compute the TER of a sentence.

    Builds the TERCOM shell command and the clean-up command, creates the
    lock, and writes the reference sentence to ``self.ref_filename``.

    :param reference_tokens: the reference translation that hypotheses shall be
                             scored against. The tokens are joined with single
                             spaces, so they must be strings.
    :param additional_flags: additional TERCOM flags. When non-empty they are
                             inserted, surrounded by spaces, after the
                             hypothesis file name in ``self.ter_cmd``.
    """
    # Copy of the current environment with LANG forced to 'C'.
    self.d = dict(os.environ)
    self.d['LANG'] = 'C'

    # Random stem shared by the hypothesis and reference file names.
    # NOTE(review): the stem is not checked for uniqueness, so two instances
    # can end up with the same /tmp paths -- consider the tempfile module if
    # many references are alive at once.
    self.extension = str(random.randint(0, 10000000))
    self.hyp_filename = "/tmp/" + self.extension + ".hyp"
    self.ref_filename = "/tmp/" + self.extension + ".ref"

    # Fix: the flags used to be glued directly onto the hypothesis file name
    # (".../123.hyp-N| grep ..."), corrupting the -h argument. Pad non-empty
    # flags with spaces; with the default '' the command text is unchanged.
    # NOTE(review): additional_flags goes into a shell command verbatim --
    # only pass trusted values.
    flags = " " + additional_flags + " " if additional_flags else ""
    self.ter_cmd = (
        "bash " + TER_JAR
        + " -r " + self.ref_filename
        + " -h " + self.hyp_filename
        + flags
        + "| grep TER | awk '{print $3}'"
    )
    self.clean_cmd = "rm -f " + self.ref_filename + " " + self.hyp_filename

    # Used to guarantee thread safety
    self.lock = threading.Lock()

    Reference.__init__(self, reference_tokens)

    # Reference line written to the file: the sentence, a tab, then the
    # segment id "(sentence0)".
    self._gts_ter = ' '.join(reference_tokens) + '\t(sentence%d)\n' % 0
    with open(self.ref_filename, 'w') as f:
        f.write(self._gts_ter)
def __init__(self, reference_tokens, beer_scorer):
    """
    Initialise the reference and keep the scorer object and reference string.

    @param reference_tokens the reference translation; the tokens are joined
                            with single spaces, so they must be strings.
    @param beer_scorer object stored as ``self._beer_scorer``; this
                       initialiser only keeps a reference to it.
    """
    Reference.__init__(self, reference_tokens)

    # Construct reference string from tokens
    separator = " "
    self._reference_string = separator.join(reference_tokens)

    self._beer_scorer = beer_scorer
def __init__(self, reference_tokens, meteor_scorer):
    """
    Initialise the reference and keep the scorer object and reference string.

    @param reference_tokens the reference translation; the tokens are joined
                            with single spaces, so they must be strings.
    @param meteor_scorer object stored as ``self._meteor_scorer``; this
                         initialiser only keeps a reference to it.
    """
    Reference.__init__(self, reference_tokens)

    # Construct reference string from tokens
    separator = " "
    self._reference_string = separator.join(reference_tokens)

    self._meteor_scorer = meteor_scorer
def __init__(self, outfile, folder=False, opt=False, write_current=False, with_summary=True):
    """
    Store ``with_summary`` and delegate the remaining options to the base class.

    @param outfile passed through to ``Reference.__init__`` as the first argument.
    @param folder passed through to ``Reference.__init__`` (default False).
    @param opt passed through to ``Reference.__init__`` (default False).
    @param write_current passed through to ``Reference.__init__`` (default False).
    @param with_summary stored as ``self.with_summary`` (default True).
    """
    # Set before the base initialiser runs, exactly as in the original
    # ordering (the base class may read it -- TODO confirm).
    self.with_summary = with_summary

    base_options = dict(folder=folder, opt=opt, write_current=write_current)
    Reference.__init__(self, outfile, **base_options)