Beispiel #1
0
    def _get_weights_and_parameters(self, options):
        """Return the METEOR weights and parameters as two float arrays.

        When ``self.options`` is None, the hard-coded values below are
        returned without running METEOR. Otherwise METEOR is run on a
        one-token dummy corpus and the four numbers that follow the
        ``Weights:`` and ``Parameters:`` tokens of its output are read.

        Args:
          options: Extra command-line options appended verbatim to the METEOR
            command. If None, ``self.options`` is used instead.

        Returns:
          A tuple ``(weights, parameters)`` of two length-4 float arrays.

        Raises:
          ValueError: If the METEOR output has no ``Weights:`` or
            ``Parameters:`` token, or a value after them is not a number.
        """
        if self.options is None:
            return (np.array([1.0, 0.6, 0.8, 0.6]),
                    np.array([0.85, 0.2, 0.6, 0.75]))

        # Local import: only the path that shells out needs it.
        import shlex

        if options is None:
            # Without this the literal text "None" would end up on the
            # command line.
            options = self.options

        # a simple and (maybe) slow way to obtain weights and parameters
        with tempfile.TemporaryDirectory() as directory:
            ref_name = directory + '/ref'
            out_name = directory + '/out'

            corpus_utils.write_tokens(ref_name, [["test"]])
            corpus_utils.write_tokens(out_name, [["test"]])

            # The shell is still needed to expand the meteor-*.jar glob, so
            # only the path components are quoted; the glob and the
            # user-supplied options are left for the shell to interpret.
            command = (
                f'java -Xmx2G -jar '
                f'{shlex.quote(str(self.meteor_directory))}/meteor-*.jar '
                f'{shlex.quote(out_name)} {shlex.quote(ref_name)} {options}'
            )

            completed = subprocess.run(
                command, stdout=subprocess.PIPE, shell=True)

        stats = completed.stdout.decode("utf-8").split()

        try:
            weights_index = stats.index('Weights:') + 1
            params_index = stats.index('Parameters:') + 1
        except ValueError as err:
            raise ValueError(
                "METEOR output contains no 'Weights:'/'Parameters:' section; "
                f"command was: {command}") from err

        weights = np.array(
            [float(stats[weights_index + i]) for i in range(4)])
        parameters = np.array(
            [float(stats[params_index + i]) for i in range(4)])

        return weights, parameters
Beispiel #2
0
    def cache_stats(self, ref, out):
        """
        Cache sufficient statistics for calculating METEOR score

        Runs METEOR with ``-ssOut`` on the two corpora (written to temporary
        files) and parses each line of its standard output into a tuple of
        floats.

        Args:
          ref: A reference corpus
          out: An output corpus

        Returns:
          A list of cached statistics, one tuple of floats per output line
        """
        # Local import: only needed to quote paths for the shell command.
        import shlex

        with tempfile.TemporaryDirectory() as directory:
            ref_name = directory + '/ref'
            out_name = directory + '/out'

            corpus_utils.write_tokens(ref_name, ref)
            corpus_utils.write_tokens(out_name, out)

            # The shell is still needed to expand the meteor-*.jar glob, so
            # only the path components are quoted.
            command = (
                f'java -Xmx2G -jar '
                f'{shlex.quote(str(self.meteor_directory))}/meteor-*.jar '
                f'{shlex.quote(out_name)} {shlex.quote(ref_name)} '
            )
            if self.options:
                command += self.options
            command += ' -ssOut'

            completed = subprocess.run(
                command, stdout=subprocess.PIPE, shell=True)

        lines = completed.stdout.decode("utf-8").split('\n')
        # Drop only the empty remainder after the final newline; a last line
        # that lacks a trailing newline is real data and must be kept.
        if not lines[-1].strip():
            lines.pop()

        return [tuple(float(x) for x in line.split()) for line in lines]