Beispiel #1
0
    def convert_MC28(self, input_file, output_file, delimiter=';'):
        """Rescale the similarity scores of an MC28-style file and write the result.

        Every non-blank line of ``input_file`` is split on ``delimiter`` and read
        as ``word1<delimiter>word2<delimiter>score``. The score is linearly
        mapped from ``RangeData().range_category['mc28']`` onto
        ``RangeData().range_category['cos']`` with ``numpy.interp`` and the
        resulting tokens are handed to ``FileManipulation.writeSimilarityWord``.

        Args:
            input_file: Path of the UTF-8 text file to read.
            output_file: Path passed through to the writer.
            delimiter: Field separator used in ``input_file``.

        Raises:
            IndexError: If a non-blank line has fewer than three fields.
            ValueError: If the score field cannot be parsed as a float.
        """
        fio = FileManipulation()
        ranges = RangeData()

        # The source and target ranges do not depend on the line being read,
        # so look them up once instead of once per line.
        # NOTE(review): numpy.interp expects the source range to be increasing
        # -- confirm against RangeData.
        new_range = ranges.range_category['cos']
        old_range = ranges.range_category['mc28']

        tokens = []
        with open(input_file, 'r', encoding='utf-8') as fin:
            for line in fin:
                # Blank lines (e.g. a trailing empty line at the end of the
                # file) carry no data; skip them rather than fail on indexing.
                if not line.strip():
                    continue

                block = line.split(delimiter)
                token = TokenData()
                token.word1 = block[0]
                token.word2 = block[1]
                # float() ignores surrounding whitespace, including the
                # trailing line terminator left on the last field.
                token.simvalue = numpy.interp(float(block[2]), old_range, new_range)
                tokens.append(token)

        fio.writeSimilarityWord(output_file, tokens)
Beispiel #2
0
    def convert_SCWS(self, input_file, output_file, delimiter='\t'):
        """Rescale the similarity scores of an SCWS-style file and write the result.

        Every non-blank line of ``input_file`` is split on ``delimiter``. Fields
        1 and 3 are taken as the two words, fields 5 and 6 as their context
        sentences and field 7 as the score (all zero-based). The score is
        linearly mapped from ``RangeData().range_category['scws']`` onto
        ``RangeData().range_category['cos']`` with ``numpy.interp`` and the
        resulting tokens are handed to
        ``FileManipulation.writeSimilarityContext``.

        Args:
            input_file: Path of the UTF-8 text file to read.
            output_file: Path passed through to the writer.
            delimiter: Field separator used in ``input_file``.

        Raises:
            IndexError: If a non-blank line has fewer than eight fields.
            ValueError: If the score field cannot be parsed as a float.
        """
        fio = FileManipulation()
        ranges = RangeData()

        # The source and target ranges do not depend on the line being read,
        # so look them up once instead of once per line.
        # NOTE(review): numpy.interp expects the source range to be increasing
        # -- confirm against RangeData.
        new_range = ranges.range_category['cos']
        old_range = ranges.range_category['scws']

        tokens = []
        with open(input_file, 'r', encoding='utf-8') as fin:
            for line in fin:
                # Blank lines (e.g. a trailing empty line at the end of the
                # file) carry no data; skip them rather than fail on indexing.
                if not line.strip():
                    continue

                block = line.split(delimiter)
                token = TokenData()
                token.word1 = block[1]
                token.word2 = block[3]
                token.sent1 = block[5]  # target word1 is marked with <b> </b>
                token.sent2 = block[6]  # target word2 is marked with <b> </b>
                # float() ignores surrounding whitespace, including the
                # trailing line terminator left on the last field.
                token.simvalue = numpy.interp(float(block[7]), old_range, new_range)
                tokens.append(token)

        # SCWS tokens carry context sentences, so they use the context writer.
        fio.writeSimilarityContext(output_file, tokens)