Beispiel #1
0
    def main(self):
        """
        Main method for base ArffConverter class. Several helper methods must be defined by
        child classes.

        Builds the data frame, collects comments, then writes the ``@RELATION`` line, the
        converted header lines, the ``@DATA`` marker and every output row to ``self.output_file``.
        The output file is always closed afterwards, even if one of the steps raises.
        When ``self.validate`` is truthy, an ``ArffValidator`` is run once writing has finished.
        """
        try:
            self.create_data_frame()

            self.collect_comments()

            self.output_file.write('@RELATION {} \n\n'.format(
                quote_if_space(self.relation)))

            for line in self.convert_header():
                self.output_file.write(line)

            self.output_file.write('\n@DATA\n')

            for row in self.output_rows():
                self.output_file.write(row)
        finally:
            # Release the file handle on the error path as well as on success.
            self.output_file.close()

        if self.validate:
            # NOTE(review): output_file has already been closed here; presumably
            # ArffValidator reopens it (e.g. by name) -- confirm against ArffValidator.
            validator = ArffValidator(input_file=self.input_file,
                                      arff_file=self.output_file)
            validator.validate()
Beispiel #2
0
 def test_quote_if_space(self):
     # Each key is a raw value; its mapped value is what utils.quote_if_space
     # is expected to return for it (values containing a space get wrapped in
     # double quotes, others come back unchanged).
     cases = {
         'abcd': 'abcd',
         ' defg': '" defg"',
         'hijk ': '"hijk "',
         'lm no': '"lm no"',
     }
     for raw, expected in cases.items():
         self.assertEqual(utils.quote_if_space(raw), expected)
Beispiel #3
0
def compare_values(line, arff_line):
    """
    Compares entries in lines from the input & output files and raises a ValidationError on mismatch.

    An input entry is rejected when either of the following holds:

    * it contains a space, but the ARFF entry is not its ``quote_if_space`` form;
    * it appears unchanged in the ARFF line although it is listed in
      ``ARFF_FIELD_MAPS['none']`` and the ARFF entry is not ``'?'``.

    Any other difference between the two entries is currently accepted. Entries of
    ``arff_line`` beyond the length of ``line`` are not inspected.

    :param line: line from input file split into a list of entries
    :param arff_line: line from output file split into a list of entries
    :raises ValidationError: if an entry is rejected by the rules above, or if ``arff_line``
        has fewer entries than ``line``
    """
    # TODO: How do I keep this from turning into one big switch statement or if, elif nightmare?
    msg = 'Line mismatch between input:\n{}\nand ARFF output:\n{}'.format(
        line, arff_line)

    # A short ARFF line is itself a mismatch; report it as one rather than
    # letting the indexing below fail with an IndexError.
    if len(arff_line) < len(line):
        raise ValidationError(msg)

    for i, entry in enumerate(line):
        arff_entry = arff_line[i]

        # Entries with a space must show up quoted in the output. This applies
        # both when the output differs from the input and when it is identical.
        if ' ' in entry and quote_if_space(entry) != arff_entry:
            raise ValidationError(msg)

        # A "none" value copied through verbatim should have become '?'.
        if (entry == arff_entry
                and entry in ARFF_FIELD_MAPS['none']
                and arff_entry != '?'):
            raise ValidationError(msg)
Beispiel #4
0
    def test_arff_data(self):
        # Compares each remaining line of the input file, entry by entry, with
        # the corresponding line that follows the @DATA marker in the output.

        # Skip the first input line (presumably the CSV header -- confirm). The
        # next() builtin works on Python 2 and 3, unlike the Python 2-only
        # file.next() method.
        next(self.input_file)

        # Advance the ARFF output to the line that opens the data section.
        arff_line = self.output_file.readline()
        while not arff_line.startswith('@DATA'):
            if not arff_line:
                # readline() keeps returning an empty string at EOF, so without
                # this check a missing @DATA marker would loop forever.
                self.fail('ARFF output has no @DATA section')
            arff_line = self.output_file.readline()

        for csv_line in self.input_file:
            expected = [utils.quote_if_space(item)
                        for item in csv_line.split(',')]
            arff_line = self.output_file.readline().split(',')
            if '?' not in arff_line:
                self.assertEqual(expected, arff_line)
            else:
                # TODO: Implement special case testing
                pass
Beispiel #5
0
    def convert_header(self):
        """
        Builds the ARFF attribute declarations for the columns of data_frame.

        Each column's pandas dtype name is passed through ``map_data_types`` to get the ARFF
        type, and the column name is run through ``quote_if_space``.

        :return: list of '@ATTRIBUTE <name> <type>' lines (newline-terminated), in column order
        """
        def attribute_line(name):
            # Resolve the ARFF type first, then format the declaration.
            dtype_name = str(self.data_frame[name].dtype)
            arff_type = self.map_data_types(dtype_name, name)
            return '@ATTRIBUTE {} {}\n'.format(quote_if_space(name), arff_type)

        return [attribute_line(name) for name in self.data_frame.columns]