def convert_header_with_schema(self, header):
        """Parse a schema-annotated header into column types and names.

        Each header field is expected to be a ``name:TYPE`` pair. The type
        token is stripped, upper-cased and handed to ``convert_schema_type``;
        the result is recorded in ``self.types`` at the field's index. The
        name is recorded in ``self.column_names`` only when it is non-empty
        and the type is not START_ID, END_ID or IGNORE.

        Args:
            header: sequence of header field strings.

        Raises:
            CSVError: a field contains more than one colon.
            SchemaError: a field contains no colon at all, or its name is
                empty while its type is not ID, START_ID, END_ID or IGNORE.
        """
        self.types = [None] * self.column_count  # Value type of every column.

        # Columns of these types are never stored as named properties.
        unnamed_types = (Type.START_ID, Type.END_ID, Type.IGNORE)
        # Columns of these types are allowed to omit the name entirely.
        optional_name_types = (Type.ID,) + unnamed_types

        for idx, field in enumerate(header):
            pair = field.split(':')

            # Multiple colons found in column name, emit error.
            # TODO might need to check for backtick escapes
            if len(pair) > 2:
                # Report the number of colons, not the length of the field.
                raise CSVError("%s: Field '%s' had %d colons" %
                               (self.infile.name, field, len(pair) - 1))

            # No colon at all: there is no type token to convert, so fail
            # with a descriptive error instead of an IndexError on pair[1].
            if len(pair) < 2:
                raise SchemaError(
                    "%s: Each property in the header should be a colon-separated pair"
                    % (self.infile.name))

            name, type_token = pair

            # Convert the column type.
            col_type = convert_schema_type(type_token.upper().strip())

            if len(name) == 0:
                # If the column did not have a name but the type requires one, emit an error.
                if col_type not in optional_name_types:
                    raise SchemaError(
                        "%s: Each property in the header should be a colon-separated pair"
                        % (self.infile.name))
            elif col_type not in unnamed_types:
                # Only store the name if the column's values should be added as properties.
                self.column_names[idx] = name.strip()

            # Store the column type.
            self.types[idx] = col_type
 def validate_row(self, row):
     """Check that *row* has exactly ``self.column_count`` fields.

     Returns None when the counts match.

     Raises:
         CSVError: the field count differs; the message carries the input
             file name, the reader's current line number, the expected and
             actual counts, and the row re-joined with the configured
             separator.
     """
     found = len(row)
     if found == self.column_count:
         return

     details = (self.infile.name, self.reader.line_num, self.column_count,
                found, self.config.separator.join(row))
     raise CSVError(
         "%s:%d Expected %d columns, encountered %d ('%s')" % details)
# Example #3
# 0
    def process_schemaless_header(self, header):
        """Set up endpoint and property columns from a schemaless header.

        Columns 0 and 1 are taken as the source and destination IDs; both
        namespaces are reset to None. Every remaining header field is copied
        into ``self.column_names`` at its own column index.

        Raises:
            CSVError: ``self.column_count`` is smaller than 2.
        """
        if self.column_count < 2:
            message = (
                "Relation file '%s' should have at least 2 elements in header line."
                % (self.infile.name))
            raise CSVError(message)

        # The first column is the source ID and the second is the destination ID.
        self.start_id, self.end_id = 0, 1
        self.start_namespace = None
        self.end_namespace = None

        # Everything after the two endpoint columns is a property name.
        for column, name in enumerate(header[2:], start=2):
            self.column_names[column] = name