def convert_header_with_schema(self, header):
    """Parse a schema-annotated header row into column types and names.

    Every header field is expected to be a ``name:TYPE`` pair. The type
    token is stripped, upper-cased and passed to ``convert_schema_type``;
    the result is stored in ``self.types`` at the column's index. The
    stripped name is stored in ``self.column_names`` only for columns whose
    type is not START_ID, END_ID or IGNORE.

    Args:
        header: Iterable of header field strings, one per column.

    Raises:
        CSVError: If a field contains more than one colon.
        SchemaError: If a field contains no colon at all, or if its name is
            empty while its type is not one of ID, START_ID, END_ID, IGNORE.
    """
    self.types = [None] * self.column_count  # Value type of every column.

    # Column types that are allowed to appear without a name.
    nameless_ok = (Type.ID, Type.START_ID, Type.END_ID, Type.IGNORE)
    # Column types whose name is never recorded in self.column_names.
    name_not_stored = (Type.START_ID, Type.END_ID, Type.IGNORE)

    for idx, field in enumerate(header):
        pair = field.split(':')

        # Multiple colons found in column name, emit error.
        # TODO might need to check for backtick escapes
        if len(pair) > 2:
            # split(':') yields one more piece than there are colons.
            raise CSVError("%s: Field '%s' had %d colons"
                           % (self.infile.name, field, len(pair) - 1))

        # No colon at all: there is no type token to convert. Report it as a
        # schema problem rather than letting pair[1] fail with IndexError.
        if len(pair) < 2:
            raise SchemaError(
                "%s: Each property in the header should be a colon-separated pair"
                % (self.infile.name))

        raw_name, raw_type = pair

        # Convert the column type.
        col_type = convert_schema_type(raw_type.upper().strip())

        # If the column did not have a name but the type requires one, emit an error.
        if len(raw_name) == 0 and col_type not in nameless_ok:
            raise SchemaError(
                "%s: Each property in the header should be a colon-separated pair"
                % (self.infile.name))

        # Only store the name if the column's values should be added as properties.
        if len(raw_name) > 0 and col_type not in name_not_stored:
            self.column_names[idx] = raw_name.strip()

        # Store the column type.
        self.types[idx] = col_type
def validate_row(self, row):
    """Check that a row has exactly ``self.column_count`` fields.

    Args:
        row: Sequence of field strings for one input row.

    Raises:
        CSVError: If the number of fields differs from ``self.column_count``.
            The message carries the input file name, the reader's current
            line number, both counts, and the row re-joined with the
            configured separator.
    """
    found = len(row)
    if found == self.column_count:
        return

    # Gather everything the message needs, in the order the format expects.
    details = (
        self.infile.name,
        self.reader.line_num,
        self.column_count,
        found,
        self.config.separator.join(row),
    )
    raise CSVError("%s:%d Expected %d columns, encountered %d ('%s')" % details)
def process_schemaless_header(self, header):
    """Set up endpoint columns and property names for a schemaless header.

    Column 0 is recorded as the source ID and column 1 as the destination
    ID; both namespaces are set to None. Every header field from index 2
    onward is copied into ``self.column_names`` at the same index.

    Args:
        header: Sequence of header field strings.

    Raises:
        CSVError: If ``self.column_count`` is less than 2.
    """
    if self.column_count < 2:
        raise CSVError(
            "Relation file '%s' should have at least 2 elements in header line."
            % (self.infile.name))

    # Fixed positions of the two endpoint identifiers.
    self.start_id, self.end_id = 0, 1
    self.start_namespace = self.end_namespace = None

    # Remaining columns keep their header text as the column name.
    for position, name in enumerate(header[2:], start=2):
        self.column_names[position] = name