def test_generate_schema_small():
    """Check the schema generated for the small sample CSV file.

    Asserts that a header row is reported, the delimiter is a comma, the
    columns at indexes 3 and 4 share the TEXT data type, and that seven
    columns are found in total.
    """
    csv_path = os.path.abspath('caterpillar/test_resources/test_small.csv')
    # NOTE(review): the 'U' mode flag was removed from open() in Python 3.11;
    # revisit this mode string if the suite is ever run on a newer interpreter.
    with open(csv_path, 'rbU') as csv_file:
        inferred = schema.generate_csv_schema(csv_file)

        assert inferred.has_header is True
        assert inferred.dialect.delimiter == ','

        fourth_type = inferred.columns[3].type
        fifth_type = inferred.columns[4].type
        assert fourth_type == fifth_type == schema.ColumnDataType.TEXT

        assert len(inferred.columns) == 7
def test_generate_csv_schema_twitter():
    """Check the schema generated for the twitter sentiment CSV file.

    Verifies header and delimiter detection, the name and data type of the
    first two columns, the index schema derived from the CSV schema, and that
    every data row maps to exactly one value.
    """
    csv_path = os.path.abspath('caterpillar/test_resources/twitter_sentiment.csv')
    # NOTE(review): the 'U' mode flag was removed from open() in Python 3.11;
    # revisit this mode string if the suite is ever run on a newer interpreter.
    with open(csv_path, 'rbU') as csv_file:
        inferred = schema.generate_csv_schema(csv_file)

        assert inferred.has_header is True
        assert inferred.dialect.delimiter == ','

        cols = inferred.columns

        sentiment_col = cols[0]
        assert sentiment_col.name == 'Sentiment'
        assert sentiment_col.type == schema.ColumnDataType.IGNORE

        text_col = cols[1]
        assert text_col.name == 'Text'
        assert text_col.type == schema.ColumnDataType.TEXT

        # Only the 'Text' column is expected to appear in the index schema.
        idx_schema = inferred.as_index_schema(['good quality'])
        assert len(idx_schema) == 1
        assert isinstance(idx_schema['Text'], TEXT)

        # Rewind and walk the raw rows, discarding the first (header) row.
        csv_file.seek(0)
        rows = csv.reader(csv_file)
        next(rows)
        for record in rows:
            mapped = inferred.map_row(record)
            assert len(mapped) == 1
def test_generate_schema_no_header():
    """Check the schema generated for a CSV file that has no header row.

    Asserts that no header is reported and that seven columns are found.
    """
    csv_path = os.path.abspath('caterpillar/test_resources/test_no_header.csv')
    # NOTE(review): the 'U' mode flag was removed from open() in Python 3.11;
    # revisit this mode string if the suite is ever run on a newer interpreter.
    with open(csv_path, 'rbU') as csv_file:
        inferred = schema.generate_csv_schema(csv_file)

        assert inferred.has_header is False

        column_count = len(inferred.columns)
        assert column_count == 7