Beispiel #1
0
def test_infer_schema_from_another_csv_file():
    '''Check the schema inferred from the ``test.csv`` sample file.

    Verifies the number of fields, the complete first field (a datetime
    column carrying a temporal extent), and the index, name and type of the
    second and third fields.

    '''
    # Resolve the sample file relative to this test module so the test does
    # not depend on the current working directory.
    here = os.path.dirname(__file__)
    abspath = os.path.abspath(os.path.join(here, '../test-data/test.csv'))

    schema = csv_utils.infer_schema_from_csv_file(abspath)

    fields = schema['fields']
    # assert_equal is the non-deprecated spelling of assert_equals; unlike a
    # bare ``assert`` it is not stripped under ``python -O`` and it reports
    # both values on failure.
    nose.tools.assert_equal(len(fields), 7)
    nose.tools.assert_equal(
        fields[0],
        {
            'index': 0,
            'name': 'datetime',
            'temporal_extent': '2011-12-30T00:00:00/2011-12-30T00:00:00',
            'type': 'datetime',
        }
    )

    # Only index, name and type are pinned for the remaining checked fields;
    # any additional keys on those fields are deliberately ignored.
    expected_basics = [
        (1, 'timedelta', 'string'),
        (2, 'integer', 'integer'),
    ]
    for position, name, type_name in expected_basics:
        field = fields[position]
        nose.tools.assert_equal(field['index'], position)
        nose.tools.assert_equal(field['name'], name)
        nose.tools.assert_equal(field['type'], type_name)
Beispiel #2
0
def test_infer_dates_in_schema_from_csv_file():
    '''Check that the first column of ``data.csv`` is inferred as datetime.'''
    # Get the absolute path to the test data file, relative to this module.
    here = os.path.dirname(__file__)
    abspath = os.path.abspath(os.path.join(here, '../test-data/data.csv'))

    schema = csv_utils.infer_schema_from_csv_file(abspath)

    # assert_equal is the non-deprecated spelling of assert_equals.
    nose.tools.assert_equal(schema['fields'][0]['type'], 'datetime')
Beispiel #3
0
def test_infer_schema_temporal_extent_raises_error(m):
    '''Infer dates from a temporal extent, but raise an exception.

    Test that infer_schema_from_csv_file handles exceptions well: the first
    column must still be inferred as ``datetime`` when the mocked callable
    raises.

    ``m`` is a mock object whose ``side_effect`` is configured here.
    NOTE(review): it is presumably injected by a ``mock.patch`` decorator
    (targeting the temporal-extent helper) that is not visible in this
    excerpt -- confirm against the full file.

    '''
    # A side_effect sequence may mix exception instances and exception
    # classes; each call to the mock raises the next entry.
    errors = [ValueError(), TypeError, IOError, IndexError()]
    m.side_effect = errors

    # Get the absolute path to the test data file, relative to this module.
    here = os.path.dirname(__file__)
    abspath = os.path.abspath(os.path.join(here, '../test-data/data.csv'))

    # Run the inference once per configured exception.  Iterating over the
    # list itself keeps the number of runs in step with the side effects
    # instead of relying on a separately maintained count.
    for _ in errors:
        schema = csv_utils.infer_schema_from_csv_file(abspath)
        nose.tools.assert_equal(schema['fields'][0]['type'], 'datetime')
Beispiel #4
0
def _infer_schema_for_resource(resource):
    '''Build a JSON Table Schema for ``resource`` from its uploaded file.

    Column headers and types are guessed from the resource's CSV file.  A
    notice is flashed when the file does not look like a CSV or text file,
    but inference is attempted regardless.  If the file cannot be read as
    CSV, a schema with an empty field list is returned.

    '''
    # This function is only called right after a file upload, so the
    # resource is assumed to have an uploaded file and this lookup is not
    # expected to raise.
    resource_path = util.get_path_to_resource_file(resource)

    looks_like_csv = csv_utils.resource_is_csv_or_text_file(resource_path)
    if not looks_like_csv:
        notice = (
            'This file does not seem to be a csv or text file. '
            'You could try validating this file at http://csvlint.io'
        )
        helpers.flash_notice(notice)

    try:
        return csv_utils.infer_schema_from_csv_file(resource_path)
    except exceptions.CouldNotReadCSVException:
        # Fall back to a schema with no fields for unreadable files.
        return {'fields': []}
Beispiel #5
0
def test_infer_schema_from_csv_file():
    '''Compare the schema inferred from AllstarFull.csv against a fixture.

    The whole schema is checked in one comparison, including the summary
    statistics attached to the numeric columns.

    Ideally this would be split into separate tests for different kinds of
    CSV file.  For now there is just this one.

    '''
    # Locate the sample file relative to this test module.
    here = os.path.dirname(__file__)
    relative = '../test-data/lahmans-baseball-database/AllstarFull.csv'
    csv_path = os.path.abspath(os.path.join(here, relative))

    # One entry per column, in column order.  String columns carry only
    # index/name/type; numeric columns additionally carry statistics.
    expected_fields = [
        {'index': 0, 'name': 'playerID', 'type': 'string'},
        {
            'index': 1,
            'name': 'yearID',
            'type': 'integer',
            'count': 4912.0,
            'mean': 1975.2168159609121,
            'std': 23.055456639147902,
            'min': 1933.0,
            '25%': 1957.0,
            '50%': 1975.0,
            '75%': 1996.0,
            'max': 2013.0,
        },
        {
            'index': 2,
            'name': 'gameNum',
            'type': 'integer',
            'count': 4912.0,
            'mean': 0.14128664495114007,
            'std': 0.46806965450335747,
            'min': 0.0,
            '25%': 0.0,
            '50%': 0.0,
            '75%': 0.0,
            'max': 2.0,
        },
        {'index': 3, 'name': 'gameID', 'type': 'string'},
        {'index': 4, 'name': 'teamID', 'type': 'string'},
        {'index': 5, 'name': 'lgID', 'type': 'string'},
        {
            'index': 6,
            'name': 'GP',
            'type': 'number',
            'count': 4875.0,
            'mean': 0.78174358974358971,
            'std': 0.41310477594222272,
            'min': 0.0,
            '25%': 1.0,
            '50%': 1.0,
            '75%': 1.0,
            'max': 1.0,
        },
        {
            'index': 7,
            'name': 'startingPos',
            'type': 'number',
            'count': 1540.0,
            'mean': 5.0519480519480515,
            'std': 2.646100537485232,
            'min': 0.0,
            '25%': 3.0,
            '50%': 5.0,
            '75%': 7.0,
            'max': 10.0,
        },
    ]

    inferred = csv_utils.infer_schema_from_csv_file(csv_path)

    assert inferred == {'fields': expected_fields}