Esempio n. 1
0
def descriptor_and_rows_to_dataframe(descriptor, rows):
    """Build a pandas DataFrame from a schema descriptor and data rows.

    Each row is paired positionally with ``schema.fields`` and every value
    is cast with the field's ``cast_value``. If the schema declares a
    single-column primary key, that field's values become the dataframe
    index and are excluded from the data columns.

    Args:
        descriptor: Schema descriptor passed to ``Schema``.
        rows: Iterable of rows; each row is an iterable of raw values in
            the same order as ``schema.fields``.

    Returns:
        pandas.DataFrame: One column per non-primary-key field, indexed by
        the primary key values when a primary key is declared.

    Raises:
        RuntimeError: If the schema declares a multi-column primary key.
    """

    # Prepare
    primary_key = None
    schema = Schema(descriptor)
    if len(schema.primary_key) == 1:
        primary_key = schema.primary_key[0]
    elif len(schema.primary_key) > 1:
        raise RuntimeError('Multi-column primary keys are not supported')

    # Get data/index
    data_rows = []
    index_rows = []
    # Per-field type overrides collected while scanning the rows.
    jtstypes_map = {}
    for row in rows:
        values = []
        index = None
        for field, value in zip(schema.fields, row):
            try:
                value = field.cast_value(value)
            except InvalidObjectType:
                # Fall back to decoding the raw value as JSON.
                value = json.loads(value)
            if value is None and field.type in ('number', 'integer'):
                # A missing numeric value is stored as NaN and the field is
                # recorded as 'number' for the dtype lookup below
                # (presumably because an integer dtype cannot hold NaN).
                jtstypes_map[field.name] = 'number'
                # `np.nan` is the canonical spelling; the `np.NaN` alias
                # was removed in NumPy 2.0.
                value = np.nan
            if field.name == primary_key:
                index = value
            else:
                values.append(value)
        data_rows.append(tuple(values))
        index_rows.append(index)

    # Get dtypes
    dtypes = []
    for field in schema.fields:
        if field.name != primary_key:
            field_name = field.name
            if six.PY2:
                field_name = field.name.encode('utf-8')
            dtype = jtstype_to_dtype(jtstypes_map.get(field.name, field.type))
            dtypes.append((field_name, dtype))

    # Create dataframe
    index = None
    columns = schema.headers
    array = np.array(data_rows, dtype=dtypes)
    if primary_key:
        index_field = schema.get_field(primary_key)
        index_dtype = jtstype_to_dtype(index_field.type)
        index_class = pd.Index
        if index_field.type in ['datetime', 'date']:
            index_class = pd.DatetimeIndex
        index = index_class(index_rows, name=primary_key, dtype=index_dtype)
        # Materialise as a list: on Python 3 `filter` yields a one-shot
        # iterator rather than a concrete sequence of column names.
        columns = [
            column for column in schema.headers if column != primary_key
        ]
    dataframe = pd.DataFrame(array, index=index, columns=columns)

    return dataframe
def test_get_field():
    """Check ``Schema.get_field`` for known and unknown field names."""
    schema = Schema(DESCRIPTOR_MIN)
    # Known fields come back as field objects carrying the requested name.
    for expected_name in ('id', 'height'):
        found = schema.get_field(expected_name)
        assert found.name == expected_name
    # An unknown field name yields None.
    missing = schema.get_field('undefined')
    assert missing is None