Ejemplo n.º 1
0
def test_merge_tables_duplicate_column_names():
    """
    Confirm tables can be merged with overlapping column names, as long as
    they're not included in the list of columns to retain.

    Both tables carry a 'dupe' column. Merging without a column filter is
    expected to raise a ValueError; restricting the merge to 'value' is
    expected to succeed.

    """
    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'value': [4, 4, 4, 4],
        'dupe': [1, 1, 1, 1],
    }).set_index('building_id')

    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'dupe': [1, 1, 1],
    }).set_index('household_id')

    # Duplicate columns should raise a ValueError
    with pytest.raises(ValueError) as excinfo:
        merge_tables([households, buildings])
    # Keep the message visible when running pytest with output capture off
    print(excinfo.value)

    # Excluding the duplicated name should make things ok
    merged = merge_tables([households, buildings], columns=['value'])
    assert sorted(all_cols(merged)) == sorted(['household_id', 'value'])
Ejemplo n.º 2
0
def test_all_cols_dataframe(df):
    """
    Check that all_cols() accepts a DataFrame and returns the expected names.

    """
    expected = ['id', 'val1', 'val2']
    observed = utils.all_cols(df)
    assert sorted(observed) == sorted(expected)
Ejemplo n.º 3
0
def test_all_cols_orca(df):
    """
    Check that all_cols() accepts the name of a table registered with Orca.

    """
    table_name = 'df'
    orca.add_table(table_name, df)

    expected = ['id', 'val1', 'val2']
    observed = utils.all_cols(table_name)
    assert sorted(observed) == sorted(expected)
Ejemplo n.º 4
0
def test_all_cols_extras(df):
    """
    Confirm that all_cols() includes columns not part of the Orca core table.

    The extra column is registered through ``orca.add_column`` rather than
    being added to the DataFrame passed to ``orca.add_table``; all_cols()
    is expected to report it alongside the table's own names.

    """
    orca.add_table('df', df)
    # An explicit dtype avoids the pandas warning about the changing default
    # dtype of an empty Series; only the column's name matters to this test.
    orca.add_column('df', 'newcol', pd.Series(dtype='float64'))
    cols = utils.all_cols('df')
    assert sorted(cols) == sorted(['id', 'val1', 'val2', 'newcol'])
Ejemplo n.º 5
0
def test_all_cols_unsupported_type(df):
    """
    Confirm that all_cols() raises a ValueError for an unsupported type.

    A list wrapping the DataFrame serves as the unsupported input.

    """
    with pytest.raises(ValueError) as excinfo:
        utils.all_cols([df])
    # Keep the message visible when running pytest with output capture off
    print(excinfo.value)
Ejemplo n.º 6
0
def test_merge_two_tables():
    """
    Merge households onto buildings and check the resulting column names.

    """
    buildings = pd.DataFrame(
        {'building_id': [1, 2, 3, 4], 'value': [4, 4, 4, 4]}
    ).set_index('building_id')

    households = pd.DataFrame(
        {'household_id': [1, 2, 3], 'building_id': [2, 3, 4]}
    ).set_index('household_id')

    expected = ['household_id', 'building_id', 'value']
    result = merge_tables([households, buildings])
    assert sorted(all_cols(result)) == sorted(expected)
Ejemplo n.º 7
0
def test_merge_three_tables_out_of_order():
    """
    Merge three tables where the first table holds the join keys for both
    the second and the third.

    """
    zones = pd.DataFrame(
        {'zone_id': [1], 'size': [1]}
    ).set_index('zone_id')

    buildings = pd.DataFrame(
        {'building_id': [1, 2, 3, 4], 'height': [4, 4, 4, 4]}
    ).set_index('building_id')

    # households references both buildings and zones directly
    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [2, 3, 4],
        'zone_id': [1, 1, 1],
    }).set_index('household_id')

    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    result = merge_tables([households, buildings, zones])
    assert sorted(all_cols(result)) == sorted(expected)
Ejemplo n.º 8
0
def test_merge_three_tables():
    """
    Merge three tables that chain together: households reference buildings,
    and buildings reference zones.

    """
    zones = pd.DataFrame(
        {'zone_id': [1], 'size': [1]}
    ).set_index('zone_id')

    buildings = pd.DataFrame({
        'building_id': [1, 2, 3, 4],
        'zone_id': [1, 1, 1, 1],
        'height': [4, 4, 4, 4],
    }).set_index('building_id')

    households = pd.DataFrame(
        {'household_id': [1, 2, 3], 'building_id': [2, 3, 4]}
    ).set_index('household_id')

    expected = ['household_id', 'building_id', 'zone_id', 'height', 'size']
    result = merge_tables([households, buildings, zones])
    assert sorted(all_cols(result)) == sorted(expected)
Ejemplo n.º 9
0
def test_merge_tables_multiindex():
    """
    Merge households onto a units table indexed by (building_id, unit_id).

    """
    units = pd.DataFrame({
        'building_id': [1, 1, 2, 2],
        'unit_id': [1, 2, 1, 2],
        'value': [4, 4, 4, 4],
    }).set_index(['building_id', 'unit_id'])

    # households carries both key columns of the units multi-index
    households = pd.DataFrame({
        'household_id': [1, 2, 3],
        'building_id': [1, 1, 2],
        'unit_id': [1, 2, 1],
    }).set_index('household_id')

    expected = ['household_id', 'building_id', 'unit_id', 'value']
    result = merge_tables([households, units])
    assert sorted(all_cols(result)) == sorted(expected)