예제 #1
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_drop_duplicates():
    """Check that ``drop_duplicates`` controls the number of feature rows.

    The input lists 'NaCl' twice.  With ``drop_duplicates=False`` the first
    returned value must have one row per input formula; with
    ``drop_duplicates=True`` it must have exactly one row fewer.
    """
    formulae = ['NaCl', 'Al2O3', 'NaCl']
    n_inputs = len(formulae)
    df = pd.DataFrame({'formula': formulae, 'target': range(n_inputs)})

    # generate_features is unpacked into exactly four values; only the
    # first one (the feature table) is inspected in this test.
    X_kept, _y, _formulae, _skipped = cmp.generate_features(
        df, drop_duplicates=False)
    assert X_kept.shape[0] == n_inputs

    X_deduped, _y, _formulae, _skipped = cmp.generate_features(
        df, drop_duplicates=True)
    assert X_deduped.shape[0] == n_inputs - 1
예제 #2
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_sum_feat():
    """Check the feature count with and without ``sum_feat``.

    Using the 'oliynyk' element properties, the first returned value is
    expected to have 264 columns when ``sum_feat=False`` and 308 columns
    when ``sum_feat=True``.
    """
    formulae = ['NaCl', 'Al2O3', 'SiO2']
    df = pd.DataFrame({'formula': formulae,
                       'target': range(len(formulae))})

    X_plain, _y, _formulae, _skipped = cmp.generate_features(
        df, elem_prop='oliynyk', sum_feat=False)
    assert X_plain.shape[1] == 264

    # NOTE(review): the 44 extra columns are presumably one "sum" feature
    # per element property -- confirm against generate_features.
    X_summed, _y, _formulae, _skipped = cmp.generate_features(
        df, elem_prop='oliynyk', sum_feat=True)
    assert X_summed.shape[1] == 308
예제 #3
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_outputs():
    """Check the types and contents of the four generate_features outputs.

    Random targets are attached to a list of formulae that includes two
    entries ('EsNo' and 'Am') expected to be reported as skipped.
    """
    formulae = ['NaCl', 'Al2O3', 'NaCl', 'EsNo', 'BaTiO3', 'GaN', 'Am']
    targets = np.random.randn(len(formulae))
    df = pd.DataFrame({'formula': formulae, 'target': targets})

    X, y, formulae_out, skipped = cmp.generate_features(df)

    # check returns are the correct variable type
    expected_types = (
        (X, pd.DataFrame),
        (y, pd.Series),
        (formulae_out, pd.Series),
        (skipped, list),
    )
    assert all(isinstance(value, kind) for value, kind in expected_types)

    # check returned targets are equal to originally specified
    assert np.allclose(y, targets, rtol=1e-6, atol=1e-10)

    # check returned formulae are equal to originally specified
    assert np.all(formulae_out.values == formulae)

    # check exotic elements are skipped
    assert set(skipped) == {'EsNo', 'Am'}
예제 #4
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_mini():
    """Check that ``mini=True`` yields fewer feature columns than ``mini=False``.

    Both calls use the 'oliynyk' element properties on the same input; only
    the width of the first returned value is compared.
    """
    formulae = ['NaCl', 'Al2O3', 'SiO2']
    df = pd.DataFrame({'formula': formulae,
                       'target': range(len(formulae))})

    X_full, _y, _formulae, _skipped = cmp.generate_features(
        df, elem_prop='oliynyk', mini=False)
    X_mini, _y, _formulae, _skipped = cmp.generate_features(
        df, elem_prop='oliynyk', mini=True)

    # The last axis of the feature table is the number of features.
    assert X_mini.shape[-1] < X_full.shape[-1]
예제 #5
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_nans():
    """Check that the single formula 'NaN' still produces a non-empty result.

    NOTE(review): presumably guards against the string 'NaN' being treated
    as a missing value instead of a chemical formula -- confirm.
    """
    formulae = ['NaN']
    df = pd.DataFrame({'formula': formulae,
                       'target': range(len(formulae))})

    X, _y, _formulae, _skipped = cmp.generate_features(df)

    # Neither dimension of the feature table may be empty.
    n_rows, n_cols = X.shape[0], X.shape[1]
    assert not (n_rows == 0 or n_cols == 0)
예제 #6
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_extend_features():
    """Check that ``extend_features=True`` keeps extra input columns.

    Two additional numeric columns are attached to the input frame; both
    names must appear among the columns of the first returned value.
    """
    formulae = ['NaCl', 'Al2O3', 'SiO2']
    extra_names = ('extra_feature1', 'extra_feature2')

    df = pd.DataFrame({'formula': formulae,
                       'target': range(len(formulae))})
    # Extra features are simple offsets of the target column.
    df['extra_feature1'] = df['target'] + 0.5
    df['extra_feature2'] = df['target'] + 1.5

    X, _y, _formulae, _skipped = cmp.generate_features(
        df, extend_features=True)
    assert all(name in X.columns for name in extra_names)
예제 #7
0
파일: test.py 프로젝트: kaaiian/CBFV
def test_all_elem_props():
    """Smoke test: run generate_features once per entry in ``ELEM_PROPS``.

    No values are asserted.  The test fails only if a call raises or does
    not return exactly four values.
    """
    formulae = ['C', 'B', 'F', 'V', 'NaCl', 'Al2O3', 'SiO2']

    df = pd.DataFrame({'formula': formulae,
                       'target': range(len(formulae))})
    df['extra_feature1'] = df['target'] + 0.5
    df['extra_feature2'] = df['target'] + 1.5

    for prop_name in ELEM_PROPS:
        # The four-way unpacking is the only check performed here.
        _X, _y, _formulae, _skipped = cmp.generate_features(
            df, elem_prop=prop_name)
예제 #8
0
# -*- coding: utf-8 -*-
"""
Created on Thu Apr  9 10:07:00 2020

@author: Steven Kauwe
"""
import pandas as pd
from time import sleep
from cbfv import composition


def _featurize(message, frame, **options):
    """Print ``message``, pause one second, then featurize ``frame``.

    Any extra keyword arguments are forwarded to
    ``composition.generate_features``, whose return value is handed back
    unchanged.
    """
    print(message)
    # NOTE(review): the pause presumably keeps the message readable before
    # any output from the featurizer appears -- confirm.
    sleep(1)
    return composition.generate_features(frame, **options)


# Input data is read from the current working directory.
df = pd.read_csv('test_data_extended_feats.csv')

# First element of the returned sequence is the feature table.
output = _featurize('Featurizing DataFrame without extended features', df)
X_cbfv = output[0]


output = _featurize('Featurizing DataFrame with extended features', df,
                    extend_features=True)
X_extended = output[0]