예제 #1
0
def generate_data(name):
    """Build the featurized matrix for the CSV file ``name`` and save it.

    Reads ``name`` (first column used as the index), attaches a ``gaps``
    column taken from ``gaps.csv`` and a binary ``is_daoti`` column, then
    runs the matminer composition and oxidation-state featurizers and writes
    the result to ``2d_vector_plus.csv``. Summary statistics are written to
    ``general_look_jie.csv`` along the way.

    The original author's note says the finished matrix carries 145 features
    in total; that count is not checked here.

    NOTE(review): a second, zero-argument ``generate_data`` is defined
    further down in this file; if both stay in one module the later
    definition replaces this one.

    Args:
        name: Path of the input CSV, e.g. ``'test_plus_gaps.csv'``. It must
            provide a ``full_formula`` column, and its index is compared
            against ``'mp-' + <index of gaps.csv>``.
    """
    df = pd.read_csv(name, index_col=[0])
    # -10.0 is the placeholder kept by rows that find no match in gaps.csv.
    df['gaps'] = -10.0
    df_gap = pd.read_csv("gaps.csv", index_col=[0])
    print(df_gap.index)

    # Walk gaps.csv once and advance through df only on a match. This relies
    # on both files listing their ids in the same relative order.
    n_rows = len(df.index)
    gaps_pos = df.columns.get_loc('gaps')
    row = 0
    for j, gap_id in enumerate(df_gap.index):
        if row >= n_rows:
            # Every row of df has been filled (or df is empty); reading
            # df.index[row] past the end would raise IndexError.
            break
        if 'mp-' + str(gap_id) == df.index[row]:
            df.iloc[row, gaps_pos] = df_gap.iloc[j, 0]
            row += 1
    print("合并完毕")

    # Classification label: 1 where the gap is exactly 0, otherwise 0
    # (presumably "is a conductor" -- confirm with the author).
    # Vectorised on the named column because DataFrame.ix was removed in
    # pandas 1.0.
    df['is_daoti'] = (df['gaps'] == 0).astype('int64')
    print("分类feature建立完成")

    # Summary statistics for a first overall look at the data.
    summary = df.describe()
    print(summary)
    summary.to_csv('general_look_jie.csv')

    # Turn the formula strings into a 'composition' column, then featurize
    # that column with the 'magpie' ElementProperty preset.
    df = StrToComposition().featurize_dataframe(
        df, 'full_formula', ignore_errors=True)
    print(df.head())
    ep_feat = ElementProperty.from_preset(preset_name='magpie')
    df = ep_feat.featurize_dataframe(
        df, col_id='composition', ignore_errors=True)
    print(df.head())

    # Add oxidation-state features, computed from the 'composition_oxid'
    # column produced by CompositionToOxidComposition.
    df = CompositionToOxidComposition().featurize_dataframe(
        df, col_id='composition')
    os_feat = OxidationStates()
    df = os_feat.featurize_dataframe(df, col_id='composition_oxid')
    new_name = '2d_vector_plus.csv'
    df.to_csv(new_name)
def test_featurizers():
    """Run the composition featurizers over ``test.csv`` and save the result.

    Loads ``test.csv`` with its first column as the index, featurizes the
    ``formula`` column with ``StrToComposition``, then applies the ``magpie``
    preset of ``ElementProperty`` and the ``OxidationStates`` featurizer.
    The head of the frame is printed after each stage, together with the
    ``ElementProperty`` citations, and the final frame is written to
    ``after_test.csv``.
    """
    frame = pd.read_csv('test.csv', index_col=[0])

    # Formula strings -> the 'composition' column used by the next steps.
    to_composition = StrToComposition()
    frame = to_composition.featurize_dataframe(frame, 'formula')
    print(frame.head())

    # Next, one featurizer adds a whole series of feature columns, taking
    # the 'composition' column as its input.
    magpie = ElementProperty.from_preset(preset_name='magpie')
    frame = magpie.featurize_dataframe(frame, col_id='composition')
    print(frame.head())
    print(magpie.citations())

    # Bring in the oxidation-state related features, computed from the
    # 'composition_oxid' column.
    to_oxid = CompositionToOxidComposition()
    frame = to_oxid.featurize_dataframe(frame, 'composition')
    oxidation = OxidationStates()
    frame = oxidation.featurize_dataframe(frame, col_id='composition_oxid')
    print(frame.head())
    frame.to_csv('after_test.csv')
def generate_data():
    """Featurize the elastic-tensor dataset, saving a CSV after every stage.

    Loads the frame returned by ``load_elastic_tensor()``, drops a fixed list
    of columns, writes summary statistics, and then adds composition,
    element-property (``magpie`` preset), oxidation-state and density
    features in turn. Each intermediate frame is printed (head only) and
    written to its own CSV file in the working directory.

    NOTE(review): this file also defines ``generate_data(name)`` earlier on;
    if both stay in one module, this later definition replaces that one.
    """
    elastic = load_elastic_tensor()
    elastic.to_csv('原始elastic数据.csv')
    print(elastic.columns)

    # Columns that are not wanted for the rest of the pipeline.
    to_discard = [
        'volume',
        'nsites',
        'compliance_tensor',
        'elastic_tensor',
        'elastic_tensor_original',
        'K_Voigt',
        'G_Voigt',
        'K_Reuss',
        'G_Reuss',
    ]
    elastic = elastic.drop(labels=to_discard, axis=1)
    print(elastic.head())
    elastic.to_csv('扔掉不需要的部分.csv')

    # describe() first, to get an overall feel for the data; the original
    # author noted that nothing unusual showed up.
    overview = elastic.describe()
    print(overview)
    overview.to_csv('general_look.csv')

    to_composition = StrToComposition()
    elastic = to_composition.featurize_dataframe(elastic, 'formula')
    print(elastic.head())
    elastic.to_csv('引入composition.csv')

    # Next, one featurizer adds a whole series of feature columns, taking
    # the 'composition' column as its input.
    magpie = ElementProperty.from_preset(preset_name='magpie')
    elastic = magpie.featurize_dataframe(elastic, col_id='composition')
    print(elastic.head())
    print(magpie.citations())
    elastic.to_csv('将composition特征化后.csv')

    # Bring in the oxidation-state related features.
    to_oxid = CompositionToOxidComposition()
    elastic = to_oxid.featurize_dataframe(elastic, 'composition')
    oxidation = OxidationStates()
    elastic = oxidation.featurize_dataframe(
        elastic, col_id='composition_oxid')
    print(elastic.head())
    elastic.to_csv('引入氧化态之后.csv')

    # Besides composition-based features there are others, for example
    # ones based on the 'structure' column.
    density = DensityFeatures()
    elastic = density.featurize_dataframe(elastic, 'structure')
    print(elastic.head())
    elastic.to_csv('引入结构中的密度.csv')
    print(density.feature_labels())
예제 #4
0
from matminer.featurizers.composition import ElementProperty
from matminer.featurizers.conversions import CompositionToOxidComposition
from matminer.featurizers.conversions import StrToComposition

# Sample rows: one (id, reduced formula) pair per material.
data = [
    ['mp-1025496', 'Nb1 Se2'],
    ['mp-977563', 'Nb1 Ir2'],
    ['mp-864631', 'Nb1 Rh2'],
    ['mp-3368', 'Nb3 O8'],
]

fdf = pd.DataFrame(data, columns=['Id', 'Reduced Formula'])

# Initial conversion to matminer objects: the formula strings are featurized
# first, then the resulting 'composition' column.
fdf = StrToComposition().featurize_dataframe(fdf, 'Reduced Formula')
fdf = CompositionToOxidComposition().featurize_dataframe(fdf, 'composition')

print("The initial dataset has {}".format(fdf.shape))
print(fdf.head())

# Block 2 - Featurization
#
# -- start F1
ep_feat = ElementProperty.from_preset(preset_name='magpie')
fdf = ep_feat.featurize_dataframe(
    fdf,
    col_id='composition',
    ignore_errors=True,
)

# Collect the column names that contain 'MagpieData'. The original note says
# the 'MagpieData' string is to be excluded from the column names; this
# statement only gathers the affected columns.
magpie_cols = [label for label in fdf.columns if 'MagpieData' in label]
예제 #5
0
    if quick_demo:
        query_string += " AND mdf.scroll_id:<10000"

    data = mdf.get_data(query_string, unwind_arrays=False)
    print(data.head())
    # 重命名、预处理和筛选,delta_e应该是形成能
    data = data[['oqmd.delta_e.value', 'material.composition']]
    data = data.rename(columns={
        'oqmd.delta_e.value': 'delta_e',
        'material.composition': 'composition'
    })
    data = StrToComposition(
        target_col_id='composition_obj').featurize_dataframe(
            data, 'composition')
    data.sort_values('delta_e', ascending=True, inplace=True)
    print(data.head(3))
    for k in ['delta_e']:
        data[k] = pd.to_numeric(data[k])

    original_count = len(data)
    data = data[~data['delta_e'].isnull()]
    print('Removed %d/%d entries' %
          (original_count - len(data), original_count))

    original_count = len(data)
    data['composition'] = data['composition_obj'].apply(
        lambda x: x.reduced_formula)
    data.sort_values('delta_e', ascending=True, inplace=True)
    data.drop_duplicates('composition', keep='first', inplace=True)
    print('Removed %d/%d entries' %
          (original_count - len(data), original_count))