Beispiel #1
0
def evolution_drift_with_seq(df_gas, n_batch_training):
    """Check how much drift hurts a model trained on the first batches.

    Calls ``evo_train(df_gas, n_batch_training)`` and then, for every batch
    from ``n_batch_training + 1`` through 10, loads the saved model named
    ``temp_training_{n_batch_training}_test{batch}``, evaluates it on the
    test split produced by ``Evolution_drift(...).split_data()`` and records
    the loss and accuracy. The results are drawn as two bar charts
    (accuracy and loss per batch) and handed to ``save_figure``.

    Args:
        df_gas: Data passed through to ``evo_train`` and ``Evolution_drift``.
        n_batch_training: Number of leading batches used for training; must
            be below 10 for any test batch to remain.

    NOTE(review): presumably ``evo_train`` is what writes the
    ``temp_training_*`` models loaded here -- confirm against its definition.
    """
    # Train
    evo_train(df_gas, n_batch_training)

    # Test every later batch and collect the metrics keyed by batch number.
    model_dict = {}
    for batch in range(n_batch_training + 1, 11):
        # The batch number belongs inside the banner (it used to read
        # "Batch 2" for every iteration, with the real number trailing).
        print(f'\n\n----------Batch {batch}-----------\n\n')
        seq = SeqModel()
        model_name = f'temp_training_{n_batch_training}_test{batch}'
        # Only the state loaded into ``seq`` is used below, so the return
        # value is intentionally not kept.
        seq.load_model(model_name)

        ev = Evolution_drift(df_gas, n_batch_training, batch)
        _, X_test, _, y_test = ev.split_data()
        loss, acc = seq.model_evaluate(X_test, y_test)
        model_dict[batch] = {'acc': acc, 'loss': loss}

    # Plot results: one row per tested batch, columns 'acc' and 'loss'.
    df_results = pd.DataFrame.from_dict(model_dict).T

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 8))
    fig.suptitle(f'Training with first {n_batch_training} batches')
    ax1 = df_results.plot(kind='bar', y='acc', ax=ax1)
    # Accuracy is a fraction, so pin its axis to the full [0, 1] range.
    ax1.set_ylim([0, 1])
    df_results.plot(kind='bar', y='loss', ax=ax2)
    save_figure(fig, f'Step1_NBATCH_{n_batch_training}_acc_loss')
def apply_tsne(X, y, name):
    """Embed ``X`` with t-SNE in 2-D and in 3-D, then show and save both plots.

    Each embedding is drawn as a scatter plot coloured by ``y`` with a "Gas"
    legend derived from the scatter's own colour mapping. The figures are
    saved through ``save_figure`` as ``Step0_3_TSNE_2d_{name}`` and
    ``Step0_3_TSNE_3d_{name}``.

    Args:
        X: Feature matrix handed to ``TSNE.fit_transform``.
        y: Per-sample values used as the point colours (``c=y``).
        name: Suffix for the saved figure names.
    """
    print('tsne2d')
    X_embedded = TSNE(n_components=2).fit_transform(X)
    fig, ax = plt.subplots(figsize=(12, 8))
    ax.set_title('TSNE 2d Batch1, Sensor1, Concentration less 100ppmv')
    # No ``label=`` here: the legend below is built from explicit handles,
    # so a label on the collection would never be displayed.
    scatter = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], c=y)
    ax.legend(*scatter.legend_elements(), loc="upper right", title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_TSNE_2d_{name}')

    print('tsne3d')
    X_embedded = TSNE(n_components=3).fit_transform(X)
    fig = plt.figure(figsize=(8, 6))
    # Create the 3-D axes through the figure so it is actually attached to
    # it; a bare ``Axes3D(fig, ...)`` is no longer auto-added by matplotlib.
    ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)
    # Keep the 3-D handle: the legend must describe this scatter, not the
    # 2-D one drawn above.
    scatter = ax.scatter(X_embedded[:, 0],
                         X_embedded[:, 1],
                         X_embedded[:, 2],
                         c=y)
    ax.set_title('TSNE 3d Batch1, Sensor1, Concentration less 100ppmv')
    ax.legend(*scatter.legend_elements(), loc="upper right", title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_TSNE_3d_{name}')
def apply_KMeans_3d(X, y, name):
    """Cluster ``X`` with 6-cluster KMeans and compare the clusters with ``y``.

    The clustering runs on the full feature matrix ``X``; a 3-component PCA
    projection is used only to draw the points. Two 3-D scatter plots are
    shown and saved via ``save_figure`` (coloured by predicted cluster and
    by ``y``), followed by a confusion-matrix heatmap of ``y`` against the
    cluster ids, which is shown but not saved.

    Args:
        X: Feature matrix used for both PCA and KMeans.
        y: Reference labels used for colouring and for the confusion matrix.
        name: Suffix for the saved figure names and the heatmap title.

    NOTE(review): KMeans numbers its clusters arbitrarily, so the confusion
    matrix rows/columns are presumably not aligned by gas identity -- verify
    before reading accuracy off the diagonal.
    """
    # 3-D projection used purely for plotting.
    pca = PCA(n_components=3)
    pca.fit(X, y)
    xp = pca.transform(X)

    number_of_clusters = 6
    km = KMeans(n_clusters=number_of_clusters)
    # Cluster on the original features, not on the PCA projection.
    y_pred = km.fit_predict(X)
    # Shift cluster ids from 0..5 to 1..6 so they line up with the 1-6
    # category indices used in the dataframe.
    y_pred = y_pred + 1

    fig = plt.figure(figsize=(8, 6))
    # Attach the 3-D axes through the figure; a bare ``Axes3D(fig, ...)`` is
    # no longer auto-added to the figure by matplotlib.
    ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)
    scatter = ax.scatter(xp[:, 0], xp[:, 1], xp[:, 2], c=y_pred)
    ax.legend(*scatter.legend_elements(),
              loc="upper right",
              title="Clusters")
    plt.show()
    save_figure(fig, f'Step0_3_Color for each cluster_3d_{name}')

    fig = plt.figure(figsize=(8, 6))
    ax = fig.add_axes([0, 0, .95, 1], projection='3d', elev=48, azim=134)
    scatter = ax.scatter(xp[:, 0], xp[:, 1], xp[:, 2], c=y)
    ax.legend(*scatter.legend_elements(),
              loc="upper right",
              title="Gas")
    plt.show()
    save_figure(fig, f'Step0_3_Color for each gas_3d_{name}')

    # Always open a fresh figure: asking for figure number 3 explicitly could
    # reuse an already-open figure and draw the heatmap onto its 3-D axes.
    plt.figure(figsize=(8, 6))
    conf = confusion_matrix(y, y_pred)
    sns.heatmap(conf, annot=True, fmt='d')
    plt.title(f"Confusion matrix_{name}")
    plt.show()
Beispiel #4
0
    df_conf = pd.DataFrame(data=confusion, columns=label_gas, index=label_gas)
    sns.heatmap(df_conf, annot=True, fmt="d", cmap='Blues')
    plt.title('Confusion Matrix')
    plt.yticks(rotation=0)
    plt.show()
    return f


if __name__ == '__main__':

    # Score the saved sequential neural nets with one confusion matrix each.
    df_gas = load_data()

    # (saved-model name, class providing the matching split, figure name)
    variants = (
        ('ModelSimple', SeqModelSimple, 'ConfMatrix_ModelSimple'),
        ('ModelWithConcentration', SeqModelWithConcentration,
         'ConfMatrix_ModelWithConcentration'),
    )

    # Build both splits up front; only the test halves are used below.
    # Training stays disabled here -- to regenerate a saved model, call
    # train_and_save_model(<saved-model name>) on the corresponding instance.
    held_out = []
    for _, split_cls, _ in variants:
        _, X_eval, _, y_eval = split_cls(df_gas).split_data()
        held_out.append((X_eval, y_eval))

    # Load each saved model, plot its confusion matrix and save the figure.
    for (saved_name, _, figure_name), (X_eval, y_eval) in zip(variants,
                                                              held_out):
        seq = SeqModel()
        seq.load_model(saved_name)
        f = plot_conf(seq.get_model(), X_eval, y_eval)
        save_figure(f, figure_name)
Beispiel #5
0
    df_temp = pd.DataFrame()
    for g, c in dict_gas_concentration.items():
        df_select = df[(df['GAS'] == g) & (df['CONCENTRATION'] == c)]
        df_temp = df_temp.append(df_select)

    # Select only one sensor
    X = df_temp.iloc[:, :8]
    y = df_temp['GAS']

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=42)
    fig = lgbm_conf_shap(X_train, X_test, y_train, y_test)
    save_figure(fig, 'Step4_LGBM_one_sensor')

    ### Now let's use all sensor data
    # Select all sensors
    X = df_temp.iloc[:, :128]
    y = df_temp['GAS']

    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.3,
                                                        random_state=42)
    fig = lgbm_conf_shap(X_train, X_test, y_train, y_test)

    ### Okay, now let's train with batches 1 to 8, and predict batch 9
    df_temp_train = df_temp[df_temp['Batch ID'].isin([1, 2, 3, 4, 5, 6, 7, 8])]
    df_temp_test = df_temp[df_temp['Batch ID'].isin([9])]
Beispiel #6
0
    pivot.round(2)
    print('\n', pivot.round(2).to_markdown())

    # Calculate which concentration is most common for each gas
    df_c = df_gas[['GAS', 'CONCENTRATION']].value_counts()
    df_c1 = df_c.reset_index()
    df_c1 = df_c1.rename(columns={0: 'count'})
    idx = df_c1.groupby(['GAS'])['count'].transform(max) == df_c1['count']
    result = df_c1[idx].sort_values(by='GAS')
    print(result)

    ## Plots

    # Show samples count per GAS
    fig = plot_count_per_batch_and_gas(df_gas)
    save_figure(fig, 'Step0_Count_Batch_Gas')
    plt.show()

    fig = plot_sample_count_per_gas(df_gas)
    save_figure(fig, 'Step0_Count_Gas')
    plt.show()

    ## Concentration plot (takes time to plot)
    fig, axes = plt.subplots(3, 2, figsize=(25, 20))
    fig.suptitle('Count of measurements of each Gas and concentration')
    for i, ax in enumerate(axes.flatten(), start=1):
        print(i)
        concentration_plot_count(ax, df_gas, gas=i)
    plt.tight_layout()
    plt.show()
    save_figure(fig, 'Step0_Concentration Distribution per gas')
from python.StandardFigure import save_figure
from python.LoadSensorData import get_sensors_list

if __name__ == '__main__':
    # Script entry point: plots correlation heatmaps of the sensor features,
    # first over the first 128 columns, then for selected groups of sensors.

    # Load data
    df = load_data()

    # Pairwise correlation of the first 128 columns, drawn as one heatmap.
    corr = df.iloc[:, :128].corr()
    fig = plt.figure(figsize=(20,20))
    sns.heatmap(corr, vmin=-1, vmax=1,
                cmap='coolwarm',
                square=True)
    plt.title('Correlation Between Features')
    plt.show()
    save_figure(fig, 'Step0_1_1_CorrelationBetweenFeatures')

    # Get a dataframe with only one sensor each of Type I, II, III and IV
    df_sens = get_sensors_list([0, 2, 4, 6])
    # Drop the non-feature columns so only the remaining columns are correlated.
    sensors_features = df_sens.drop(['Batch ID', 'GAS','CONCENTRATION'],axis=1)
    fig = plt.figure(figsize=(20, 20));
    ax = sns.heatmap(sensors_features.corr(),
                vmin=-1, vmax=1, annot=True, cmap='coolwarm');
    ax.set_title('Correlation between SensorPack1')
    plt.show()
    save_figure(fig, 'Step0_1_1_CorrelationBetweenFeatures_Data1')

    # Another combination of 4 sensors
    df_sens = get_sensors_list([1, 3, 5, 7])
    sensors_features = df_sens.drop(['Batch ID', 'GAS', 'CONCENTRATION'], axis=1)
    fig = plt.figure(figsize=(20, 20));
Beispiel #8
0
    #Train neural net for each individual sensor
    create_model_sensors()

    # load models
    modP1, confP1 = load_and_test_model_pack('Seq_SensorP1', dict_sensors['SensorP1'])
    modP2, confP2 = load_and_test_model_pack('Seq_SensorP2', dict_sensors['SensorP2'])
    modP3, confP3 = load_and_test_model_pack('Seq_SensorP3', dict_sensors['SensorP3'])
    modP4, confP4 = load_and_test_model_pack('Seq_SensorP4', dict_sensors['SensorP4'])

    modA, confA = load_and_test_model_pack('Seq_TypeA', dict_sensors['TypeA'])
    modB, confB = load_and_test_model_pack('Seq_TypeB', dict_sensors['TypeB'])
    modC, confC = load_and_test_model_pack('Seq_TypeC', dict_sensors['TypeC'])
    modD, confD = load_and_test_model_pack('Seq_TypeD', dict_sensors['TypeD'])

    # Los 4 modelos tienen la misma accuracy, pero, ¿y la matrix de confusion?
    fig = plt.figure(); ax = sns.heatmap(confP1, annot=True); plt.title('Seq_SensorP1'); plt.show(); save_figure(fig,'Conf_Seq_SensorP1' )
    fig = plt.figure(); ax = sns.heatmap(confP2, annot=True); plt.title('Seq_SensorP2'); plt.show(); save_figure(fig,'Conf_Seq_SensorP2' )
    fig = plt.figure(); ax = sns.heatmap(confP3, annot=True); plt.title('Seq_SensorP3'); plt.show(); save_figure(fig,'Conf_Seq_SensorP3' )
    fig = plt.figure(); ax = sns.heatmap(confP4, annot=True); plt.title('Seq_SensorP4'); plt.show(); save_figure(fig,'Conf_Seq_SensorP4' )

    # let's see the result for each sensor type
    fig = plt.figure(); ax = sns.heatmap(confA, annot=True); plt.title('Seq_TypeA'); plt.show(); save_figure(fig,'Conf_Seq_TypeA')
    fig = plt.figure(); ax = sns.heatmap(confB, annot=True); plt.title('Seq_TypeB'); plt.show(); save_figure(fig,'Conf_Seq_TypeB')
    fig = plt.figure(); ax = sns.heatmap(confC, annot=True); plt.title('Seq_TypeC'); plt.show(); save_figure(fig,'Conf_Seq_TypeC')
    fig = plt.figure(); ax = sns.heatmap(confD, annot=True); plt.title('Seq_TypeD'); plt.show(); save_figure(fig,'Conf_Seq_TypeD')

    # Nos quedamos con el numero de aciertos (la diagonal)
    diagA = np.diag(confA)
    diagB = np.diag(confB)
    diagC = np.diag(confC)
    diagD = np.diag(confD)