# Rebuild the full target vector: each label is translated through its lookup
# dict, the y_gal rows come first, and the index is renumbered from 0.
y = pd.concat(
    [y_gal.replace(di_gal), y_exgal.replace(di_exgal)],
    ignore_index=True,
)

# Name every prediction column after the value stored under its positional
# code (0 .. len(di_*) - 1) in the matching lookup dict.
y_pred_gal = pd.DataFrame(y_pred_gal)
y_pred_gal.columns = [f'class_{di_gal[x]}' for x, _ in enumerate(di_gal)]

y_pred_exgal = pd.DataFrame(y_pred_exgal)
y_pred_exgal.columns = [
    f'class_{di_exgal[x]}' for x, _ in enumerate(di_exgal)
]

# Stack both blocks row-wise. A column that exists in only one block is NaN for
# the other block's rows after the concat and is filled with 0; the columns are
# then selected in the order given by utils_metric.classes.
y_pred = pd.concat(
    [y_pred_gal, y_pred_exgal],
    ignore_index=True,
).fillna(0)[[f'class_{c}' for c in utils_metric.classes]]

loss = utils_metric.multi_weighted_logloss(y.values, y_pred.values)


# =============================================================================
# weight
# =============================================================================
import utils_post

# One-hot encode the targets: one indicator column per distinct label.
y_true = pd.get_dummies(y)

# Fit the post-processing weights and append one extra trailing weight of 1.
weight = np.append(
    utils_post.get_weight(y_true, y_pred.values, eta=0.1, nround=9999),
    1,
)
print(list(weight))


# =============================================================================
Beispiel #2
0
# Put the out-of-fold predictions next to the existing sub_tr columns and
# relabel everything: the id column first, then one column per class id in
# ascending order.
sub_tr = pd.concat([sub_tr, oof], axis=1)
sub_tr.columns = [
    'object_id',
    *(f'class_{i}' for i in sorted(classes_gal + classes_exgal)),
]

# Zero the columns of the opposite group: rows whose id is in oid_gal lose all
# classes_exgal columns, rows whose id is in oid_exgal lose all classes_gal
# columns.
in_gal = sub_tr['object_id'].isin(oid_gal)
sub_tr.loc[in_gal, [f'class_{i}' for i in classes_exgal]] = 0
in_exgal = sub_tr['object_id'].isin(oid_exgal)
sub_tr.loc[in_exgal, [f'class_{i}' for i in classes_gal]] = 0

# Per-column multiplier: a hand-set base weight divided by the column total of
# the corresponding prediction column.
weight = (
    np.array([1, 2, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1])
    / sub_tr.iloc[:, 1:].sum()
).values

# Score the raw and the re-weighted predictions, then plot the re-weighted ones.
y_pred = sub_tr.iloc[:, 1:].values.astype(float)
print('before:', utils_metric.multi_weighted_logloss(y.values, y_pred))
print('after:',
      utils_metric.multi_weighted_logloss(y.values, y_pred * weight))

utils.plot_confusion_matrix(__file__, y_pred * weight)

# =============================================================================
# weight
# =============================================================================
import utils_post

# One-hot encode the targets: one indicator column per distinct label.
y_true = pd.get_dummies(y)

# Apply the current weights to the predictions in place, then fit a new set of
# weights on top of the already re-weighted predictions.
y_pred *= weight
weight = utils_post.get_weight(y_true, y_pred, eta=0.1, nround=9999)

# Printed in a form that can be pasted back into a script.
print(f'weight: np.array({list(weight)})')
Beispiel #3
0
    wloss_list.append(ret['wloss-mean'][-1])

# Average the softmax-transformed predictions of every entry in y_preds.
# The first transformed array becomes the accumulator; each later one is added
# onto it with +=.
for i, y_pred in enumerate(y_preds):
    y_pred = utils_metric.softmax(y_pred.astype(float).values)
    if i:
        y_preds_ += y_pred
    else:
        y_preds_ = y_pred

y_preds_ /= len(y_preds)

# =============================================================================
# score of the averaged prediction
# =============================================================================

# NOTE(review): the returned score is neither stored nor printed here.
utils_metric.multi_weighted_logloss(y, y_preds_)


def multi_weighted_logloss(y_true: np.array, y_preds: np.array):
    """
    @author olivier https://www.kaggle.com/ogrellier
    multi logloss for PLAsTiCC challenge
    """
    # class_weights taken from Giba's topic : https://www.kaggle.com/titericz
    # https://www.kaggle.com/c/PLAsTiCC-2018/discussion/67194
    # with Kyle Boone's post https://www.kaggle.com/kyleboone
    classes = [6, 15, 16, 42, 52, 53, 62, 64, 65, 67, 88, 90, 92, 95]
    class_weight = {
        6: 1,
        15: 2,
        16: 1,
    1.1768777584208459, 0.9498970981272328, 0.6113702626667485,
    0.48242068928933035, 1.2894930889416614, 1.423971561601788,
    0.6535155757119984, 1.6161049089839221, 0.5743188118409728,
    1.1906849086994178, 0.6527050232072442, 0.42181435682919677,
    0.9394690895273552, 1.061672745432284
])

classes_gal = [6, 16, 53, 65, 92]
classes_exgal = [15, 42, 52, 62, 64, 67, 88, 90, 95]

# Load the object ids, attach the second model's out-of-fold predictions and
# relabel: the id column first, then one column per class id in ascending order.
sub_tr = pd.concat([utils.load_train(['object_id']), oof2], axis=1)
sub_tr.columns = [
    'object_id',
    *(f'class_{i}' for i in sorted(classes_gal + classes_exgal)),
]

# Zero the columns of the opposite group: rows whose id is in oid_gal lose all
# classes_exgal columns, rows whose id is in oid_exgal lose all classes_gal
# columns.
in_gal = sub_tr['object_id'].isin(oid_gal)
sub_tr.loc[in_gal, [f'class_{i}' for i in classes_exgal]] = 0
in_exgal = sub_tr['object_id'].isin(oid_exgal)
sub_tr.loc[in_exgal, [f'class_{i}' for i in classes_gal]] = 0

# Re-weight the second model's predictions and blend both models 50/50.
oof2 = sub_tr.iloc[:, 1:].values.astype(float) * weight

oof = (oof1 + oof2) / 2

# Report the metric for each model and for the blend.
y = utils.load_target().target
for label, pred in (('oof1:', oof1), ('oof2:', oof2), ('ave:', oof)):
    print(label, utils_metric.multi_weighted_logloss(y.values, pred))
Beispiel #5
0
def plot_confusion_matrix(__file__, y_pred, normalize=True,
                          title='Confusion Matrix'):
    """
    Plot, save and report the confusion matrix of a set of class predictions.

    The true labels are read with ``load_target().target``. Every distinct
    label is mapped to its rank in ascending order, and the predicted class of
    a row is the arg-max over the last axis of ``y_pred``; column ``k`` of
    ``y_pred`` therefore has to hold the score of the k-th smallest label.

    __file__  : tag inserted into the output path ``LOG/CM_{__file__}.png``
    y_pred    : per-class scores, one row per object
    normalize : if True every matrix row is divided by its row sum
    title     : prefix of the plot title (the rounded score is appended)

    Side effects: selects the matplotlib 'Agg' backend, sets the numpy print
    precision to 2, prints the matrix, writes the PNG file and passes the
    score message together with the PNG path to ``send_line``.
    Returns None.
    """

    import matplotlib as mpl
    mpl.use('Agg')
    from matplotlib import pyplot as plt
    from sklearn.metrics import confusion_matrix
    import itertools
    import utils_metric

    # Tick labels, in the same ascending class order as the y_pred columns.
    classes = ['class_6',
             'class_15',
             'class_16',
             'class_42',
             'class_52',
             'class_53',
             'class_62',
             'class_64',
             'class_65',
             'class_67',
             'class_88',
             'class_90',
             'class_92',
             'class_95']

    y = load_target().target

    # Map every distinct target value to its rank in ascending order.
    target_dict = {e: i for i, e in enumerate(y.sort_values().unique())}
    y = y.replace(target_dict).values

    score = utils_metric.multi_weighted_logloss(y, y_pred)

    cnf_matrix = confusion_matrix(y, np.argmax(y_pred, axis=-1))
    np.set_printoptions(precision=2)

    if normalize:
        # Row-wise normalisation: each row is divided by its own total.
        cnf_matrix = cnf_matrix.astype('float') / cnf_matrix.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cnf_matrix)

    png_path = f'LOG/CM_{__file__}.png'

    fig = plt.figure(figsize=(12,12))
    try:
        plt.imshow(cnf_matrix, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title(f'{title}: {round(score, 5)}')
        plt.colorbar()
        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        # Cell labels switch to white on cells above half of the maximum
        # value so they stay readable on the dark end of the colour map.
        fmt = '.2f' if normalize else 'd'
        thresh = cnf_matrix.max() / 2.
        for i, j in itertools.product(range(cnf_matrix.shape[0]), range(cnf_matrix.shape[1])):
            plt.text(j, i, format(cnf_matrix[i, j], fmt),
                     horizontalalignment="center",
                     color="white" if cnf_matrix[i, j] > thresh else "black")

        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        plt.savefig(png_path)
    finally:
        # Release the figure; pyplot otherwise keeps it open, so repeated
        # calls would accumulate open figures.
        plt.close(fig)

    send_line(f'Confusion Matrix wmlogloss: {score}', png=png_path)

    return
Beispiel #6
0
imp = ex.getImp(model_all)

# Scale both importance columns so that their maximum is 1, then add them up
# into a combined 'total' column.
for col in ('split', 'gain'):
    imp[col] /= imp[col].max()
imp['total'] = imp['split'] + imp['gain']

# Highest combined value first, with a fresh 0..n-1 index.
imp.sort_values(by='total', ascending=False, inplace=True)
imp.reset_index(inplace=True, drop=True)

imp.to_csv(f'LOG/imp_{__file__}.csv', index=False)

# =============================================================================
# eval
# =============================================================================
# Average the softmax-transformed predictions of every entry in y_preds.
# The first transformed frame becomes the accumulator; each later one is added
# onto it with +=.
for i, y_pred in enumerate(y_preds):
    y_pred = pd.DataFrame(utils_metric.softmax(y_pred.astype(float).values))
    if i:
        tmp += y_pred
    else:
        tmp = y_pred
tmp /= len(y_preds)

# From here on y_preds is the averaged float array, no longer the list of
# per-model predictions.
y_preds = tmp.copy().values.astype(float)

w_score = utils_metric.multi_weighted_logloss(y.values, y_preds)
a_score = utils_metric.akiyama_metric(y.values, y_preds)
print(f'{w_score}    {a_score}')

#==============================================================================
# Wrap-up calls into the project's utils module; end() receives this script's
# __file__.
# NOTE(review): what end() and stop_instance() actually do is defined in utils,
# outside this view - confirm before reordering or removing them.
utils.end(__file__)
utils.stop_instance()