def on_epoch_end(self, epoch, logs=None):
    """At the end of each epoch, score the generator's data and record log-loss.

    Runs the model over the validation generator, extracts the positive-class
    probabilities, and appends the resulting log-loss to the module-level
    ``callback_logloss`` history list.
    """
    predictions = self.model.predict_generator(
        self.generator, self.generator.n_per_epoch())
    print(predictions.shape)
    # Keep only the positive-class column and drop singleton axes.
    positive_probs = np.squeeze(predictions[:, :, 1])
    labels = np.squeeze(self.generator.label())
    epoch_logloss = loss.logloss(labels, positive_probs)
    callback_logloss.append(epoch_logloss)
# Correlated-equilibrium recovery experiment on a routing game:
# sample play from a known CE, refit the model, and compare log-losses.
import ce
import game
import loss
import numpy
import routing
import sample
import sgd

N = 4        # number of players
T = 100000   # SGD iterations
M = 100      # samples of play
eps = 0.01   # multinomial smoothing
C = 0.01     # CE max welfare coefficient

c = C * numpy.ones(N)
w = numpy.array([0.0, 0.0, 0.0, 1.0])  # true utility function

# Ground-truth correlated equilibrium of the routing game.
g = routing.create(N)
truth = ce.solve(g, c, w, sgd.create(T, 1, 0, 0, sgd.Rpprox))

# Observed play: M joint-action samples drawn from the equilibrium.
demon = sample.draw(M, truth)

# Fit utilities from the observations and predict the equilibrium.
inst = game.to_instance((g, demon))
w = game.solve(inst, sgd.create(T, 1, 0, 0))
pred = game.predict(inst, w)

# BUG FIX: print(('label', value)) was a 2to3 conversion artifact that
# printed the raw tuple repr, e.g. "('ice loss', 0.12)".  Passing the label
# and value as separate arguments restores the space-separated output the
# Python 2 version of this script produced.
print('multinomial loss', loss.logloss(truth, demon, eps))
print('ice loss', loss.logloss(truth, pred))
print('truth entropy', loss.logloss(truth, truth))
def main():
    """Run the MoA ResNet inference pipeline end to end.

    Loads the Kaggle LISH-MoA CSVs, removes control-vehicle rows, builds two
    preprocessed feature sets per fold, runs a train/eval helper for every
    seed x fold combination, averages the predictions, zeroes control rows
    and writes 'submission_resnet.csv'.

    NOTE(review): indentation reconstructed from a collapsed one-line
    source -- confirm loop nesting against the original file.
    """
    cfg = Config()
    # Kaggle input/output locations; runty == 'eval' loads pretrained
    # weights from load_path instead of training from scratch.
    data_dir = '/kaggle/input/lish-moa'
    save_path = './'
    load_path = '../input/model-resnet-tensorflow'
    runty = 'eval'
    assert runty == 'traineval' or runty == 'eval', \
        "Run type is wrong. Should be 'traineval' or 'eval'"

    # Raw competition tables.
    train_features = pd.read_csv(os.path.join(data_dir, 'train_features.csv'))
    train_targets_scored = pd.read_csv(
        os.path.join(data_dir, 'train_targets_scored.csv'))
    train_targets_nonscored = pd.read_csv(
        os.path.join(data_dir, 'train_targets_nonscored.csv'))
    test_features = pd.read_csv(os.path.join(data_dir, 'test_features.csv'))
    sub = pd.read_csv(os.path.join(data_dir, 'sample_submission.csv'))

    # Keep only label columns in the target frames.
    train_targets_scored = train_targets_scored.drop(['sig_id'], axis=1)
    train_targets_nonscored = train_targets_nonscored.drop(['sig_id'], axis=1)

    # Positions of non-control rows; with the default RangeIndex these
    # labels coincide with positions, so .iloc below selects the same rows.
    non_ctl_idx = train_features.loc[
        train_features['cp_type'] != 'ctl_vehicle'].index.to_list()
    train_features = train_features.drop(
        ['sig_id', 'cp_type', 'cp_time', 'cp_dose'], axis=1)
    train_features = train_features.iloc[non_ctl_idx]
    train_targets_scored = train_targets_scored.iloc[non_ctl_idx]
    train_targets_nonscored = train_targets_nonscored.iloc[non_ctl_idx]
    # Test keeps 'cp_type' so control rows can be zeroed in the submission.
    test_features = test_features.drop(['sig_id', 'cp_dose', 'cp_time'],
                                       axis=1)

    # Column masks for gene-expression ('g-') and cell-viability ('c-')
    # features, consumed by the preprocessor.
    gs = train_features.columns.str.startswith('g-')
    cs = train_features.columns.str.startswith('c-')

    # read the main predictors
    with open('../input/src-resnet-tensorflow/main_predictors.json') as f:
        tmp = json.load(f)
        preds = tmp['start_predictors']

    # oof accumulates the cross-validated log-loss; predictions accumulates
    # the seed/fold-averaged test probabilities.
    oof = tf.constant(0.0)
    predictions = np.zeros(
        (test_features.shape[0], train_targets_scored.shape[1]))

    for seed in cfg.seeds:
        mskf = MultilabelStratifiedKFold(n_splits=cfg.nfolds, shuffle=True,
                                         random_state=seed)
        for f, (t_idx, v_idx) in enumerate(
                mskf.split(X=train_features, y=train_targets_scored)):
            # Primary feature set: all g-/c- columns.
            x_train, x_valid = preprocessor(train_features.iloc[t_idx].values,
                                            train_features.iloc[v_idx].values,
                                            gs, cs)
            _, data_test = preprocessor(
                train_features.iloc[t_idx].values,
                test_features.drop('cp_type', axis=1).values,
                gs, cs)
            # Secondary feature set: restricted to the JSON predictor list.
            x_train_2, x_valid_2 = \
                preprocessor_2(train_features.iloc[t_idx][preds].values,
                               train_features.iloc[v_idx][preds].values)
            _, data_test_2 = preprocessor_2(
                train_features.iloc[t_idx][preds].values,
                test_features[preds].values)
            y_train_sc = train_targets_scored.iloc[t_idx].values
            y_train_ns = train_targets_nonscored.iloc[t_idx].values
            y_valid_sc = train_targets_scored.iloc[v_idx].values
            y_valid_ns = train_targets_nonscored.iloc[v_idx].values
            # NOTE(review): n_features / n_features_2 are unused in the
            # visible code -- presumably read elsewhere; verify.
            n_features = x_train.shape[1]
            n_features_2 = x_train_2.shape[1]
            trte = train_test(x_train=x_train, x_valid=x_valid,
                              data_test=data_test, x_train_2=x_train_2,
                              x_valid_2=x_valid_2, data_test_2=data_test_2,
                              y_train_sc=y_train_sc, y_train_ns=y_train_ns,
                              y_valid_sc=y_valid_sc, y_valid_ns=y_valid_ns,
                              save_path=save_path, load_path=load_path,
                              fold=f, runty=runty)
            y_val, predictions_ = trte.run_k_fold(seed)
            # Average validation loss and test predictions over all
            # seed x fold runs.
            oof += logloss(tf.constant(y_valid_sc, dtype=tf.float32),
                           tf.constant(y_val, dtype=tf.float32)) / (
                cfg.nfolds * len(cfg.seeds))
            predictions += predictions_ / (cfg.nfolds * len(cfg.seeds))

    print("CV log_loss: ", oof)

    # NOTE(review): target_cols is unused in the visible code.
    target_cols = train_targets_scored.columns
    sub.iloc[:, 1:] = predictions
    # Control-vehicle test rows have no MoA by definition: force them to 0.
    sub.loc[test_features['cp_type'] == 'ctl_vehicle', sub.columns[1:]] = 0

    # clip the submission
    # sub_c = sub_clip(sub, test_features)
    # sub_c.to_csv('submission.csv', index=False)
    # sub.loc[test_features['cp_type']=='ctl_vehicle', submission.columns[1:]] = 0
    sub.to_csv('submission_resnet.csv', index=False)

    # NOTE(review): the source chunk ends inside a triple-quoted block that
    # continues past this excerpt; left untouched.
    """ if (runty == 'train'):
X_train, X_test = sparse.hstack((X_train, x_train)), sparse.hstack((X_test, x_test)) # if i == 0: # X_train, X_test = x_train, x_test # else: # X_train, X_test = sparse.hstack((X_train, x_train)), sparse.hstack((X_test, x_test)) print X_train.shape # model training lr = LogisticRegression() losses=[] for i in range(1): X_train_t, X_test_t, y_train_t, y_test_t = cross_validation.train_test_split(X_train, y_train, test_size=0.4, random_state=2) lr.fit(X_train_t, y_train_t) proba_test = lr.predict_proba(X_test_t)[:, 1] l = loss.logloss(np.array(y_test_t), np.array(lr.predict_proba(X_test_t)[:, 1])) print "interator:",i," ",l losses.append(l) print "average:",sum(losses)/1 lr.fit(X_train, y_train) proba_test = lr.predict_proba(X_test)[:,1] l=loss.logloss(np.array(y_train),np.array(lr.predict_proba(X_train)[:,1])) print "total: ",l # submission df = pd.DataFrame({"instanceID": dfTest["instanceID"].values, "proba": proba_test}) df.sort_values("instanceID", inplace=True) df.to_csv("submission.csv", index=False)
import ce import game import loss import numpy import routing import sample import sgd N = 4 # number of players T = 100000 # SGD iterations M = 100 # samples of play eps = 0.01 # multinomial smoothing C = 0.01 # CE max welfare coefficient c = C * numpy.ones(N) w = numpy.array([0.0, 0.0, 0.0, 1.0]) # true utility function g = routing.create(N) truth = ce.solve(g, c, w, sgd.create(T, 1, 0, 0, sgd.Rpprox)) demon = sample.draw(M, truth) inst = game.to_instance((g, demon)) w = game.solve(inst, sgd.create(T, 1, 0, 0)) pred = game.predict(inst, w) print 'multinomial loss', loss.logloss(truth, demon, eps) print 'ice loss', loss.logloss(truth, pred) print 'truth entropy', loss.logloss(truth, truth)
import ce import game import loss import numpy import routing import sample import sgd N = 4 # number of players T = 100000 # SGD iterations M = 100 # samples of play eps = 0.01 # multinomial smoothing C = 0.01 # CE max welfare coefficient c = C*numpy.ones(N); w = numpy.array([0.0,0.0,0.0,1.0]) # true utility function g = routing.create(N) truth = ce.solve(g, c, w, sgd.create(T, 1, 0, 0, sgd.Rpprox)) demon = sample.draw(M, truth) inst = game.to_instance((g,demon)) w = game.solve(inst, sgd.create(T, 1, 0, 0)) pred = game.predict(inst, w) print 'multinomial loss', loss.logloss(truth, demon, eps) print 'ice loss', loss.logloss(truth, pred) print 'truth entropy', loss.logloss(truth, truth)