def run_training(fold, params, save_model=False):
    """Train one CV fold of the MoA model and return the best validation loss.

    Args:
        fold: kfold index held out as the validation split.
        params: dict with 'num_layers', 'hidden_size', 'dropout' and
            'learning_rate'.
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        Lowest validation loss observed before early stopping.
    """
    df = pd.read_csv('../data/train_features.csv')
    # Drop categorical experiment descriptors; only numeric features remain.
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv('../data/train_target_folds.csv')

    features = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    # BUG FIX: validation must be the held-out fold (== fold). The original
    # used `!= fold` here, so "validation" loss was computed on the training
    # rows and early stopping / model selection were meaningless.
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[features].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(n_features=xtrain.shape[1],
                        n_targets=ytrain.shape[1],
                        n_layers=params['num_layers'],
                        hidden_size=params['hidden_size'],
                        dropout=params['dropout'])
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params['learning_rate'])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # np.inf (lowercase): np.Inf was removed in NumPy 2.0.
    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f'{fold}, {epoch}, {train_loss}, {valid_loss}')
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break
    return best_loss
Example #2
0
def run_training(fold, params, save_model=False):
    """Train the MoA network on every fold except `fold`; validate on `fold`.

    Early-stops after 10 epochs without validation improvement and, when
    `save_model` is set, checkpoints the best weights to model_{fold}.bin.

    Returns:
        Lowest validation loss observed.
    """
    features_df = pd.read_csv("../Data/lish-moa/train_features.csv")
    features_df = features_df.drop(["cp_type", "cp_time", "cp_dose"], axis=1)

    targets_df = pd.read_csv(
        "/home/self-made-lol/Desktop/Mechanism_of_Actions/Data/lish-moa/train_tragets_fold.csv"
    )

    # Column sets: everything but the id on the feature side, everything but
    # the id and the fold index on the target side.
    feature_cols = features_df.drop("sig_id", axis=1).columns
    target_cols = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    merged = features_df.merge(targets_df, on='sig_id', how='left')

    train_part = merged[merged.kfold != fold].reset_index(drop=True)
    valid_part = merged[merged.kfold == fold].reset_index(drop=True)

    x_tr = train_part[feature_cols].to_numpy()
    y_tr = train_part[target_cols].to_numpy()
    x_va = valid_part[feature_cols].to_numpy()
    y_va = valid_part[target_cols].to_numpy()

    train_loader = torch.utils.data.DataLoader(
        utils.MoaDataset(features=x_tr, targets=y_tr),
        batch_size=1024,
        num_workers=8,
        shuffle=True,
    )
    valid_loader = torch.utils.data.DataLoader(
        utils.MoaDataset(features=x_va, targets=y_va),
        batch_size=1024,
        num_workers=8,
    )

    model = utils.Model(
        nfeatures=x_tr.shape[1],
        ntargets=y_tr.shape[1],
        nlayers=params["num_layers"],
        hidden_size=params["hidden_size"],
        dropout=params["dropout"],
    )
    model.to(DEVICE)
    engine = utils.Engine(
        model,
        torch.optim.Adam(model.parameters(), lr=params["learning_rate"]),
        device=DEVICE,
    )

    best_loss = np.inf
    patience = 10
    stale_epochs = 0

    for epoch in range(EPOCHS):
        train_loss = engine.train(train_loader)
        valid_loss = engine.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            stale_epochs += 1
            # Stop once we exceed the patience budget (counter only moves
            # on non-improving epochs, so checking here is equivalent).
            if stale_epochs > patience:
                break

    return best_loss
Example #3
0
def run_training(fold, save_model=False):
    """Train one CV fold with fixed hyperparameters (2x128 net, dropout 0.3).

    Args:
        fold: kfold index held out as the validation split.
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        Lowest validation loss observed before early stopping.
    """
    df = pd.read_csv("./input/train_features.csv")
    df = df.drop(["cp_time", "cp_dose", "cp_type"], axis=1)

    targets_df = pd.read_csv("./input/train_targets_folds.csv")

    feature_columns = df.drop("sig_id", axis=1).columns
    # BUG FIX: 'kfold' is the fold index, not an MoA target. The original
    # dropped only 'sig_id' here, so the fold number leaked into
    # ytrain/yvalid as an extra target column.
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on="sig_id", how="left")

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MOADataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MOADataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=2,
        hidden_size=128,
        dropout=0.3,
    )

    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break

    # Return the score so this variant can drive hyperparameter search,
    # consistent with the sibling run_training variants (original returned None).
    return best_loss
Example #4
0
def run_training(fold, params, save_model=False):
    """Train one CV fold of the BaseLine model and return its best valid loss.

    Reads precomputed folds plus target/feature column lists from
    input/folds/, trains with Adam + ReduceLROnPlateau, early-stops after 10
    non-improving epochs, and (optionally) saves the final weights to a
    timestamped path under weight/.

    Args:
        fold: kfold index held out as the validation split.
        params: dict consumed by models.BaseLine; must also contain
            'learning_rate' for the optimizer.
        save_model: when True, save the model weights after training.

    Returns:
        Lowest validation loss observed.
    """
    df = pd.read_csv("input/folds/train.csv")
    # Target and feature column names are stored one-per-line in plain files.
    with open("input/folds/targets", "r") as f:
        targets = f.read().split("\n")
    with open("input/folds/features", "r") as f:
        features = f.read().split("\n")

    print(f"\n[Fold No.{fold:>2}]\n")
    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    x_tr = train_df[features].to_numpy()
    x_va = valid_df[features].to_numpy()

    y_tr = train_df[targets].to_numpy()
    y_va = valid_df[targets].to_numpy()

    # TODO: [BEGIN] add training for non-NN models here
    dataset_tr = utils.MoaDataset(x_tr, y_tr)
    loader_tr = torch.utils.data.DataLoader(dataset_tr,
                                            batch_size=1024,
                                            num_workers=2)
    dataset_va = utils.MoaDataset(x_va, y_va)
    loader_va = torch.utils.data.DataLoader(dataset_va,
                                            batch_size=1024,
                                            num_workers=2)

    model = models.BaseLine(num_features=x_tr.shape[1],
                            num_targets=y_tr.shape[1],
                            params=params)
    model.to(DEVICE)

    # TODO: let optuna tune the optimizer and scheduler choice as well
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params["learning_rate"])
    # LR is reduced when validation loss plateaus (see scheduler.step below).
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer,
                                                           patience=3,
                                                           threshold=0.00001,
                                                           mode="min",
                                                           verbose=True)
    eng = utils.Engine(model, optimizer, device=DEVICE)

    # Free RAM space as much as possible before training
    del df, train_df, valid_df, x_tr, x_va, y_tr, y_va
    gc.collect()
    # TODO: [END] add training for non-NN models here

    loss_best = np.inf
    patience = 10
    patience_cnt = 0
    for ep in range(EPOCHS):
        loss_tr = eng.train(loader_tr)
        loss_va = eng.validate(loader_va)
        scheduler.step(loss_va)
        print(f"epoch:{ep:>2}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")

        if loss_va < loss_best:
            loss_best = loss_va
            # NOTE(review): intentionally a no-op — the weights actually saved
            # below are from the LAST epoch, not the best one. Confirm this is
            # desired; sibling variants checkpoint at each improvement.
            if save_model:
                pass
        else:
            patience_cnt += 1
        if patience_cnt > patience:
            break

    print(f"[Fold No.{fold:>2}]")
    print(f"epoch:{ep:>3}, train:{loss_tr:>.5}, valid:{loss_va:>.5}")

    if save_model:
        # Timestamped filename, e.g. weight/model10-21_1530/fold0.pt.
        # NOTE(review): presumably the weight/model{now}/ directory must
        # already exist, otherwise torch.save raises — verify.
        now = datetime.now()
        now = str(now)[5:17].replace(" ", "_").replace(":", "")
        filename = f"weight/model{now}/fold{fold}.pt"
        # NOTE(review): saves model.model (inner module), not model itself —
        # assumes BaseLine wraps the actual network; confirm in models.py.
        torch.save(model.model.state_dict(), filename)
        print("model saved at:", filename)

    return loss_best
def run_training(fold, save_model=False):
    """Train one CV fold with fixed hyperparameters (2x128 net, dropout 0.3).

    Args:
        fold: kfold index held out as the validation split.
        save_model: when True, checkpoint the best weights to model_{fold}.bin.

    Returns:
        Lowest validation loss observed before early stopping.
    """
    df = pd.read_csv(
        "/home/hasan/Data Set/Drug Classification/train_features.csv")
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv("/home/hasan/spyder_code/train_targets_folds.csv")

    feature_columns = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(nfeatures=xtrain.shape[1],
                        ntargets=ytrain.shape[1],
                        nlayers=2,
                        hidden_size=128,
                        dropout=0.3)

    # BUG FIX: this line was commented out while DEVICE was still passed to
    # the Engine — if DEVICE is 'cuda' the model stays on CPU while batches
    # presumably go to the GPU, and training crashes on the device mismatch.
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')

        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break

    # Return the score so callers can aggregate per-fold results,
    # consistent with the sibling run_training variants (original returned None).
    return best_loss
def run_training():
    """Train the MoA model on a random 70/30 split and return best val loss.

    Uses one-hot encoded experiment descriptors, Adam with a StepLR schedule,
    and early stopping after 10 non-improving epochs. Relies on module-level
    PATH, BATCH_SIZE, EPOCHS, num_layers and hidden_size.

    Returns:
        Lowest validation loss observed.
    """
    if torch.cuda.is_available():
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'
    df_train = pd.read_csv(PATH + 'train_features.csv')
    targets = pd.read_csv(PATH + 'train_targets_scored.csv')
    utils.get_dummies(df_train, ['cp_type', 'cp_dose', 'cp_time'])
    sig_ids = df_train['sig_id']
    df_train.drop('sig_id', axis=1, inplace=True)
    targets.drop('sig_id', axis=1, inplace=True)

    # TODO use unscored data for training as well
    X_train, X_val, y_train, y_val = train_test_split(df_train.values,
                                                      targets.values,
                                                      test_size=0.3,
                                                      random_state=42)

    train_dataset = utils.ModelDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4)

    val_dataset = utils.ModelDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=1)

    model = utils.Model(X_train.shape[1], y_train.shape[1], num_layers,
                        hidden_size)
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.1)

    engine = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping = 10
    early_stopping_counter = 0

    # TODO use optuns for trails
    for epoch in range(EPOCHS):
        train_loss = engine.train(train_loader)
        val_loss = engine.validate(val_loader)
        # BUG FIX: StepLR takes no metric — the original passed val_loss,
        # which StepLR interprets as the (deprecated) epoch number, corrupting
        # the decay schedule. Step once per epoch with no argument; use
        # ReduceLROnPlateau instead if stepping on the metric is intended.
        scheduler.step()

        print(f'Epoch {epoch}, train_loss {train_loss}, val_loss {val_loss}')

        if val_loss < best_loss:
            best_loss = val_loss
            # NOTE(review): '/models' looks like a directory at filesystem
            # root — torch.save will likely fail here. Confirm the intended
            # checkpoint file path (e.g. 'models/best.pt').
            torch.save(model.state_dict(), '/models')
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping:
            break

    print(f'best loss {best_loss}')
    return best_loss