Code example #1
def retrain(params):
    #  load_data
    train_trial_list = \
        [x for x in TRIAL_LIST if x != params['outer_f']]

    sb_n = params['sb_n']

    train_loader = pre.load_data_cnn(DATA_PATH, sb_n, train_trial_list,
                                     params['batch_size'])

    model = utils.Model()
    model.to(DEVICE)
    optimizer = getattr(torch.optim, params['optimizer'])(model.parameters(),
                                                          lr=params['lr'])
    eng = utils.EngineTrain(model, optimizer, device=DEVICE)

    loss_params = pre.update_loss_params(params)
    loss_params['device'] = DEVICE
    print(loss_params)
    best_loss = params['best_loss']
    for epoch in range(1, EPOCHS + 1):
        if 'annealing_step' in loss_params:
            loss_params['epoch_num'] = epoch
        train_loss = eng.re_train(train_loader, loss_params)
        print(f"epoch:{epoch}, train_loss:{train_loss}",
              f"best_loss_from_cv:{best_loss}")
        if train_loss < best_loss:
            break

    torch.save(model.state_dict(), params['saved_model'])
    return
Code example #2
def run_training(fold, params, save_model=False):
    df = pd.read_csv('../data/train_features.csv')
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv('../data/train_target_folds.csv')

    features = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[features].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(n_features=xtrain.shape[1],
                        n_targets=ytrain.shape[1],
                        n_layers=params['num_layers'],
                        hidden_size=params['hidden_size'],
                        dropout=params['dropout'])
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params['learning_rate'])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f'{fold}, {epoch}, {train_loss}, {valid_loss}')
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break
    return best_loss
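utils.Engine and utils.MoaDataset are not shown in these excerpts. From the calls above, the engine wraps one epoch of training or evaluation and returns a mean loss. A minimal sketch of such an engine, assuming the loader yields (features, targets) float-tensor pairs and a BCEWithLogitsLoss multi-label objective (both assumptions, not the project's code):

import torch
import torch.nn as nn

class Engine:
    """Hypothetical sketch of the engine interface used above; not the project's code."""

    def __init__(self, model, optimizer, device):
        self.model = model
        self.optimizer = optimizer
        self.device = device
        self.loss_fn = nn.BCEWithLogitsLoss()  # assumed multi-label loss

    def train(self, loader):
        self.model.train()
        total_loss = 0.0
        for features, targets in loader:  # assumes (features, targets) batches
            features = features.to(self.device)
            targets = targets.to(self.device)
            self.optimizer.zero_grad()
            loss = self.loss_fn(self.model(features), targets)
            loss.backward()
            self.optimizer.step()
            total_loss += loss.item()
        return total_loss / len(loader)

    @torch.no_grad()
    def evaluate(self, loader):
        self.model.eval()
        total_loss = 0.0
        for features, targets in loader:
            features = features.to(self.device)
            targets = targets.to(self.device)
            total_loss += self.loss_fn(self.model(features), targets).item()
        return total_loss / len(loader)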
Code example #3
def get_model_chi(mol, param_path, run_path, model_name):
    """Consolidate the actual chi-getting process."""
    # print('Param path: {}\nModel Name: {}'.format(param_path, model_name))
    model = utils.Model(mol, param_path, run_path, model_name)
    model.make_fits()
    model.obs_sample()
    model.chiSq(mol)
    model.delete()
    return model.raw_chi
Code example #4
File: train.py  Project: bharadwajanup/AI_Learning
def train_data(dataset_dir, fraction):
    unclassified = {}
    model = defaultdict(Counter)
    topics = Counter()
    word_counter = Counter()
    doc_topic_counter = Counter()
    for topic in os.listdir(dataset_dir):
        if topic.startswith('.'):
            continue
        topic_dir = dataset_dir + "/" + topic

        for cur_file in os.listdir(topic_dir):
            file_path = topic_dir + "/" + cur_file
            words = utils.get_file_content(file_path)

            # Flip a coin to check if the program can see the classification of the document.
            if read_classification(fraction):
                topics[topic] += len(words)
                doc_topic_counter[topic] += 1
                for word in words:
                    model[topic][word] += 1
                    word_counter[word] += 1
            else:
                # Adding 0 still creates the topic key in the Counters.
                topics[topic] += 0
                doc_topic_counter[topic] += 0
                if cur_file not in unclassified:
                    unclassified[cur_file] = (words, 'None')
                else:
                    new_name = cur_file + random_name()
                    while new_name in unclassified:
                        print("More duplicates found!")
                        new_name = cur_file + random_name()
                    unclassified[new_name] = (words, 'None')
    # Create our Model.
    model_obj = utils.Model(model, topics, word_counter, doc_topic_counter)

    # Iteratively classify unclassified documents till the loop ends or when there are no more
    # changes to the model.
    for i in range(12):
        print("Iteration %d" % i)
        model_obj, count_changed = train_unclassified_documents(
            unclassified, model_obj)
        if count_changed == 0:
            break

    # Return the object for serialization.
    return model_obj
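The helpers read_classification and random_name are not shown in this excerpt. Judging by the "flip a coin" comment above, read_classification presumably returns True with probability fraction; a minimal sketch under that assumption:

import random

def read_classification(fraction):
    # Hypothetical sketch: the document's label is visible with probability `fraction`.
    return random.random() < fraction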
Code example #5
File: testing.py  Project: YuzhouLin/current_proj
def test(params):
    #  load_data
    device = torch.device('cpu')
    test_trial = params['outer_f']
    sb_n = params['sb_n']

    # Load testing Data
    inputs, targets = pre.load_data_test_cnn(DATA_PATH, sb_n, test_trial)

    # Load trained model
    model = utils.Model()
    model.load_state_dict(
        torch.load(params['saved_model'], map_location=device))
    model.eval()

    # Get Results
    outputs = model(inputs.to(device)).detach()

    # Load the Testing Engine
    eng = utils.EngineTest(outputs, targets)

    common_keys_for_update_results = ['sb_n', 'edl_used', 'outer_f']

    dict_for_update_acc = \
        {key: params[key] for key in common_keys_for_update_results}
    dict_for_update_R = copy.deepcopy(dict_for_update_acc)

    eng.update_result_acc(dict_for_update_acc)

    # Get the optimal activation function
    if EDL_USED == 0:
        dict_for_update_R['acti_fun'] = 'softmax'
    else:
        # Get from hyperparameter study
        core_path = f'study/ecnn{EDL_USED}/sb{sb_n}'
        study_path = "sqlite:///" + core_path + f"/t{test_trial}.db"
        loaded_study = optuna.load_study(study_name="STUDY",
                                         storage=study_path)
        temp_best_trial = loaded_study.best_trial
        dict_for_update_R['acti_fun'] = temp_best_trial.params['evi_fun']

    print(dict_for_update_R)
    eng.update_result_R(dict_for_update_R)

    return
Code example #6
def run_training(fold, params, save_model=False):
    df = pd.read_csv("../Data/lish-moa/train_features.csv")
    df = df.drop(["cp_type", "cp_time", "cp_dose"], axis=1)

    targets_df = pd.read_csv(
        "/home/self-made-lol/Desktop/Mechanism_of_Actions/Data/lish-moa/train_tragets_fold.csv"
    )

    features_columns = df.drop("sig_id", axis=1).columns
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[features_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[features_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=params["num_layers"],
        hidden_size=params["hidden_size"],
        dropout=params["dropout"],
    )
    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=params["learning_rate"])
    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
Code example #7
def run_training(fold, params, save_model):
    # load_data
    '''
    temp_trial_list = [
        x for x in TRIAL_LIST if x not in params['test_trial_list']]
    '''
    o_f = params['outer_f']  # outer fold num
    temp_trial_list = [x for x in TRIAL_LIST if x != o_f]
    valid_trial_list = [temp_trial_list.pop(fold)]
    train_trial_list = temp_trial_list

    sb_n = params['sb_n']

    train_loader = pre.load_data_cnn(DATA_PATH, sb_n, train_trial_list,
                                     params['batch_size'])
    valid_loader = pre.load_data_cnn(DATA_PATH, sb_n, valid_trial_list,
                                     params['batch_size'])

    trainloaders = {
        "train": train_loader,
        "val": valid_loader,
    }

    # Load Model
    model = utils.Model()
    model.to(DEVICE)
    # optimizer = torch.optim.Adam(model.parameters(), lr=0.01,amsgrad=True)
    optimizer = getattr(torch.optim, params['optimizer'])(model.parameters(),
                                                          lr=params['lr'])

    eng = utils.EngineTrain(model, optimizer, device=DEVICE)

    loss_params = pre.update_loss_params(params)
    loss_params['device'] = DEVICE

    if save_model:
        prefix_path = f'model_innerloop/ecnn{EDL_USED}/'
        if not os.path.exists(prefix_path):
            os.makedirs(prefix_path)

        filename = f"sb{sb_n}_o{o_f}_i{fold}.pt"
        model_name = os.path.join(prefix_path, filename)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0
    for epoch in range(1, EPOCHS + 1):
        if 'annealing_step' in loss_params:
            loss_params['epoch_num'] = epoch
        train_losses = eng.train(trainloaders, loss_params)
        train_loss = train_losses['train']
        valid_loss = train_losses['val']
        print(f"fold:{fold}, "
              f"epoch:{epoch}, "
              f"train_loss: {train_loss}, "
              f"valid_loss: {valid_loss}. ")
        if valid_loss < best_loss:
            best_loss = valid_loss
            early_stopping_counter = 0
            if save_model:
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': model.state_dict(),
                        'optimizer': params['optimizer'],
                        'optimizer_state_dict': optimizer.state_dict(),
                        'train_loss': train_loss,
                        'valid_loss': valid_loss
                    }, model_name)
        else:
            early_stopping_counter += 1
        if early_stopping_counter > early_stopping_iter:
            break
    return best_loss
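A minimal sketch (not part of the original project) of how a checkpoint saved in this dict format could be restored for resuming training; it assumes the same utils.Model class and that the saved 'optimizer' entry names a torch.optim class, as in the code above:

checkpoint = torch.load(model_name, map_location=DEVICE)
model = utils.Model()
model.load_state_dict(checkpoint['model_state_dict'])
model.to(DEVICE)
optimizer = getattr(torch.optim, checkpoint['optimizer'])(
    model.parameters(), lr=1e-3)  # placeholder lr; overwritten by the state dict
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1  # resume from the next epoch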
Code example #8
def run_training(fold, save_model=False):
    df = pd.read_csv("./input/train_features.csv")
    df = df.drop(["cp_time", "cp_dose", "cp_type"], axis=1)

    targets_df = pd.read_csv("./input/train_targets_folds.csv")

    feature_columns = df.drop("sig_id", axis=1).columns
    target_columns = targets_df.drop(["sig_id", "kfold"], axis=1).columns

    df = df.merge(targets_df, on="sig_id", how="left")
    # print(df)

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MOADataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MOADataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(
        nfeatures=xtrain.shape[1],
        ntargets=ytrain.shape[1],
        nlayers=2,
        hidden_size=128,
        dropout=0.3,
    )

    model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    eng = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")
        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f"model_{fold}.bin")
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
Code example #9
def run_training(fold, save_model=False):
    df = pd.read_csv(
        "/home/hasan/Data Set/Drug Classification/train_features.csv")
    df = df.drop(['cp_type', 'cp_time', 'cp_dose'], axis=1)

    targets_df = pd.read_csv("/home/hasan/spyder_code/train_targets_folds.csv")

    feature_columns = df.drop('sig_id', axis=1).columns
    target_columns = targets_df.drop(['sig_id', 'kfold'], axis=1).columns

    df = df.merge(targets_df, on='sig_id', how='left')

    train_df = df[df.kfold != fold].reset_index(drop=True)
    valid_df = df[df.kfold == fold].reset_index(drop=True)

    xtrain = train_df[feature_columns].to_numpy()
    ytrain = train_df[target_columns].to_numpy()

    xvalid = valid_df[feature_columns].to_numpy()
    yvalid = valid_df[target_columns].to_numpy()

    train_dataset = utils.MoaDataset(features=xtrain, targets=ytrain)
    valid_dataset = utils.MoaDataset(features=xvalid, targets=yvalid)

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=1024,
                                               num_workers=8,
                                               shuffle=True)

    valid_loader = torch.utils.data.DataLoader(valid_dataset,
                                               batch_size=1024,
                                               num_workers=8)

    model = utils.Model(nfeatures=xtrain.shape[1],
                        ntargets=ytrain.shape[1],
                        nlayers=2,
                        hidden_size=128,
                        dropout=0.3)

    model.to(DEVICE)  # the engine moves batches to DEVICE, so the model must be there too
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    eng = utils.Engine(model, optimizer, DEVICE)

    best_loss = np.inf
    early_stopping_iter = 10
    early_stopping_counter = 0

    for epoch in range(EPOCHS):
        train_loss = eng.train(train_loader)
        valid_loss = eng.evaluate(valid_loader)
        print(f"{fold}, {epoch}, {train_loss}, {valid_loss}")

        if valid_loss < best_loss:
            best_loss = valid_loss
            if save_model:
                torch.save(model.state_dict(), f'model_{fold}.bin')

        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping_iter:
            break

    return best_loss
Code example #10
    hps = parser.parse_args()
    hps.n_labels = 10
    hps.final_search = hps.final_search in ('True', 'true', '1')
    settings.init(hps)

    sess = tf.InteractiveSession()

    ### load dataset and create the model
    import utils
    data1, x_test, y_test, y_test_0 = utils.load_dataset(hps)
    conv_l, dense_l = utils.get_weights_conv(data1, hps)
    settings.init(hps)
    settings.init_layers(conv_l, dense_l)
    model = utils.Model(hps)

    if hps.p == 'linf': import FAB_linf
    elif hps.p == 'l2': import FAB_l2
    elif hps.p == 'l1': import FAB_l1

    ### run the attack in batches of size hps.bs for the first hps.im images of the test set
    if hps.dataset in ['cifar10']: y_test_0 = y_test_0[0]
    t1 = time.time()
    adv = np.zeros(x_test[:hps.im].shape)
    res = np.zeros([hps.im])
    sp = 0
    while sp < hps.im:
        if hps.p == 'linf':
            res[sp:sp + hps.bs], adv[sp:sp + hps.bs] = FAB_linf.FABattack_linf(
                model, x_test[sp:sp + hps.bs], y_test_0[sp:sp + hps.bs], sess,
Code example #11
def run_training():
    if torch.cuda.is_available():
        DEVICE = 'cuda'
    else:
        DEVICE = 'cpu'
    df_train = pd.read_csv(PATH + 'train_features.csv')
    targets = pd.read_csv(PATH + 'train_targets_scored.csv')
    utils.get_dummies(df_train, ['cp_type', 'cp_dose', 'cp_time'])
    sig_ids = df_train['sig_id']
    df_train.drop('sig_id', axis=1, inplace=True)
    targets.drop('sig_id', axis=1, inplace=True)

    # TODO use unscored data for training as well
    X_train, X_val, y_train, y_val = train_test_split(df_train.values,
                                                      targets.values,
                                                      test_size=0.3,
                                                      random_state=42)

    train_dataset = utils.ModelDataset(X_train, y_train)
    train_loader = DataLoader(train_dataset,
                              batch_size=BATCH_SIZE,
                              shuffle=True,
                              num_workers=4)

    val_dataset = utils.ModelDataset(X_val, y_val)
    val_loader = DataLoader(val_dataset, batch_size=1)

    model = utils.Model(X_train.shape[1], y_train.shape[1], num_layers,
                        hidden_size)
    model.to(DEVICE)

    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer,
                                                step_size=30,
                                                gamma=0.1)

    engine = utils.Engine(model, optimizer, device=DEVICE)

    best_loss = np.inf
    early_stopping = 10
    early_stopping_counter = 0

    # TODO: use Optuna for trials
    for epoch in range(EPOCHS):
        train_loss = engine.train(train_loader)
        val_loss = engine.validate(val_loader)
        scheduler.step()  # StepLR steps per epoch; its step() takes no metric (see note below)

        print(f'Epoch {epoch}, train_loss {train_loss}, val_loss {val_loss}')

        if val_loss < best_loss:
            best_loss = val_loss
            torch.save(model.state_dict(), 'best_model.pt')  # needs a file path, not a directory; name is illustrative
        else:
            early_stopping_counter += 1

        if early_stopping_counter > early_stopping:
            break

    print(f'best loss {best_loss}')
    return best_loss
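torch.optim.lr_scheduler.StepLR decays the learning rate on a fixed epoch schedule, and its step() takes no metric, hence the bare scheduler.step() call above. If the intent is to step on the validation loss, ReduceLROnPlateau is the built-in scheduler that consumes one; a minimal sketch (the factor and patience values are illustrative):

scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=5)

# inside the epoch loop, after computing val_loss:
scheduler.step(val_loss)  # reduces the lr once val_loss stops improving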