Example no. 1
def val(args):
    data_path = config['voc_path']
    loader = VOCbase(data_path,
                     is_transform=True,
                     img_size=(args.img_rows, args.img_cols))
    valloader = DataLoader(loader, batch_size=args.batch_size, num_workers=4)

    model = FCN8s()
    model.load(args.model_path)
    model.cuda()
    model.eval()
    n_classes = model.n_classes
    gts, preds = [], []
    for i, (images, labels) in tqdm(enumerate(valloader)):
        images = Variable(images.cuda())
        labels = Variable(labels.cuda())
        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()

        for gt_, pred_ in zip(gt, pred):
            gts.append(gt_)
            preds.append(pred_)

    score, class_iou = scores(gts, preds, n_class=n_classes)

    for k, v in score.items():
        print(k, v)
    for i in range(n_classes):
        print(i, class_iou[i])
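Examples 1, 3, 4, 10 and 12 all call a segmentation scores(gts, preds, n_class=...) helper that returns a dictionary of global metrics together with a per-class IoU mapping. The following is only a minimal sketch of what such a helper typically computes (a pixel-level confusion matrix, then overall/mean accuracy, frequency-weighted accuracy and IoU); the actual implementation and its metric key names are assumptions.

import numpy as np

def _fast_hist(label_true, label_pred, n_class):
    # Pixel-level confusion matrix; pixels with labels outside [0, n_class) are ignored.
    mask = (label_true >= 0) & (label_true < n_class)
    return np.bincount(
        n_class * label_true[mask].astype(int) + label_pred[mask].astype(int),
        minlength=n_class ** 2,
    ).reshape(n_class, n_class)

def scores(label_trues, label_preds, n_class):
    # Hypothetical sketch, not the repository's actual code.
    hist = np.zeros((n_class, n_class))
    for lt, lp in zip(label_trues, label_preds):
        hist += _fast_hist(lt.flatten(), lp.flatten(), n_class)
    acc = np.diag(hist).sum() / hist.sum()
    acc_cls = np.nanmean(np.diag(hist) / hist.sum(axis=1))
    iu = np.diag(hist) / (hist.sum(axis=1) + hist.sum(axis=0) - np.diag(hist))
    freq = hist.sum(axis=1) / hist.sum()
    fwavacc = (freq[freq > 0] * iu[freq > 0]).sum()
    score = {'Overall Acc': acc, 'Mean Acc': acc_cls,
             'FreqW Acc': fwavacc, 'Mean IoU': np.nanmean(iu)}
    class_iou = dict(zip(range(n_class), iu))
    return score, class_iou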
Example no. 2
File: qdu.py Project: kyledh/qdupy
 def post(self):
     parser_copy = parser.copy()
     parser_copy.add_argument('year', type=str, required=True, help=u"学年不能为空")  # "academic year must not be empty"
     parser_copy.add_argument('term', type=str, required=True, help=u"学期不能为空")  # "term must not be empty"
     args = parser_copy.parse_args()
     cookies = _login()
     _scores = scores(cookies, args['year'], args['term'])
     return _scores
Example no. 3
def validate(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True)
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=1)

    # Setup Model
    model = Net(n_classes)
    print(get_n_params(model))
    model.load_state_dict(torch.load(args.model_path))
    # print(model)
    model.eval()
    if torch.cuda.is_available():
        model.cuda(0)

    gts, preds = [], []
    for i, (images, labels) in enumerate(valloader):
        if torch.cuda.is_available():
            images = Variable(images.cuda(0))
            labels = Variable(labels.cuda(0))
        else:
            images = Variable(images)
            labels = Variable(labels)
        outputs = model(images)
        pred = outputs.data.max(1)[1].cpu().numpy().astype(np.int)
        gt = labels.data.cpu().numpy().astype(np.int)
        for gt_, pred_ in zip(gt, pred):
            gts.append(gt_)
            preds.append(pred_)
        # pred = pred.reshape(360, 480)
        # pred = decode_segmap(pred)
        # m.imsave('./images/{}.png'.format(i), pred)

        # break
    score, class_iou = scores(gts, preds, n_class=n_classes)
    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Example no. 4
def validate(args):

    # Setup Dataloader
    data_loader = get_loader(args.dataset)
    data_path = get_data_path(args.dataset)
    loader = data_loader(data_path, split=args.split, is_transform=True)
    n_classes = loader.n_classes
    valloader = data.DataLoader(loader, batch_size=args.batch_size)

    # Setup Model
    model = LinkNet(n_classes)
    model.load_state_dict(torch.load(args.model_path))
    model.eval()

    if torch.cuda.is_available():
        model.cuda(0)

    gts, preds = [], []
    for i, (images, labels) in enumerate(valloader):
        if torch.cuda.is_available():
            images = Variable(images.cuda(0))
            labels = Variable(labels.cuda(0))
        else:
            images = Variable(images)
            labels = Variable(labels)
        t1 = time.time()
        outputs = model(images)
        t2 = time.time()
        print(t2 - t1)
        pred = outputs.data.max(1)[1].cpu().numpy()
        gt = labels.data.cpu().numpy()

        for gt_, pred_ in zip(gt, pred):
            gts.append(gt_)
            preds.append(pred_)
    score, class_iou = scores(gts, preds, n_class=n_classes)
    for k, v in score.items():
        print(k, v)

    for i in range(n_classes):
        print(i, class_iou[i])
Example no. 5
    def build_model(self):
        x = utils.input_batch_norm(self.X)
        h_fc1 = self._add_layers(x)

        concat_outputs = h_fc1
        with tf.variable_scope('scores'):
            pred_y = utils.scores(h_fc1, [128, self.labels_num],
                                  [self.labels_num])

        with tf.variable_scope('train'):
            lambda_loss_amount = 0.0015
            l2 = lambda_loss_amount * \
                 sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
            cross_entropy = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(labels=self.Y,
                                                        logits=pred_y)) + l2
            correct_prediction = tf.equal(tf.argmax(self.Y, 1),
                                          tf.argmax(pred_y, 1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        return concat_outputs, cross_entropy, accuracy, correct_prediction
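Examples 5 and 8 use a different utils.scores(inputs, weight_shape, bias_shape) that returns class logits (pred_y). A plausible sketch, assuming it is nothing more than a fully connected output layer written against the same TF 1.x API as the surrounding code; the variable names and initializers are guesses.

import tensorflow as tf  # TF 1.x, as in the examples above

def scores(inputs, weight_shape, bias_shape):
    # Hypothetical dense output layer producing per-class logits.
    weights = tf.get_variable('weights', weight_shape,
                              initializer=tf.truncated_normal_initializer(stddev=0.1))
    biases = tf.get_variable('biases', bias_shape,
                             initializer=tf.constant_initializer(0.1))
    return tf.matmul(inputs, weights) + biases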
Example no. 6
def main():

    parser = argparse.ArgumentParser(description='Omega integrals')

    parser.add_argument('-p', '--process', type=str,
                        choices=["omega11", "omega12", "omega13", "omega22", "omegas"],
                        default="omegas",
                        help='Comma-separated names of omega integrals whose regression is performed')

    parser.add_argument('-a', '--algorithm', type=str,
                        choices=['DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB', 'HGB', 'MLP'],
                        default='DT',
                        help='transport algorithm')

    parser.add_argument('-l', '--load_model', type=str2bool,
                        nargs='?',
                        choices=[False, True],
                        default=False,
                        const=True,
                        help='Load saved model')

    args = parser.parse_args()

    process = args.process.split(',')
    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0],'blue'))

    load_model = args.load_model
    print("Load: ", colored(load_model,'magenta'))

    src_dir = "."
    print("SRC: ", colored(src_dir,'yellow'))

    output_dir = src_dir+"/.."
    print("OUTPUT: ", colored(output_dir,'red'))

    n_jobs = 2

    # Import database
    with open('../data/omega_integrals_encoded.txt') as f:
        lines = (line for line in f if not line.startswith('#'))
        dataset = np.loadtxt(lines, skiprows=1)
    print(dataset.shape)

    x = dataset[:,0:3] # c, d, T
    y = dataset[:,3:]  # Ω(1,1), Ω(1,2), Ω(1,3), Ω(2,2)
    print(x.shape)
    print(y.shape)

    print("### Phase 1: PRE_PROCESSING ###")
    ########################################

    # 1.0) create directory tree
    model, scaler, figure = utils.mk_tree(process[0], algorithm[0], output_dir)

    # 1.1) train/test split dataset
    x_train, x_test, y_train, y_test = train_test_split(x, y, train_size=0.75, test_size=0.25, random_state=69)

    # 1.2) scale data and save scalers
    sc_x = StandardScaler()
    sc_y = StandardScaler()

    sc_x.fit(x_train)
    x_train = sc_x.transform(x_train)
    x_test  = sc_x.transform(x_test)

    sc_y.fit(y_train)
    y_train = sc_y.transform(y_train)
    y_test  = sc_y.transform(y_test)

    print('Training Features Shape:', x_train.shape)
    print('Training Labels Shape:',   y_train.shape)
    print('Testing Features Shape:',  x_test.shape)
    print('Testing Labels Shape:',    y_test.shape)

    dump(sc_x, open(scaler+"/scaler_x_"+process[0]+'.pkl', 'wb'))
    dump(sc_y, open(scaler+"/scaler_y_"+process[0]+'.pkl', 'wb'))

    print("### Phase 2: PROCESSING ###")
    ####################################

    # 2.0) estimator selection
    if (algorithm[0] == 'DT'):
        est, hyper_params = estimators.est_DT()

    elif (algorithm[0] == 'ET'):
        est, hyper_params = estimators.est_ET()

    elif (algorithm[0] == 'SVM'):
        est, hyper_params = estimators.est_SVM()

    elif (algorithm[0] == 'KR'):
        est, hyper_params = estimators.est_KR()

    elif (algorithm[0] == 'KN'):
        est, hyper_params = estimators.est_KN()

    elif (algorithm[0] == 'MLP'):
        est, hyper_params = estimators.est_MLP()

    elif (algorithm[0] == 'GB'):
        est, hyper_params = estimators.est_GB()

    elif (algorithm[0] == 'HGB'):
        est, hyper_params = estimators.est_HGB()

    elif (algorithm[0] == 'RF'):
        est, hyper_params = estimators.est_RF()

    else:
        print("Algorithm not implemented ...")

    # 2.1) search for best hyper-parameters combination
    # Exhaustive search over specified parameter values for the estimator
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    gs = GridSearchCV(est, cv=3, param_grid=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
                      refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # Randomized search on hyper parameters
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
    # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
    #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
    #                                                  return_train_score=False)
    #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
    #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # 2.2) training
    utils.fit(x_train, y_train, gs)

    # 2.3) prediction
    y_regr = utils.predict(x_test, gs)

    print("### Phase 3: POST-PROCESSING ###")
    #########################################

    # 3.0) save best hyper-parameters
    results = pd.DataFrame(gs.cv_results_)
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
    #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
    #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
    results.to_csv(model+"/../"+"GridSearchCV_results.csv", index=False, sep='\t', encoding='utf-8')

    # results print screen
    print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
    means  = gs.cv_results_['mean_test_score']
    stds   = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # 3.1) compute score metrics
    utils.scores(sc_x, sc_y, x_train, y_train, x_test, y_test, model, gs)

    # 3.2) back to original values (unscaling)
    x_test_dim = sc_x.inverse_transform(x_test)
    y_test_dim = sc_y.inverse_transform(y_test)
    y_regr_dim = sc_y.inverse_transform(y_regr)

    # 3.3) make plots
    utils.draw_plot(x_test_dim, y_test_dim, y_regr_dim, figure, process[0], algorithm[0])

    # 3.4) save model to disk
    dump(gs, model+"/model_"+process[0]+".sav")
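In this example (and in Examples 13 and 14) utils.scores receives the scalers, the scaled train/test splits, the model directory and the fitted GridSearchCV object, so it presumably evaluates the refit estimator and stores the metrics. A hedged sketch using standard scikit-learn metrics; the output file name and the choice of metrics are assumptions.

from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def scores(sc_x, sc_y, x_train, y_train, x_test, y_test, model_dir, gs):
    # Hypothetical sketch: score the refit GridSearchCV estimator on both splits
    # (still in scaled space) and write the results next to the saved model.
    with open(model_dir + "/scores.txt", "w") as out:
        for name, x, y in [("train", x_train, y_train), ("test", x_test, y_test)]:
            y_pred = gs.predict(x)
            out.write("%s R2 : %f\n" % (name, r2_score(y, y_pred)))
            out.write("%s MAE: %f\n" % (name, mean_absolute_error(y, y_pred)))
            out.write("%s MSE: %f\n" % (name, mean_squared_error(y, y_pred)))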
Example no. 7
#y_val = data_val.target
#X_val = data_val.drop('target', axis=1)
#X_val = normalize(X_val)

# Train test split
X_train, X_test, y_train, y_test = train_test_split(X_arr, y_arr, test_size=1.0/6, shuffle=False)  # shuffle?

models = [GaussianNB(),
          SVC(random_state=5),
          RandomForestClassifier(random_state=5),
          MLPClassifier(random_state=5)]

for model in models:
    model.fit(X_train, y_train)

UTILS.scores(models, X_test, y_test)

#print models[0].estimator.get_params().keys()

'''
# Grid search for each model
grid_data = [ {'kernel': ['rbf', 'sigmoid'],
               'C': [0.1, 1, 10, 100],
               'random_state': [5]},
              {'n_estimators': [10, 50, 100],
               'criterion': ['gini', 'entropy'],
               'max_depth': [None, 10, 50, 100],
               'min_samples_split': [2, 5, 10],
               'random_state': [5]},
              {'hidden_layer_sizes': [10, 50, 100],
               'activation': ['identity', 'logistic', 'tanh', 'relu'],
Example no. 8
    def build_model(self):
        x_serie_c = self.X
        xs_s = tf.split(x_serie_c,
                        num_or_size_splits=self.config.c_win_size,
                        axis=1)
        ys_s = tf.split(self.YS,
                        num_or_size_splits=self.config.c_win_size,
                        axis=1)
        concat_outputs = []
        self.losses = []
        self.accuracies = []
        self.correct_preds = []
        with tf.variable_scope('simple_activity') as scope:
            is_reuse = False
            for i, j in zip(xs_s, ys_s):
                sa = SimpleActivity(i,
                                    tf.reshape(j, [-1, self.s_labels_num]),
                                    self.config,
                                    is_training=self.is_training,
                                    norm=self.norm)
                output, loss, accuracy, correct_pred_s = sa.build_model()
                concat_outputs.append(output)
                self.losses.append(loss)
                self.accuracies.append(accuracy)
                self.correct_preds.append(correct_pred_s)
                if not is_reuse:
                    scope.reuse_variables()
                    is_reuse = True

            self.s_mean_loss = tf.reduce_mean(self.losses)
            tf.summary.scalar('loss', self.s_mean_loss)
            self.s_mean_accuracy = tf.reduce_mean(self.accuracies)
            tf.summary.scalar('accuracy', self.s_mean_accuracy)
        self.train_step_s = tf.train.AdamOptimizer(
            self.learning_rate).minimize(self.s_mean_loss)
        with tf.variable_scope('complex_activity'):
            with tf.variable_scope("lstm_layers"):
                lstm_size = 128
                cells = tf.contrib.rnn.MultiRNNCell(
                    [utils.lstm_cell(lstm_size) for _ in range(3)],
                    state_is_tuple=True)
                outputs, states = tf.contrib.rnn.static_rnn(cells,
                                                            concat_outputs,
                                                            dtype=tf.float32)

            pred_y_c = utils.scores(outputs[-1],
                                    [lstm_size, self.c_labels_num],
                                    [self.c_labels_num])
            lambda_loss_amount = 0.0015
            l2 = lambda_loss_amount * \
                 sum(tf.nn.l2_loss(tf_var) for tf_var in tf.trainable_variables())
            cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=self.YC, logits=pred_y_c)) \
                                + l2
            tf.summary.scalar("loss", cross_entropy)
            self.train_step_c = tf.train.AdamOptimizer(
                self.learning_rate).minimize(cross_entropy)
            tf.summary.scalar("learning_rate", self.learning_rate)

            self.joint_loss = cross_entropy + self.s_mean_loss
            self.c_loss = cross_entropy
            tf.summary.scalar("joint_loss", self.joint_loss)

            self.joint_train_step = tf.train.AdamOptimizer(
                self.learning_rate).minimize(self.joint_loss)
            self.correct_prediction_c = tf.equal(tf.argmax(self.YC, 1),
                                                 tf.argmax(pred_y_c, 1))
            self.c_accuracy = tf.reduce_mean(
                tf.cast(self.correct_prediction_c, tf.float32))
            tf.summary.scalar("accuracy", self.c_accuracy)
Example no. 9
uni=utils.cros_validation(regression.LinearRegression(regularization_factor=1.0),X,train_label,n_folds,random_grid)

random_grid = {'n_estimators': [100,200,300,400,500,600,700,720,740,760,780,800]}


lgb = lgb.LGBMRegressor(objective='regression',num_leaves=5,
                              learning_rate=0.05, n_estimators=800,
                              max_bin = 60, bagging_fraction = 0.8,
                              bagging_freq = 5, feature_fraction = 0.2319,
                              feature_fraction_seed=9, bagging_seed=9,
                              min_data_in_leaf =6, min_sum_hessian_in_leaf = 11)
model_lgb=utils.cros_validation(lgb,train.values,train_label,n_folds,random_grid)


# call k-fold cross-validation
utils.scores('Lasso',utils.cv_rmse(lasso,train.values,train_label,n_folds))


utils.scores('Multivariate Linear Regression',utils.cv_rmse(multi,train.values,train_label,n_folds))


utils.scores('Univariate Linear Regression',utils.cv_rmse(uni,X,train_label,n_folds))

utils.scores('Gradient Boosting',utils.cv_rmse(model_lgb,train.values,train_label,n_folds))


#---------------------------Meta Learning ----------------------------------

sub = pd.DataFrame()
sub['Id'] = test_id
sub['SalePrice'] = np.exp(lasso.predict(test)*0.2+multi.predict(test)*0.2+model_lgb.predict(test)*0.6)
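utils.scores in this example only receives a model name and the array returned by utils.cv_rmse, so it is most likely a small reporting helper; the sketch below is a guess at its behaviour.

def scores(model_name, rmse):
    # Hypothetical: summarize the cross-validated RMSE values for one model.
    print("{} score: {:.4f} (std: {:.4f})".format(model_name, rmse.mean(), rmse.std()))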
Example no. 10
def train(args, out, net_name):
    data_path = get_data_path(args.dataset)
    data_loader = get_loader(args.dataset)
    loader = data_loader(data_path, is_transform=True)
    n_classes = loader.n_classes
    print(n_classes)
    kwargs = {'num_workers': 8, 'pin_memory': True}

    trainloader = data.DataLoader(loader,
                                  batch_size=args.batch_size,
                                  shuffle=True)

    another_loader = data_loader(data_path, split='val', is_transform=True)

    valloader = data.DataLoader(another_loader,
                                batch_size=args.batch_size,
                                shuffle=True)

    # compute weight for cross_entropy2d
    norm_hist = hist / np.max(hist)  # 'hist' (per-class pixel counts) is assumed to be computed earlier in the original script
    weight = 1 / np.log(norm_hist + 1.02)
    weight[-1] = 0
    weight = torch.FloatTensor(weight)
    model = Bilinear_Res(n_classes)

    if torch.cuda.is_available():
        model.cuda(0)
        weight = weight.cuda(0)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr_rate,
                                 weight_decay=args.w_decay)
    # optimizer = torch.optim.RMSprop(model.parameters(), lr=args.lr_rate)
    scheduler = StepLR(optimizer, step_size=100, gamma=args.lr_decay)

    for epoch in tqdm.tqdm(range(args.epochs),
                           desc='Training',
                           ncols=80,
                           leave=False):
        scheduler.step()
        model.train()
        loss_list = []
        file = open(out + '/{}_epoch_{}.txt'.format(net_name, epoch), 'w')
        for i, (images, labels) in tqdm.tqdm(enumerate(trainloader),
                                             total=len(trainloader),
                                             desc='Iteration',
                                             ncols=80,
                                             leave=False):
            if torch.cuda.is_available():
                images = Variable(images.cuda(0))
                labels = Variable(labels.cuda(0))
            else:
                images = Variable(images)
                labels = Variable(labels)
            optimizer.zero_grad()
            outputs = model(images)
            loss = cross_entropy2d(outputs, labels, weight=weight)
            loss_list.append(loss.data[0])
            loss.backward()
            optimizer.step()

        # file.write(str(np.average(loss_list)))
        print(np.average(loss_list))
        file.write(str(np.average(loss_list)) + '\n')
        model.eval()
        gts, preds = [], []
        if (epoch % 10 == 0):
            for i, (images, labels) in tqdm.tqdm(enumerate(valloader),
                                                 total=len(valloader),
                                                 desc='Valid Iteration',
                                                 ncols=80,
                                                 leave=False):
                if torch.cuda.is_available():
                    images = Variable(images.cuda(0))
                    labels = Variable(labels.cuda(0))
                else:
                    images = Variable(images)
                    labels = Variable(labels)
                outputs = model(images)
                pred = outputs.data.max(1)[1].cpu().numpy()
                gt = labels.data.cpu().numpy()
                for gt_, pred_ in zip(gt, pred):
                    gts.append(gt_)
                    preds.append(pred_)
            score, class_iou = scores(gts, preds, n_class=n_classes)
            for k, v in score.items():
                file.write('{} {}\n'.format(k, v))

            for i in range(n_classes):
                file.write('{} {}\n'.format(i, class_iou[i]))
            torch.save(
                model.state_dict(),
                out + "/{}_{}_{}.pkl".format(net_name, args.dataset, epoch))
        file.close()
Example no. 11
                        pred[:text_len])).tolist()  # convert tensor to list
                epoch_preds.append(pred_cut)

            for tag, text_len in zip(
                    batch_tag, text_lens):  # batch_tag: [seq_len, num_tags]
                tag_cut = tf.make_ndarray(tf.make_tensor_proto(
                    tag[:text_len])).tolist()  # convert tensor to list
                epoch_trues.append(tag_cut)

            progress_bar.update(1)

    # Convert epoch_idxs to epoch_tags
    epoch_tag_preds = utils.epoch_idx2tag(epoch_preds, idx2tag)
    epoch_tag_trues = utils.epoch_idx2tag(epoch_trues, idx2tag)
    # Calculate metrics for whole epoch
    train_scores = utils.scores(epoch_tag_trues, epoch_tag_preds)

    ### Valid ###
    epoch_preds, epoch_trues = [], []
    with tqdm(total=len(list(valid_batches))) as progress_bar:
        for batch_seq, batch_tag in valid_batches:
            preds, text_lens = valid_fn(model, valid_loss, batch_seq,
                                        batch_tag)

            # Unpad preds/tags to the real lengths (for metrics)
            for pred, text_len in zip(preds,
                                      text_lens):  # logit: [seq_len, num_tags]
                pred_cut = tf.make_ndarray(
                    tf.make_tensor_proto(
                        pred[:text_len])).tolist()  # convert tensor to list
                epoch_preds.append(pred_cut)
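Here utils.scores compares the epoch's true and predicted tag sequences. The sketch below computes a simple token-level accuracy; the real helper may well return entity-level precision/recall/F1 instead, so treat both the metric and the key name as assumptions.

def scores(true_tag_seqs, pred_tag_seqs):
    # Hypothetical token-level accuracy over all sequences of an epoch.
    correct, total = 0, 0
    for true_seq, pred_seq in zip(true_tag_seqs, pred_tag_seqs):
        for t, p in zip(true_seq, pred_seq):
            correct += int(t == p)
            total += 1
    return {'accuracy': correct / total if total else 0.0}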
Example no. 12
def train(args):
    if (args.dataset == 'pascal'):
        another_loader = VOC2011ClassSeg(root='/home/vietdv', transform=True)
        loader = SBDClassSeg(root='/home/vietdv', transform=True, augment=True)
    else:
        data_path = get_data_path(args.dataset)
        label_scale = False
        if (args.model == 'encoder'):
            label_scale = True
        data_loader = get_loader(args.dataset)
        loader = data_loader(data_path,
                             is_transform=True,
                             augment=True,
                             label_scale=label_scale)
        another_loader = data_loader(data_path,
                                     split='val',
                                     is_transform=True,
                                     label_scale=label_scale)

    n_classes = loader.n_classes
    trainloader = data.DataLoader(loader, batch_size=args.batch_size)

    valloader = data.DataLoader(another_loader, batch_size=1)
    # get weight for cross_entropy2d
    weight = loader.weight
    model = Net(n_classes)
    if torch.cuda.is_available():
        model.cuda(0)
        weight = weight.cuda(0)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.lr_rate,
                                 weight_decay=args.w_decay)
    criterion = CrossEntropyLoss2d(weight, False)
    # alpha = 0.5
    lambda1 = lambda epoch: pow((1 -
                                 (epoch / args.epochs)), 0.9)  ## scheduler 2
    scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda1)
    for epoch in range(args.epochs):
        model.train()
        loss_list = []
        file = open(args.folder + '/{}_{}.txt'.format('hnet', epoch), 'w')
        scheduler.step(epoch)
        for i, (images, labels) in enumerate(trainloader):
            if torch.cuda.is_available():
                images = Variable(images.cuda(0))
                labels = Variable(labels.cuda(0))
            else:
                images = Variable(images)
                labels = Variable(labels)
            optimizer.zero_grad()
            outputs = model(images)
            # loss = alpha * criterion(outputs, labels) / len(images) + (1 - alpha) * lovasz_softmax(outputs, labels, ignore=n_classes-1)
            loss = criterion(outputs, labels) / len(images)
            print(loss.data[0])
            loss_list.append(loss.data[0])
            loss.backward()
            optimizer.step()

        file.write(str(np.average(loss_list)) + '\n')
        model.eval()
        gts, preds = [], []
        for i, (images, labels) in enumerate(valloader):
            if torch.cuda.is_available():
                images = Variable(images.cuda(0))
                labels = Variable(labels.cuda(0))
            else:
                images = Variable(images)
                labels = Variable(labels)
            outputs = model(images)
            pred = outputs.data.max(1)[1].cpu().numpy()
            gt = labels.data.cpu().numpy()
            for gt_, pred_ in zip(gt, pred):
                gts.append(gt_)
                preds.append(pred_)
        score, class_iou = scores(gts, preds, n_class=n_classes)
        # scheduler.step(score['Mean IoU : \t'])
        for k, v in score.items():
            file.write('{} {}\n'.format(k, v))

        for i in range(n_classes - 1):
            file.write('{} {}\n'.format(i, class_iou[i]))
        torch.save(
            model.state_dict(),
            args.folder + "/{}_{}_{}.pkl".format('hnet', args.dataset, epoch))
        file.close()
Example no. 13
def main():

    parser = argparse.ArgumentParser(description='reaction rates regression')

    parser.add_argument(
        '-p',
        '--process',
        type=str,
        choices=['DR', 'VT', 'VV', 'VV2', 'ZR'],
        default='DR,VT,VV,VV2,ZR',
        help='Comma-separated names of properties whose regression is performed'
    )

    parser.add_argument('-a',
                        '--algorithm',
                        type=str,
                        choices=[
                            'DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB',
                            'HGB', 'MLP'
                        ],
                        default='DT',
                        help='regression algorithm')

    args = parser.parse_args()

    process = args.process.split(',')
    directory = process[0] + '/data/processes'
    path = directory + "/*.csv"
    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0], 'blue'))

    parent_dir = "."
    print("PWD: ", colored(parent_dir, 'yellow'))

    n_jobs = 2

    for f in glob.glob(path):
        #print("{bcolors.OKGREEN}f{bcolors.ENDC}")
        print(colored(f, 'red'))
        dataset_k = pd.read_csv(f, delimiter=",").to_numpy()
        dataset_T = pd.read_csv(parent_dir + "/" + process[0] +
                                "/data/Temperatures.csv").to_numpy()

        x = dataset_T.reshape(-1, 1)
        y = dataset_k

        print("### Phase 1: PRE_PROCESSING ###")
        ########################################
        '''
        https://stackoverflow.com/questions/50565937/how-to-normalize-the-train-and-test-data-using-minmaxscaler-sklearn
        https://towardsdatascience.com/6-amateur-mistakes-ive-made-working-with-train-test-splits-916fabb421bb
        https://www.analyticsvidhya.com/blog/2020/04/feature-scaling-machine-learning-normalization-standardization/
        https://towardsdatascience.com/scale-standardize-or-normalize-with-scikit-learn-6ccc7d176a02

        You should fit the MinMaxScaler using the training data and
        then apply the scaler on the testing data before the prediction.

        In summary:

        Step 1: fit the scaler on the TRAINING data
        Step 2: use the scaler to transform the TRAINING data
        Step 3: use the transformed training data to fit the predictive model
        Step 4: use the scaler to transform the TEST data
        Step 5: predict using the trained model (step 3) and the transformed TEST data (step 4).

        data = datasets.load_iris()
        X    = data.data
        y    = data.target

        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)

        scaler = MinMaxScaler()
        X_train_scaled = scaler.fit_transform(X_train)

        model = SVC()
        model.fit(X_train_scaled, y_train)

        X_test_scaled = scaler.transform(X_test)
        y_pred = model.predict(X_test_scaled)

        '''
        data, dir, proc, model, scaler, figure, outfile = utils.mk_tree(
            f, parent_dir, process[0], algorithm[0])

        # Train/test split dataset
        x_train, x_test, y_train, y_test = train_test_split(x,
                                                            y,
                                                            train_size=0.75,
                                                            test_size=0.25,
                                                            random_state=69)

        # Define scalers: they can be modified to investigate the effect of scalers
        ##############################################################################
        input_scaler = None  #MinMaxScaler(feature_range=(-1,1))
        output_scaler = None  #StandardScaler()
        ##############################################################################

        # Scale None and/or inputs and/or outputs
        x_train, x_test, y_train, y_test = utils.scale_dataset(
            x_train, x_test, y_train, y_test, input_scaler, output_scaler)

        print('Training Features Shape:', x_train.shape)
        print('Training Labels Shape:', y_train.shape)
        print('Testing Features Shape:', x_test.shape)
        print('Testing Labels Shape:', y_test.shape)

        # Save scalers (they may be useful)
        dump(input_scaler, open(scaler + "/scaler_x_MO_" + data + '.pkl',
                                'wb'))
        dump(output_scaler, open(scaler + "/scaler_y_MO_" + data + '.pkl',
                                 'wb'))

        if (algorithm[0] == 'DT'):
            est, hyper_params = estimators.est_DT()

        elif (algorithm[0] == 'ET'):
            est, hyper_params = estimators.est_ET()

        elif (algorithm[0] == 'SVM'):
            est, hyper_params = estimators.est_SVM()

        elif (algorithm[0] == 'KR'):
            est, hyper_params = estimators.est_KR()

        elif (algorithm[0] == 'KN'):
            est, hyper_params = estimators.est_KN()

        elif (algorithm[0] == 'MLP'):
            est, hyper_params = estimators.est_MLP()

        elif (algorithm[0] == 'GB'):
            est, hyper_params = estimators.est_GB()

        elif (algorithm[0] == 'HGB'):
            est, hyper_params = estimators.est_HGB()

        elif (algorithm[0] == 'RF'):
            est, hyper_params = estimators.est_RF()

        else:
            print("Algorithm not implemented ...")

        # https://github.com/ray-project/tune-sklearn
        # https://docs.ray.io/en/latest/tune/api_docs/sklearn.html#tune-sklearn-docs
        # class ray.tune.sklearn.TuneGridSearchCV(estimator, param_grid, early_stopping=None, scoring=None,
        # n_jobs=None, cv=5, refit=True, verbose=0, error_score='raise', return_train_score=False,
        # local_dir='~/ray_results', max_iters=1, use_gpu=False, loggers=None, pipeline_auto_early_stop=True,
        # stopper=None, time_budget_s=None, sk_n_jobs=None)
        #scheduler = MedianStoppingRule(grace_period=10.0)
        #gs = TuneGridSearchCV(est, cv=10, param_grid=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
        #                  refit=True, error_score=np.nan, return_train_score=True)
        #tune_search = TuneSearchCV(clf, parameter_grid, search_optimization="hyperopt", n_trials=3, early_stopping=scheduler, max_iters=10)
        #tune_search.fit(x_train, y_train)

        # Exhaustive search over specified parameter values for the estimator
        # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
        gs = GridSearchCV(est,
                          cv=5,
                          param_grid=hyper_params,
                          verbose=2,
                          n_jobs=n_jobs,
                          scoring='r2',
                          refit=True,
                          pre_dispatch='n_jobs',
                          error_score=np.nan,
                          return_train_score=True)

        # Randomized search on hyper parameters
        # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
        # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
        #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
        #                                                  return_train_score=False)
        #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
        #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

        # Training
        utils.fit(x_train, y_train, gs, outfile)

        results = pd.DataFrame(gs.cv_results_)
        # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
        #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
        #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
        results.to_csv(model + "/../" + "GridSearchCV_results.csv",
                       index=False,
                       sep='\t',
                       encoding='utf-8')

        #plt.figure(figsize=(12, 4))
        #for score in ['mean_test_recall', 'mean_test_precision', 'mean_test_min_both']:
        #    plt.plot([_[1] for _ in results['param_class_weight']], results[score], label=score)
        #plt.legend();

        #plt.figure(figsize=(12, 4))
        #for score in ['mean_train_recall', 'mean_train_precision', 'mean_test_min_both']:
        #    plt.scatter(x=[_[1] for _ in results['param_class_weight']], y=results[score.replace('test', 'train')], label=score)
        #plt.legend();

        # summarize results
        print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
        means = gs.cv_results_['mean_test_score']
        stds = gs.cv_results_['std_test_score']
        params = gs.cv_results_['params']
        for mean, stdev, param in zip(means, stds, params):
            print("%f (%f) with: %r" % (mean, stdev, param))

        # Perform prediction
        y_regr = utils.predict(x_test, gs, outfile)

        # Compute the scores
        utils.scores(input_scaler, output_scaler, x_train, y_train, x_test,
                     y_test, model, gs, outfile)

        # Transform back
        x_train, x_test, y_train, y_test, y_regr = utils.scale_back_dataset(
            x_train, x_test, y_train, y_test, y_regr, input_scaler,
            output_scaler)

        # Make figures
        utils.draw_plot(x_test, y_test, y_regr, figure, data)

        # save the model to disk
        dump(gs, model + "/model_MO_" + data + '.sav')
Example no. 14
def main():

    parser = argparse.ArgumentParser(description='relaxation terms regression')

    #    parser.add_argument('-p', '--process', type=str,
    #                        choices=["shear", "bulk", "conductivity", "thermal_diffusion", "mass_diffusion"],
    #                        default="shear,bulk,conductivity,thermal_diffusion,mass_diffusion",
    #                        help='Comma-separated names of transport properties whose regression is performed')

    parser.add_argument('-a',
                        '--algorithm',
                        type=str,
                        choices=[
                            'DT', 'RF', 'ET', 'GP', 'KN', 'SVM', 'KR', 'GB',
                            'HGB', 'MLP'
                        ],
                        default='DT',
                        help='regression algorithm')

    args = parser.parse_args()

    #    process   = args.process.split(',')
    #    print("Process: ", colored(process[0], 'green'))

    algorithm = args.algorithm.split(',')
    print("Algorithm: ", colored(algorithm[0], 'blue'))

    src_dir = "."
    print("SRC: ", colored(src_dir, 'yellow'))

    output_dir = src_dir + "/.."
    print("OUTPUT: ", colored(output_dir, 'red'))

    n_jobs = 2

    # Import database
    dataset = np.loadtxt("../data/transposed_reshaped_data.txt")
    #   with open('../data/TCs_air5.txt') as f:
    #       lines = (line for line in f if not line.startswith('#'))
    #       dataset = np.loadtxt(lines, skiprows=1)

    print(dataset.shape)

    #    if (process[0] == "shear"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,7:8] # shear viscosity
    #    elif (process[0] == "bulk"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,8:9] # bulk viscosity
    #    elif (process[0] == "conductivity"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,9:10]# thermal conductivity
    #    elif (process[0] == "thermal_diffusion"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,10:] # thermal diffusion, D_Ti
    #    elif (process[0] == "mass_diffusion"):
    #        x = dataset[:,0:7] # T, P, x_N2, x_O2, x_NO, x_N, x_O
    #        y = dataset[:,:]   # mass diffusion TODO

    x = dataset[:, 0:50]  # ni_n[47], na_n[1], V, T
    y = dataset[:, 50:]  # RD_mol[47], RD_at[1]

    print(x.shape)
    print(y.shape)

    print("### Phase 1: PRE_PROCESSING ###")
    ########################################

    # 1.0) create directory tree
    model, scaler, figure = utils.mk_tree(algorithm[0], output_dir)

    # 1.1) train/test split dataset
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        train_size=0.75,
                                                        test_size=0.25,
                                                        random_state=69)

    # 1.2) scale data and save scalers
    sc_x = StandardScaler()
    sc_y = StandardScaler()

    sc_x.fit(x_train)
    x_train = sc_x.transform(x_train)
    x_test = sc_x.transform(x_test)

    sc_y.fit(y_train)
    y_train = sc_y.transform(y_train)
    y_test = sc_y.transform(y_test)

    print('Training Features Shape:', x_train.shape)
    print('Training Labels Shape:', y_train.shape)
    print('Testing Features Shape:', x_test.shape)
    print('Testing Labels Shape:', y_test.shape)

    dump(sc_x, open(scaler + "/scaler_x.pkl", 'wb'))
    dump(sc_y, open(scaler + "/scaler_y.pkl", 'wb'))

    print("### Phase 2: PROCESSING ###")
    ####################################

    # 2.0) estimator selection
    if (algorithm[0] == 'DT'):
        est, hyper_params = estimators.est_DT()

    elif (algorithm[0] == 'ET'):
        est, hyper_params = estimators.est_ET()

    elif (algorithm[0] == 'SVM'):
        est, hyper_params = estimators.est_SVM()

    elif (algorithm[0] == 'KR'):
        est, hyper_params = estimators.est_KR()

    elif (algorithm[0] == 'KN'):
        est, hyper_params = estimators.est_KN()

    elif (algorithm[0] == 'MLP'):
        est, hyper_params = estimators.est_MLP()

    elif (algorithm[0] == 'GB'):
        est, hyper_params = estimators.est_GB()

    elif (algorithm[0] == 'HGB'):
        est, hyper_params = estimators.est_HGB()

    elif (algorithm[0] == 'RF'):
        est, hyper_params = estimators.est_RF()

    else:
        print("Algorithm not implemented ...")

    # 2.1) search for best hyper-parameters combination
    # Exhaustive search over specified parameter values for the estimator
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html
    gs = GridSearchCV(est,
                      cv=10,
                      param_grid=hyper_params,
                      verbose=2,
                      n_jobs=n_jobs,
                      scoring='r2',
                      refit=True,
                      pre_dispatch='n_jobs',
                      error_score=np.nan,
                      return_train_score=True)

    # Randomized search on hyper parameters
    # https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.RandomizedSearchCV.html#sklearn.model_selection.RandomizedSearchCV
    # class sklearn.model_selection.RandomizedSearchCV(estimator, param_distributions, *, n_iter=10, scoring=None, n_jobs=None, refit=True,
    #                                                  cv=None, verbose=0, pre_dispatch='2*n_jobs', random_state=None, error_score=nan,
    #                                                  return_train_score=False)
    #gs = RandomizedSearchCV(est, cv=10, n_iter=10, param_distributions=hyper_params, verbose=2, n_jobs=n_jobs, scoring='r2',
    #                        refit=True, pre_dispatch='n_jobs', error_score=np.nan, return_train_score=True)

    # 2.2) training
    utils.fit(x_train, y_train, gs)

    # 2.3) prediction
    y_regr = utils.predict(x_test, gs)

    print("### Phase 3: POST-PROCESSING ###")
    #########################################

    # 3.0) save best hyper-parameters
    results = pd.DataFrame(gs.cv_results_)
    # https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_csv.html
    #compression_opts = dict(method='zip', archive_name='GridSearchCV_results.csv')
    #results.to_csv('GridSearchCV_results.zip', index=False, compression=compression_opts)
    results.to_csv(model + "/../" + "GridSearchCV_results.csv",
                   index=False,
                   sep='\t',
                   encoding='utf-8')

    # results print screen
    print("Best: %f using %s" % (gs.best_score_, gs.best_params_))
    means = gs.cv_results_['mean_test_score']
    stds = gs.cv_results_['std_test_score']
    params = gs.cv_results_['params']
    for mean, stdev, param in zip(means, stds, params):
        print("%f (%f) with: %r" % (mean, stdev, param))

    # 3.1) compute score metrics
    utils.scores(sc_x, sc_y, x_train, y_train, x_test, y_test, model, gs)

    # 3.2) back to original values (unscaling)
    x_test_dim = sc_x.inverse_transform(x_test)
    y_test_dim = sc_y.inverse_transform(y_test)
    y_regr_dim = sc_y.inverse_transform(y_regr)

    # 3.3) make plots
    utils.draw_plot(x_test_dim, y_test_dim, y_regr_dim, figure)

    # 3.4) save model to disk
    dump(gs, model + "/model.sav")
Example no. 15
print(df_raw.isnull().sum().sort_index() / len(df_raw))
''' Fill the missing value with mean, and use codes to represent categories.'''
df, y = utils.process(df_raw, 'SalePrice')

print(df.head(1))
''' Use all data to train will lead to overfitting.'''
# m = RandomForestRegressor(n_jobs=-1)
# m.fit(df, y)
# # `m.score` will return r² value (1 is good, 0 is bad)
# print(m.score(df, y))
''' Split the data to train set and validate set.'''
validate_size = 12000  # kaggle test set size
train_size = len(df) - validate_size

X_train, X_valid = utils.split(df, train_size)
y_train, y_valid = utils.split(y, train_size)
print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)
''' Take sample to train will save a lot of time.'''
df, y = utils.process(df_raw, 'SalePrice', sample_size=30000)
''' Don't change the validate set.'''
X_train, _ = utils.split(df, 20000)
y_train, _ = utils.split(y, 20000)
print(X_train.shape, X_valid.shape, y_train.shape, y_valid.shape)

m = RandomForestRegressor(n_estimators=1,
                          max_depth=3,
                          bootstrap=False,
                          n_jobs=-1)
m.fit(X_train, y_train)
print(utils.scores(m, X_train, y_train, X_valid, y_valid))
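utils.scores here takes a fitted regressor plus the train and validation splits; judging by the workflow above, it likely reports an error metric and R^2 on both splits. A sketch under that assumption (the returned ordering is a guess).

import numpy as np

def scores(m, X_train, y_train, X_valid, y_valid):
    # Hypothetical: RMSE and R^2 on the training and validation sets.
    def rmse(pred, actual):
        return np.sqrt(((pred - actual) ** 2).mean())
    return [rmse(m.predict(X_train), y_train), rmse(m.predict(X_valid), y_valid),
            m.score(X_train, y_train), m.score(X_valid, y_valid)]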