예제 #1
0
 def train_execution(self):
     """Run the full training pipeline for the configured architecture.

     Loads a pretrained torchvision backbone (vgg16 / vgg19 / densenet121),
     attaches the custom classifier via ``self.model_definition()``, trains
     for ``self.epochs`` epochs and saves a checkpoint.

     Raises:
         ValueError: if ``self.arch`` is not one of the supported models.
     """
     # Map of supported backbones. The original code only printed an error
     # for an unknown arch and fell through, crashing later with an
     # AttributeError on the undefined self.model — fail fast instead.
     arch_builders = {
         'vgg19': models.vgg19,
         'vgg16': models.vgg16,
         'densenet121': models.densenet121,
     }
     if self.arch not in arch_builders:
         raise ValueError(
             "Sorry {} is not a valid model for this exercise. "
             "Please use vgg16, vgg19, or densenet121".format(self.arch))
     self.model = arch_builders[self.arch](pretrained=True)

     # Normalize the user-facing flag to a torch device string.
     self.device = 'cuda' if self.device == 'gpu' else 'cpu'

     # Build the classifier head, then loss and optimizer. Only the
     # classifier parameters are passed to the optimizer.
     self.model_definition()
     criterion = nn.CrossEntropyLoss()
     optimizer = optim.Adam(self.model.classifier.parameters(), lr=self.learning_rate)

     # Load the dataloaders (train_datasets is kept for checkpointing).
     train_dataloader, valid_dataloader, test_dataloader, train_datasets = get_data(self.data_dir)

     # Decay the learning rate by 10x every 7 epochs.
     decay_schedule = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

     # Train, printing validation scores inside self.train().
     for e in range(self.epochs):
         print('epoch {}/{}'.format(e + 1, self.epochs))
         self.train(train_dataloader, self.device, criterion, optimizer, valid_dataloader, decay_schedule)

     save_model(self.model, self.save_dir, self.arch, self.hidden_units, self.dropout,
                self.epochs, self.learning_rate, train_datasets)
def get_model_data():
    '''
    This is the only function needed to run to return a dataframe useable in modeling
    '''
    frame = prepare.get_data()
    # Apply each cleaning/encoding step in order.
    for step in (remove_columns, fix_nas, encode_categorical_columns):
        frame = step(frame)
    return frame.dropna()
예제 #3
0
def pretrain(C, data, num_epoch=10, pos_lim=300, neg_lim=300, model=None):
    """Pre-train a model on the fold named by ``data``.

    Builds a fresh model via get_model() unless one is supplied, then runs
    ``num_epoch`` training epochs on the training split only (dev/test
    splits are loaded but unused here). Returns the trained model.
    """
    gpu = 0  # device index passed through to train()

    (train_split, _dev, _test), n_labels = get_data(C,
                                                    fold=data,
                                                    files=True,
                                                    pos_lim=pos_lim,
                                                    neg_lim=neg_lim)
    if model is None:
        model = get_model(C, n_labels)
    optimizer, criterion = get_others(C, model)

    # "PT" tags this phase as pre-training in the logs/progress output.
    for epoch in range(num_epoch):
        model, _ = train(C, model, train_split, criterion, optimizer, epoch,
                         "PT", gpu)

    return model
예제 #4
0
def main():
	"""Load a pickled model checkpoint, fine-tune it briefly and report
	test-set ROC-AUC / PRC-AUC.

	The fold id is parsed from the trailing integer of ``C.save_name``
	(e.g. ".../3"). Removed debugging leftovers: a ``pdb.set_trace()``
	breakpoint and three duplicated ``evaluate()`` calls that existed only
	for interactive stepping.
	"""
	with open(os.path.join(C.save_path , C.save_name + ".pkl") , "rb") as fil:
		model = pickle.load(fil)

	run_id 		= int(re.search(r"/(\d+)$" , C.save_name).group(1))
	loss_func 	= tc.nn.CrossEntropyLoss()
	optimer   	= tc.optim.Adam(params = model.parameters() , lr = 1e-3 , weight_decay = 1e-8)

	# Override config for this fine-tuning pass.
	C.uniform_sample = True
	C.grad_clip = -1  # disable gradient clipping
	C.bs 		= 10
	(trainset , devset , testset) , lab_num = get_data  (C , fold = run_id)

	# One training pass, then a single evaluation on the test split.
	model , train_loss  = train(C, model, trainset, loss_func, optimer, 0, run_id, 0)
	print(train_loss)

	troc_auc , tprc_auc = evaluate(C, model, testset , loss_func, 0, run_id, 0, "Test")
	print(troc_auc , tprc_auc)
예제 #5
0
def kfold(C , k = 10 , choose_one = [] , p_model = None):
	"""Train and evaluate an ensemble over k cross-validation folds.

	Parameters
	----------
	C : experiment config object (hyperparameters, paths, flags).
	k : number of folds.
	choose_one : if non-empty, only the fold ids listed here are run.
	    NOTE(review): mutable default argument — harmless here since it is
	    only read, but worth replacing with None.
	p_model : optional pre-trained model whose parameters seed each
	    ensemble member via copy_param().
	"""

	# Fingerprint / mol2vec features are optional extra inputs.
	if C.finger or C.mol2vec:
		finger_dict = load_fingers(C , C.data)
	else:
		finger_dict = None

	device = 0  # device index used for every run

	# Per-fold best test metrics, aggregated at the end.
	roc_aucs 	= []
	prc_aucs 	= []
	for run_id in range(k):

		if len(choose_one) > 0 and run_id not in choose_one: # only run the selected fold(s)
			continue

		(trainset , devset , testset) , lab_num = get_data  (C , fold = run_id)

		# Build C.ensemble independent members, each with its own optimizer,
		# optionally seeded from p_model.
		models = []
		optimers = []
		for j in range(C.ensemble):
			model = get_model (C , lab_num)
			if p_model is not None:
				copy_param(model , p_model)
			model = model.to(device)
			optimer , loss_func = get_others(C , model)

			models.append(model)
			optimers.append(optimer)

		# Evaluation uses the ensemble as a whole.
		# NOTE(review): loss_func used below is the one created for the LAST
		# ensemble member — presumably all members share the same loss; confirm.
		ens_eval_m = EnsembleModel(models)

		E.log("%d th run starts on device %d\n" % (run_id , device))

		best_epoch	= -1
		best_metric = -1
		tes_roc_auc = -1
		tes_prc_auc = -1
		for epoch_id in range(C.num_epoch):

			# One training epoch per member; accumulate the mean loss.
			train_loss = 0.
			for ens_id in range(C.ensemble):
				model , _train_loss = train(C, models[ens_id], trainset, loss_func, optimers[ens_id], 
									epoch_id, "{0}-{1}".format(run_id , ens_id), device , finger_dict)
				train_loss += (_train_loss / C.ensemble)

			droc_auc , dprc_auc = evaluate(C, ens_eval_m, devset , loss_func, 
									epoch_id, run_id, device, "Dev" , finger_dict)
			troc_auc , tprc_auc = evaluate(C, ens_eval_m, testset, loss_func, 
									epoch_id, run_id, device, "Test", finger_dict)

			E.log("Epoch %d of run %d ended." % (epoch_id , run_id))
			E.log("Dev  Roc-Auc = %.4f Prc-Auc = %.4f" % (droc_auc , dprc_auc))
			E.log("Test Roc-Auc = %.4f Prc-Auc = %.4f" % (troc_auc , tprc_auc))
			E.log()

			# Model-selection metric: negative training loss, or dev PRC-AUC.
			if C.train_loss_val:
				metric_val = -train_loss
			else:
				metric_val = dprc_auc

			# Checkpoint the best epoch (every epoch when C.no_valid is set).
			if (best_epoch < 0 or metric_val > best_metric) or C.no_valid:
				best_epoch 	= epoch_id
				best_metric = metric_val
				tes_roc_auc = troc_auc
				tes_prc_auc = tprc_auc
				save_model(ens_eval_m , C.save_path , E.core.id , str(run_id))

		E.log("%d th run ends. best epoch = %d" % (run_id , best_epoch))
		E.log("Best metric = %.4f"                     % (best_metric))
		E.log("Got Test Roc-Auc = %.4f Prc-Auc = %.4f" % (tes_roc_auc , tes_prc_auc))
		E.log()

		E["Test ROC-AUC"]["Best"].update(tes_roc_auc , run_id)
		E["Test PRC-AUC"]["Best"].update(tes_prc_auc , run_id)

		roc_aucs.append(tes_roc_auc)
		prc_aucs.append(tes_prc_auc)

		E.log("model saved.")

		E.log("--------------------------------------------------------------")

	# Aggregate across folds (population standard deviation).
	# NOTE(review): raises ZeroDivisionError if choose_one filtered out every
	# fold — confirm callers never pass an all-invalid selection.
	roc_auc_avg = sum(roc_aucs) / len(roc_aucs)
	roc_auc_std = (sum([(x - roc_auc_avg) ** 2 for x in roc_aucs]) / len(roc_aucs)) ** 0.5
	prc_auc_avg = sum(prc_aucs) / len(prc_aucs)
	prc_auc_std = (sum([(x - prc_auc_avg) ** 2 for x in prc_aucs]) / len(prc_aucs)) ** 0.5

	E["Test ROC-AUC"].update("%.4f ± %.4f" % (roc_auc_avg , roc_auc_std))
	E["Test PRC-AUC"].update("%.4f ± %.4f" % (prc_auc_avg , prc_auc_std))
	E.log ("got avg test Roc-Auc = %.4f ± %.4f Prc-Auc = %.4f ± %.4f" % (
		roc_auc_avg , roc_auc_std , prc_auc_avg , prc_auc_std)
	)

	
	E.log("All run end!")
예제 #6
0
def eval_run(C , p_model = None):
	"""Train an ensemble on the full training split and write test-set
	predictions to "to_upload.csv".

	Unlike kfold() there is no dev-set selection here: the model-selection
	metric must be the training loss (C.train_loss_val), otherwise the
	assert below fires.
	"""

	# Optional fingerprint / mol2vec features.
	if C.finger or C.mol2vec:
		finger_dict = load_fingers(C , C.data)
	else:
		finger_dict = None

	device = 0  # device index


	(trainset , devset , testset) , lab_num = get_data  (C , fold = "test")

	# Build the ensemble, optionally seeding each member from p_model.
	models = []
	optimers = []
	for k in range(C.ensemble):
		model = get_model (C , lab_num)
		if p_model is not None:
			copy_param(model , p_model)
		model = model.to(device)
		optimer , loss_func = get_others(C , model)

		models.append(model)
		optimers.append(optimer)

	ens_eval_m = EnsembleModel(models)

	best_epoch	= -1
	best_metric = -1
	for epoch_id in range(C.num_epoch):

		# One training epoch per member; accumulate the mean loss.
		train_loss = 0.
		for ens_id in range(C.ensemble):
			model , _train_loss = train(C, models[ens_id], trainset, loss_func, optimers[ens_id], 
								epoch_id, "{0}-{1}".format(0 , ens_id), device , finger_dict)
			train_loss += (_train_loss / C.ensemble)

		E.log("Epoch %d ended." % (epoch_id))
		E.log()

		# Only training-loss-based selection is supported in this mode.
		if C.train_loss_val:
			metric_val = -train_loss
		else:
			assert False

		if (best_epoch < 0 or metric_val > best_metric) or C.no_valid:
			best_epoch 	= epoch_id
			best_metric = metric_val
			save_model(ens_eval_m , C.save_path , E.core.id , "eval")

	E.log("run ends. best epoch = %d" % (best_epoch))
	E.log("Best metric = %.4f"        % (best_metric))
	E.log()
	E.log("model saved.")
	E.log("--------------------------------------------------------------")
	
	# Reload the best checkpoint and dump test-set predictions.
	# NOTE(review): epoch_id and loss_func leak out of the loops above — this
	# raises NameError if C.num_epoch == 0 or C.ensemble == 0; confirm both
	# are always >= 1.
	best_model = load_model(C.save_path , E.core.id , "eval")
	tot_pos_ps = evaluate(C, best_model, testset , loss_func, 
						epoch_id, 0, device, "Dev" , finger_dict , ret_preds = True)

	save_pred(tot_pos_ps , C.data , "to_upload.csv")

	E.log("All run end!")
예제 #7
0
import pandas as pd
import numpy as np
from fbprophet import Prophet
from prepare import get_data, get_prepped, make_weighted, make_weighted_monthly
from fbprophet.diagnostics import cross_validation, performance_metrics
from sklearn.model_selection import ParameterGrid
from sklearn.preprocessing import StandardScaler
import pickle
import matplotlib.pyplot as plt
from predictions import get_model, store_model

# Raw series — weather values are not imputed here.
data = get_data()

# Prophet expects a frame with columns 'ds' (datestamp) and 'y' (value):
# build it from the monthly mean of the inflated price.
df = (
    pd.DataFrame({'y': data.resample('M').inflated.mean()})
    .reset_index()
    .rename(columns={'date': 'ds'})
)

# Fit a linear-growth Prophet model on the inflated price alone.
m = Prophet(growth='linear')
m.fit(df)

# Project eight years ahead at daily resolution.
future = m.make_future_dataframe(freq='D', periods=365 * 8)
forecast = m.predict(future)

# Cross-validate and report mean RMSE (~135.51 on the original data).
cv = cross_validation(m, horizon='298 days')
performance_metrics(cv).rmse.mean()
예제 #8
0
                    apply_encoding, get_test_ids

import logging
logging.basicConfig(level=logging.INFO, format='[tfm-nuclei] - %(message)s')

import argparse
parser = argparse.ArgumentParser()
parser.add_argument("--model-dir", help="Ruta del modelo entrenado", type=str)
parser.add_argument("--test-dir", help="Ruta de imagenes de test", type=str)
parser.add_argument("--resolution", help="Redimensionar test, usar solo un entero. e.g: 128 para 128x128", type=int)
args = parser.parse_args()

# Restore the trained network and load the test images, resized square.
model = load_trained_model(args.model_dir)
test = get_data(args.test_dir, args.test_dir,
                resolution=(args.resolution, args.resolution))

logging.info("Realizando predicciones sobre el conjunto de test")
predictions = np.asarray([model.predict(sample) for sample in tqdm(test)])
logging.info("Completado")

# Map predictions back to each image's native resolution, split them into
# per-nucleus masks keyed by image id, and run-length encode them.
resized_predictions = get_test_original_resolution(
    predictions, get_test_resolutions(args.test_dir))
individualized_masks_with_uid = get_predicted_mask_separated(
    get_test_ids(args.test_dir), resized_predictions)
encoded_masks = apply_encoding(individualized_masks_with_uid)

# Flatten each mask's pixel runs to the space-separated submission format.
submission = pd.DataFrame(np.array(encoded_masks, dtype=object),
                          columns=["ImageId", "EncodedPixels"])
submission['EncodedPixels'] = pd.Series(submission.EncodedPixels.values).apply(
    lambda runs: ' '.join(str(v) for v in runs))
submission.to_csv("arb_submission.csv", index=False)
logging.info(submission.head())
예제 #9
0
            d_loss.append(0.5 * np.add(d_loss_real, d_loss_fake))
            if (epoch % 3) == 0:
                g_loss.append(
                    GAN.train_on_batch(
                        np.array(X_train[batch * epoch:batch * epoch +
                                         batch_size]),
                        np.zeros((batch_size, 1, 1, 1))))
        logging.info("\t\t perdida discriminador --> %s" % (d_loss[epoch]))
        if (epoch % 3) == 0:
            logging.info("\t\t perdida generador --> %s" %
                         (g_loss[int(epoch / 3)]))


# Load ground-truth masks and training images at the requested resolution.
Y_t = get_masks(args.label, resolution=(args.resolution, args.resolution))
X_t = get_data(args.train,
               args.train,
               resolution=(args.resolution, args.resolution))

logging.info("Cargando red discriminadora")
discriminator = get_discriminator(resolution=(args.resolution,
                                              args.resolution))
# NOTE(review): this log message duplicates the discriminator one above; it
# presumably should say "generadora" — confirm before changing the string.
logging.info("Cargando red discriminadora")
generator = get_generator(resolution=(args.resolution, args.resolution))
logging.info("Cargando red adversaria")
GAN = get_gan(generator, discriminator)
# Augmented training is used; the plain train() call was left disabled.
#train(GAN, generator, discriminator, X_t, Y_t)
train_gan_augmented(GAN, generator, discriminator, X_t, Y_t)
try:
    logging.info("Saving model weights to file")
    GAN.save(args.export_dir or "generated_model")
except Exception as e: