def train_function(config):
    """Train one model for ``config`` and return its extrapolated val loss.

    A model is built from ``config``, trained for ``train_args['epochs']``
    epochs, and its recorded ``val_loss`` curve is fitted with
    ``val_loss_fit_func``.  The fitted curve is evaluated at five times the
    number of trained epochs and that value is returned.

    Args:
        config: Mapping of hyper-parameters.  It is passed whole as
            ``model_args`` and must provide the ``'network_args'`` and
            ``'optimizer_args'`` keys.

    Returns:
        The value of the fitted validation-loss curve at ``5 * epochs``.
    """
    model = model_class_(
        dataset_cls=dataset_class_,
        dataset_args=dataset_args,
        model_args=config,
        network_args=config['network_args'],
        optimizer_args=config['optimizer_args'],
    )

    train_model(
        model,
        dataset,
        epochs=train_args['epochs'],
        batch_size=train_args['batch_size'],
        flags=train_args.get('flags', {}),
        save_dir=save_dir_function(config),
    )

    # Fit the validation-loss history, then read the fitted curve at
    # epoch = 5 * epochs; that projected value is what gets returned.
    loss_history = model.val_loss
    epoch_count = train_args['epochs']
    fit_params, _ = curve_fit(val_loss_fit_func, np.arange(epoch_count),
                              loss_history)
    projected_loss = val_loss_fit_func(5 * epoch_count, *fit_params)
    print("Estimated val_loss at epoch", epoch_count * 5, ": ", projected_loss)
    return projected_loss
def main():
    """Build the Dense/GRU network with additive skip connections and train it.

    The input has shape ``(args.lookback, 2)``.  Two Dense layers are followed
    by alternating GRU and Dense layers; each of those receives the sum of
    every earlier layer output.  The last GRU output is flattened and feeds
    two linear heads of width ``args.delay``.
    """
    args = parse_args()
    nlayers = 7
    cell_type = 'GRU'

    def dense_layer():
        return Dense(args.ncells, activation='relu')

    def gru_layer():
        return GRU(args.ncells,
                   return_sequences=True,
                   dropout=args.dropout,
                   recurrent_dropout=args.recurrent_dropout)

    net_input = Input(shape=(args.lookback, 2))

    # The first two Dense layers are chained directly, without a sum.
    outputs = [dense_layer()(net_input)]
    outputs.append(dense_layer()(outputs[0]))

    # Every following layer consumes the sum of all outputs produced so far.
    for make_layer in (gru_layer, dense_layer, gru_layer, dense_layer):
        outputs.append(make_layer()(add(list(outputs))))

    last_recurrent = gru_layer()(add(list(outputs)))
    flattened = Flatten()(last_recurrent)

    out_x = Dense(args.delay, activation='linear')(flattened)
    out_y = Dense(args.delay, activation='linear')(flattened)
    model = Model(inputs=net_input, outputs=[out_x, out_y])
    train_model(model, args, nlayers=nlayers, cell_type=cell_type)
def do_training():
    """Extract data at the requested confidence, train, and report success.

    The optional ``confidence`` query parameter is parsed as a float.  When it
    is missing or cannot be converted, the default ``0.5`` is used, so
    ``extract_data`` always receives a float rather than the raw query string.

    Returns:
        A ``Response`` whose body is the JSON document ``{"result": "ok"}``.
    """
    # MultiDict.get converts with ``type`` and falls back to ``default`` when
    # the key is absent or the conversion fails.
    confidence = request.args.get('confidence', default=0.5, type=float)
    extract_data(confidence)
    train_model()
    return Response(json.dumps({'result': 'ok'}))
def train(self):
    """Freeze the model, replace its three 1x1 output convolutions, and train.

    Every existing parameter gets ``requires_grad = False``.  The layers
    ``Conv2d_1out3_1x1``, ``Conv2d_1out2_1x1`` and ``Conv2d_1out1_1x1`` are
    then replaced by new bias-free 1x1 convolutions with 256, 512 and 1024
    input channels respectively and ``self.outFeatures`` output channels.
    The optimizer, LR scheduler and criterion come from ``self.load_utils()``
    and everything is handed to ``train_model``.
    """
    # Fine-tuning: freeze every parameter that exists before the swap.
    for weight in self.model.parameters():
        weight.requires_grad = False

    replacement_heads = (
        ('Conv2d_1out3_1x1', 256),
        ('Conv2d_1out2_1x1', 512),
        ('Conv2d_1out1_1x1', 1024),
    )
    for attr_name, in_channels in replacement_heads:
        new_head = nn.Conv2d(in_channels, self.outFeatures,
                             kernel_size=(1, 1), stride=(1, 1), bias=False)
        setattr(self.model, attr_name, new_head)

    optimizer, lr_scheduler, criterion = self.load_utils()

    # Hand over to the training script.
    train_model(self.model, self.trainLoader, self.testLoader, criterion,
                optimizer, lr_scheduler, self.numEpoch, self.batchSize,
                self.trainLen, self.testLen)
def test_y_input_dtype(self):
    """
    Tests that train_model will not run with wrong data types for y_var.

    Each invalid value is checked in its own ``assertRaises`` context.  With
    both calls under one context, the first exception ends the block and the
    second call is never executed.
    """
    for bad_y in (1, 'check'):
        with self.assertRaises(TypeError):
            tm.train_model(X_VAR, bad_y, 'Hawaii')
def test_x_input_dtype(self):
    """
    Tests that train_model will not run with wrong data types for x_var.

    Each invalid value is checked in its own ``assertRaises`` context.  With
    both calls under one context, the first exception ends the block and the
    second call is never executed.
    """
    for bad_x in (1, 'check'):
        with self.assertRaises(AttributeError):
            tm.train_model(bad_x, Y_VAR, 'Hawaii')
def test_input_size(self):
    """
    Tests that train_model will not run if sizes of x_var and y_var do not
    match.

    The truncated-x and truncated-y cases each get their own
    ``assertRaises`` context so that both calls are actually executed.
    """
    with self.assertRaises(IndexError):
        tm.train_model(X_VAR[:500], Y_VAR, 'Hawaii')
    with self.assertRaises(IndexError):
        tm.train_model(X_VAR, Y_VAR[:500], 'Hawaii')
def _load_chorda_dataset(path, num_classes):
    """Read images and categorical chorda labels from one HDF5 dataset file."""
    # Both arrays are read fully into memory inside the ``with`` block, so the
    # file can be closed before any further processing.
    with h5py.File(path.strip(), "r") as dataset:
        images = dataset["images"][()]
        all_labels = dataset["labels"][()]

    # Channel 5 of each label volume is the chorda label.
    chorda_slices = np.stack([volume[:, :, 5] for volume in all_labels])
    return images, utils.to_categorical(chorda_slices, num_classes)


def train_xz_init_model_chorda(num_classes, epochs):
    """Train two XZ U-Nets on the chorda label of dataset 1 and dataset 2.

    Both datasets are read from the ``XZ-Unet-Init`` model folder.  A fresh
    network is trained on each dataset and its weights are written to
    ``weights1.hdf5`` / ``weights2.hdf5`` inside a new sub-folder named after
    the start time.

    Args:
        num_classes: Number of classes for the network and label encoding.
        epochs: Number of epochs, forwarded to ``train_model.train_model``.
    """
    model_name = "XZ-Unet-Init"

    # create a new model folder based on name and date
    now = datetime.datetime.now()
    now_str = now.strftime('%Y-%m-%d_%H-%M-%S')
    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"
    print("Started at {}".format(now_str))

    input_shape = (128, 512)
    model = unet(input_shape + (1, ), num_classes)
    batch_size = 8

    print("loading data set 1")
    images_1, labels_1 = _load_chorda_dataset(rootDir + "dataset1.h5",
                                              num_classes)

    print("loading data set 2")
    images_2, labels_2 = _load_chorda_dataset(rootDir + "dataset2.h5",
                                              num_classes)

    out_dir = rootDir + now_str + "/"
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size, epochs)
    del model

    # The second dataset gets a freshly built network.
    print("train model 2")
    model = unet(input_shape + (1, ), num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size, epochs)
    del model
def main(args, logger):
    """Create optimizer, loss and NeuMF model, then start training.

    Args:
        args: Mapping of settings; ``args['LR']`` is the Adam learning rate.
            The whole mapping is also passed to ``Data`` and ``train_model``.
        logger: Logger object forwarded to ``train_model``.
    """
    optimizer = keras.optimizers.Adam(learning_rate=args['LR'])
    criterion = keras.losses.BinaryCrossentropy(from_logits=True)

    # collab_mapped.csv must exist (note translated from the original).
    # Two disabled alternatives that used to sit here (a ``Loader``-based
    # pipeline and an MNIST ``tf.data`` example) were inert and are omitted.
    loader = Data(args)
    df_order = loader.load_df()
    prepared = loader.prepare_df(df_order)
    (uids, iids, df_train, df_test, df_test_neg, users, items,
     user_input, item_input, year_input, month_input, weekday_input,
     daytime_input, labels) = prepared

    # Model initialize
    model = NeuMF(len(user_input), len(item_input))

    # NOTE(review): train_dataset and val_dataset are not assigned anywhere in
    # this function; unless they are module-level globals this call raises
    # NameError -- confirm where they are meant to come from.
    train_model(model, train_dataset, val_dataset, criterion, optimizer, args,
                logger)
def main():
    """Run the pipeline: load, preprocess, train, predict, write output."""
    test_df, train_df = load_data()

    # The pass over the training frame also yields the masking indices and the
    # PCA / scaler instances, which are then reused for the test frame.
    train_pass = preprocess_data(train_df)
    (train_x, train_y, indices_for_masking, pca_instance,
     scaler_instance) = train_pass
    test_x = preprocess_data(test_df, indices_for_masking, pca_instance,
                             scaler_instance)

    feature_count = train_x.shape[1]
    model = create_model(feature_count, MODEL_TYPE)
    train_model(model, MODEL_TYPE, train_x, train_y)

    predictions = predict_data(model, MODEL_TYPE, test_x)
    write_output(predictions)
def main(args):
    """Build the model from ``FLAGS`` and train or test it per ``FLAGS.mode``.

    Args:
        args: Not used; all configuration is read from the global ``FLAGS``.
    """
    preprocessor = Preprocessor(FLAGS)
    word_dict = preprocessor.build_dict()
    embeddings = preprocessor.get_init_embedding()

    vocabulary_size = len(word_dict)
    model = Model(embeddings, FLAGS, vocabulary_size)
    model.build()

    mode = FLAGS.mode
    if mode == "train":
        train_model(model, preprocessor, word_dict, FLAGS)
        return
    if mode == "test":
        test_model(model, preprocessor, word_dict, FLAGS)
def create_database(videoPaths, dataset_path, embeddings_path, detector_path,
                    embemodel_path, confidence_path, data_path,
                    recognizer_path, le_path):
    """Run the three database-building stages in order.

    The stages are: process the videos, extract embeddings, train the model.
    All arguments are forwarded unchanged to those stage functions.
    """
    # Stage 1: process videos.
    process_videos(videoPaths, dataset_path)

    # Stage 2: extract embeddings and update pickle files.
    extract_embeddings(embeddings_path, detector_path, embemodel_path,
                       confidence_path, data_path)

    # Stage 3: train model and update le.pickle file.
    train_model(recognizer_path, embeddings_path, le_path)

    print("All Done.")
def train_xy_init_model(num_classes, epochs):
    """Train two XY U-Nets, one per dataset, and return the run timestamp.

    A new output folder named after the start time is created under the
    ``XY-Unet-Init`` model folder.  When ``num_classes`` is 2 the datasets are
    loaded with ``tools.get_data_set_for_chorda``, otherwise with
    ``tools.get_data_set``.

    Args:
        num_classes: Number of classes for the network; also selects the
            dataset loader.
        epochs: Number of epochs, forwarded to ``train_model.train_model``.

    Returns:
        The timestamp string that names the output folder.
    """
    # first get the time
    started = datetime.datetime.now()
    now_str = started.strftime('%Y-%m-%d_%H-%M-%S')
    print("Started at {}".format(now_str))

    # then create a new model folder based on name and date
    model_name = "XY-Unet-Init"
    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"
    out_dir = rootDir + now_str + "/"
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    # compile the net architecture
    batch_size = 16
    input_shape = (512, 512)
    model = unet(input_shape + (1, ), num_classes)

    def load(dataset_file):
        # The loader is chosen per call, as in a plain if/else on num_classes.
        if num_classes == 2:
            return tools.get_data_set_for_chorda(rootDir + dataset_file)
        return tools.get_data_set(rootDir + dataset_file)

    # load the two training sets
    print("loading data set 1")
    images_1, labels_1 = load("dataset1.h5")
    print("loading data set 2")
    images_2, labels_2 = load("dataset2.h5")

    # and train
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size, epochs)
    del model

    print("train model 2")
    model = unet(input_shape + (1, ), num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size, epochs)
    del model

    return now_str
def update_model(self):
    """Ask the user for confirmation, then rebuild embeddings and retrain.

    A Yes/No question box is shown.  On "No" nothing happens.  Otherwise the
    embeddings are extracted, the model is trained, and ``self.read_files()``
    is called.
    """
    answer = QMessageBox.question(
        self.focusWidget(), 'The model needs to be trained',
        ' Do you want to train the model? It may take a while',
        QMessageBox.Yes, QMessageBox.No)

    # Compare against the same QMessageBox name used to open the dialog; the
    # previous QtGui.QMessageBox spelling depends on a second import and does
    # not exist in every Qt binding.
    if answer == QMessageBox.No:
        return

    ExEm.extract_embeddings()
    TM.train_model()
    self.read_files()
    print("[INFO] Model updated ")
def train_and_verify(parameters, directory, birds, debug=False,
                     skip_clean=True):
    """Train a model for one parameter set (if missing) and pickle its stats.

    The model file name is the five parameters joined with ``'x'``.  Training
    is skipped when that file already exists in ``directory``.  Validation is
    skipped when either the ``.png`` or the ``.stats`` file already exists.

    Args:
        parameters: Sequence whose first five items are, in order,
            ``classifierType, mtStep, mtWin, stStep, stWin``.
        directory: Working directory; the process changes into it.  It is
            expected to contain ``Training`` and ``Validation`` sub-folders.
        birds: Forwarded to ``test_params`` and ``clean_and_test``.
        debug: When true, errors are re-raised instead of being suppressed.
        skip_clean: Forwarded to ``clean_and_test``.

    Returns:
        None.  Ordinary errors are swallowed unless ``debug`` is set.
    """
    try:
        os.chdir(directory)
        classifierType, mtStep, mtWin, stStep, stWin = (
            parameters[index] for index in range(5))

        train_rootdir = os.path.join(directory, 'Training')
        test_rootdir = os.path.join(directory, 'Validation')
        train_dirs = test_params(train_rootdir, birds)

        model = 'x'.join([classifierType, str(mtStep), str(mtWin),
                          str(stStep), str(stWin)])
        model_path = os.path.join(os.getcwd(), model)

        if not os.path.exists(model):
            train_model(list_of_dirs=train_dirs, mtWin=mtWin, mtStep=mtStep,
                        stWin=stWin, stStep=stStep,
                        classifierType=classifierType, modelName=model)

        png_path = '.'.join([model_path, 'png'])
        stats_path = '.'.join([model_path, 'stats'])
        if not os.path.exists(png_path) and not os.path.exists(stats_path):
            stats = clean_and_test(directory=test_rootdir,
                                   classifierType=classifierType,
                                   no_sanitize=True, skip_clean=skip_clean,
                                   show_graphs=False, model_file=model_path,
                                   birds=birds, verbose=False)
            with open(stats_path, 'w') as stats_file:
                cPickle.dump(stats, stats_file)
    except Exception:
        # Best-effort by design, but only for ordinary errors: a bare
        # ``except`` would also swallow KeyboardInterrupt and SystemExit.
        if debug:
            raise
        return
def test(path_excel_dir, para_name, para, data, iter_num):
    """Run ``train_model`` ``iter_num`` times, each with its own Excel path.

    Every iteration prints and saves the parameters, trains, and then resets
    the default TensorFlow graph unless ``para[2]`` is one of ``'GCMC'``,
    ``'NGCF'``, ``'SCF'``, ``'CGMC'`` or ``'LightGCN'``.

    Args:
        path_excel_dir: Prefix to which the generated file name is appended.
        para_name: Parameter names, forwarded to the print/save helpers.
        para: Parameter values; ``para[2]`` selects the graph-reset behaviour.
        data: Forwarded to ``train_model``.
        iter_num: Number of repetitions.
    """
    for _ in range(iter_num):
        print_params(para_name, para)

        # File name = current Unix time followed by a random number.
        stamp = str(int(time.time()))
        random_tag = str(int(rd.uniform(100, 900)))
        path_excel = path_excel_dir + stamp + random_tag + '.xlsx'

        save_params(para_name, para, path_excel)
        train_model(para, data, path_excel)

        if para[2] in ['GCMC', 'NGCF', 'SCF', 'CGMC', 'LightGCN']:
            continue
        tf.reset_default_graph()
def train(self, plot_stats=False, show_qsummary=False):
    """Train all activated models round by round until none stays activated.

    In each round every activated model is built, trained through
    ``train_model`` with ``max_lesson`` set to the current lesson, and then
    destroyed.  After the round, models whose curriculum has no lesson beyond
    the current one, or whose curriculum is completed, are deactivated.
    State is saved before each model and once more at the end, and the
    collected summary lines are printed last.

    Args:
        plot_stats: Forwarded to ``train_model``.
        show_qsummary: Forwarded to ``train_model``.
    """
    while any(m.activated for m in self.modelList):
        while self.current_model < len(self.modelList):
            # Save before touching each model; the name records lesson/model.
            self.save(f'lesson_{self.current_lesson:03d}.model_{self.current_model:03d}')
            m = self.modelList[self.current_model]
            if m.activated:
                #train for one lesson and output somewhere
                m.build_model()
                # train_model's return value is added to the episode count.
                m.total_episodes += train_model(m.model, m.curriculum, m.cfg, stats_filename = m.stats_filename, max_lesson = self.current_lesson, initial_episode = m.total_episodes, plot_stats = plot_stats, show_qsummary = show_qsummary)
                # Drop the backend session and the model before the next one.
                K.clear_session()
                m.destroy_model()
                gc.collect()
            self.current_model += 1
        for m in self.modelList:
            if m.activated and (m.curriculum.lesson_num() <= self.current_lesson or m.curriculum.is_completed()):
                m.activated = False
                m.completed = m.curriculum.is_completed()
                self.summary.append(f'Model {m.name} deactivated during round {self.current_lesson}\nCompleted: {m.completed}')
        # analyze aggregate population stats and deactivate poor performers
        self.current_lesson+=1
        self.current_model = 0
    self.save(f'lesson_{self.current_lesson:03d}.model_{self.current_model:03d}')
    for s in self.summary:
        print(s)
def train_model(
    self,
    train_loader,
    test_loader,
    validation_loader,
    num_inference_samples,
    max_epochs,
    early_stopping_patience,
    desc,
    log_interval,
    device,
    epoch_results_store=None,
):
    """Create a Bayesian model and optimizer and run the shared training loop.

    Args:
        train_loader, test_loader, validation_loader: Data loaders forwarded
            to the module-level ``train_model``.
        num_inference_samples, max_epochs, early_stopping_patience, desc,
            log_interval: Forwarded unchanged.
        device: Used to create the model and forwarded to training.
        epoch_results_store: Optional store, forwarded by keyword.

    Returns:
        ``(model, num_epochs, test_metrics)``; the last two are what the
        module-level ``train_model`` returns.
    """
    model = self.create_bayesian_model(device)
    optimizer = self.create_optimizer(model)

    # Subclass-specific keyword arguments for the training loop.
    extra_args = self.create_train_model_extra_args(optimizer)

    training_result = train_model(
        model,
        optimizer,
        max_epochs,
        early_stopping_patience,
        num_inference_samples,
        test_loader,
        train_loader,
        validation_loader,
        log_interval,
        desc,
        device,
        epoch_results_store=epoch_results_store,
        **extra_args
    )
    num_epochs, test_metrics = training_result
    return model, num_epochs, test_metrics
def main(args):
    """Train ``args.model`` over the training splits, validating every epoch.

    The last split (index ``args.split_num - 1``) is the validation set; all
    earlier splits are trained on in turn within each epoch.  After each
    epoch the weights are saved, the validation loss is computed and passed
    to a ``ReduceLROnPlateau`` scheduler.

    Args:
        args: Parsed command-line namespace supplying the paths, the file
            name templates, the model name and the optimizer / scheduler
            settings read below.
    """
    rule = '-------------------------------------------------------------------------------------'
    rePrint(rule)

    # Load the VAD dictionary inside a ``with`` block so the file handle is
    # closed instead of being left to the garbage collector.
    with open(args.vad_file, 'rb') as vad_handle:
        vadDict = pickle.load(vad_handle)

    validFile = os.path.join(args.valid_path,
                             args.valid_file.format(args.split_num-1))
    rePrint(' [LoadModelData {:s}]'.format(validFile))
    validData = LoadModelDataVAD(validFile, vadDict, args.alpha, 'valid')
    validLoad = torch.utils.data.DataLoader(validData,
                                            batch_size=args.batch_size,
                                            shuffle=False, num_workers=4)

    rePrint(' [Create model: {:s} num_classes: {:d}]'.format(args.model, args.num_classes))
    # NOTE(security): eval() turns a command-line string into code in order
    # to look up the model class; only safe while args.model is trusted.
    net = eval(args.model)(num_classes=args.num_classes)
    net = net.cuda()

    optimizer = torch.optim.SGD(net.parameters(), lr=args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=args.factor,
                                  patience=args.patience, verbose=False,
                                  threshold=1e-6, threshold_mode='rel',
                                  cooldown=0, min_lr=0, eps=1e-8)

    rePrint('')
    rePrint(rule)
    rePrint(' [Train {:s}]'.format(args.model))
    rePrint(rule)

    modelPath = os.path.join(args.model_path, args.model+args.fea_type)
    make_path(modelPath)

    for epoch in range(args.epochs):
        save_file = os.path.join(modelPath, args.model_file).format(epoch)

        for splitID in range(args.split_num-1):
            train_file = os.path.join(args.train_path,
                                      args.train_file.format(splitID))
            rePrint(' [LoadModelData {:s}]'.format(train_file))
            trainData = LoadModelDataVAD(train_file, vadDict, args.alpha,
                                         'train')
            trainLoad = torch.utils.data.DataLoader(
                trainData, batch_size=args.batch_size, shuffle=True,
                num_workers=4)
            # save_file=None: the weights are written by this function after
            # the split loop rather than by train_model.
            net = train_model(net=net, trainLoad=trainLoad,
                              optimizer=optimizer,
                              log_interval=args.log_interval, epoch=epoch,
                              batch_size=args.batch_size,
                              lr=optimizer.param_groups[0]['lr'],
                              save_file=None)

        torch.save(net.state_dict(), save_file)
        rePrint(save_file)

        eval_loss = valid_model(net=net, validLoad=validLoad)
        scheduler.step(eval_loss)

    rePrint(rule)
    rePrint(' [Done]')
    rePrint(rule)
def plot_cost_function(data, max_iterations, ratio):
    """Train for ``max_iterations`` steps and plot the cost after each step.

    Each step calls ``train_model.train_model`` once, stores the returned
    thetas back on ``data`` and records ``cost_function(data)``.

    Args:
        data: Object holding ``norm_theta0``, ``norm_theta1``, ``norm_tab_x``
            and ``norm_tab_y``; the two thetas are updated in place.
        max_iterations: Number of training steps (and plotted points).
        ratio: First argument forwarded to ``train_model.train_model``.
    """
    report = utils.print_colored
    report("[cost_function][Plotting cost function...] start!",
           utils.bcolors.YELLOW)

    report("[cost_function][Calculating cost function...] start!",
           utils.bcolors.YELLOW)
    cost_history = []
    steps_done = 0
    while steps_done < max_iterations:
        updated_thetas = train_model.train_model(
            ratio, data.norm_theta0, data.norm_theta1, data.norm_tab_x,
            data.norm_tab_y)
        data.norm_theta0, data.norm_theta1 = updated_thetas
        cost_history.append(cost_function(data))
        steps_done += 1
    report("[cost_function][Calculating cost function...] finished!",
           utils.bcolors.GREEN)

    report("[cost_function][Generating figure...] start!",
           utils.bcolors.YELLOW)
    iteration_axis = np.arange(max_iterations)
    figure = go.Figure(data=go.Scatter(x=iteration_axis, y=cost_history))
    figure.show()
    report("[cost_function][Generating figure...] finished!",
           utils.bcolors.GREEN)

    report("[cost_function][Plotting cost function...] finished!\n",
           utils.bcolors.GREEN)
def test_train_model():
    """Check that train_model returns an xgboost classification model.

    The input CSV is read from ``test/data/test_data.csv`` below the current
    working directory.
    """
    csv_path = os.getcwd() + '/test/data/test_data.csv'
    df_input = pd.read_csv(csv_path)

    feature_names = [
        'DAYS_BIRTH', 'DAYS_EMPLOYED', 'DAYS_EMPLOYED_PERC',
        'BURO_DAYS_CREDIT_MEAN', 'DAYS_ID_PUBLISH', 'ANNUITY_INCOME_PERC',
        'INSTAL_DAYS_ENTRY_PAYMENT_MEAN', 'INSTAL_DBD_MEAN', 'PAYMENT_RATE',
        'INCOME_CREDIT_PERC', 'INSTAL_AMT_PAYMENT_MEAN',
        'APPROVED_DAYS_DECISION_MEAN', 'DAYS_LAST_PHONE_CHANGE',
        'BURO_DAYS_CREDIT_ENDDATE_MEAN',
    ]
    xgboost_parameters = {
        'objective': 'binary:logistic',
        'n_estimators': 300,
        'learning_rate': 0.2,
        'max_depth': 3,
    }
    kwarg_dic = {
        'method': 'xgboost',
        'get_target': {'target': 'TARGET'},
        'choose_features_all': {'features_to_use': feature_names},
        'split_data': {'train_size': 0.9, 'test_size': 0.1},
        'parameter': xgboost_parameters,
    }

    model = train_model.train_model(df_input, **kwarg_dic)

    model_type_text = str(type(model))
    assert model_type_text == "<class 'xgboost.sklearn.XGBClassifier'>"
def main(args):
    """Train and run greedy / beam-search prediction for every replicate.

    Each replicate gets its own output directory named
    ``<base><gap_id>_R_<replicate number>``, where ``gap_id`` is the
    reference file name up to its first dot.

    Args:
        args: Parsed command-line namespace; every option is a list whose
            first element is used.
    """
    print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

    base_output_directory = dir_utils.clean_directory_string(args.o[0])
    dir_utils.mkdir(base_output_directory)

    ref_file, read_file = args.fa[0], args.fq[0]
    batch_size, rnn_dim, embedding_dim = args.bs[0], args.hu[0], args.ed[0]
    epochs, min_seed_length = args.e[0], args.sl[0]
    replicates, prediction_length = args.r[0], args.pl[0]
    patience, seed_range_upper = args.es[0], args.sr[0]
    replicate_offset, beam_length = args.os[0], args.bl[0]

    # gap_id: last path component of the reference file, up to its first dot.
    separator = dir_utils.get_terminal_directory_character()
    reference_name = ref_file.split(separator)[-1]
    gap_id = reference_name.split(".")[0]
    base_output_directory += gap_id

    first_replicate = replicate_offset
    for replicate_num in range(first_replicate, first_replicate + replicates):
        output_directory = dir_utils.clean_directory_string(
            base_output_directory + "_R_" + str(replicate_num))
        dir_utils.mkdir(output_directory)

        print("Training model")
        train.train_model(output_directory, min_seed_length, ref_file,
                          read_file, epochs, [batch_size], [rnn_dim],
                          [embedding_dim], 1, patience, seed_range_upper)

        print("Greedy prediction")
        greedy.predict_reference(output_directory, gap_id, ref_file,
                                 embedding_dim, rnn_dim, min_seed_length,
                                 base_path=output_directory)
        greedy.predict_arbitrary_length(output_directory, gap_id, ref_file,
                                        embedding_dim, rnn_dim,
                                        prediction_length,
                                        base_path=output_directory)

        print("Beam search prediction")
        beam_search.predict_reference(output_directory, gap_id, ref_file,
                                      embedding_dim, rnn_dim, min_seed_length,
                                      beam_length, base_path=output_directory)
        beam_search.predict_arbitrary_length(output_directory, gap_id,
                                             ref_file, embedding_dim, rnn_dim,
                                             prediction_length, beam_length,
                                             base_path=output_directory)
        reset_states()
def plot_linear_regression(data, slope, max_iterations, ratio):
    """Train step by step and show an animated plot of the fitted line.

    A frame is recorded for iteration 0 and then for every iteration that is
    a multiple of ``slope``.  Before each of those later frames the normalized
    thetas are de-normalized and stored on ``data``.

    Args:
        data: Object holding the raw and normalized tables, their min/max
            values and the thetas; thetas are updated in place.
        slope: Snapshot interval in iterations (may be 1).
        max_iterations: Number of training steps.
        ratio: First argument forwarded to ``train_model.train_model``.
    """
    report = utils.print_colored

    def current_estimations():
        # One estimated price per dataset entry, using the present thetas.
        return [est.estimate_price(x, data.theta0, data.theta1)
                for x in data.tab_x]

    report("[linear_regression][Plotting linear regression...] start!",
           utils.bcolors.YELLOW)
    graph_data = {}
    graph_data["figure"] = generate_fig(max_iterations, "Kilometers", "Price")
    graph_data["sliders"] = generate_sliders()

    # Build initial figure data, and frame_data for i == 0
    report(
        "[linear_regression][Building initial figure and frame datas...] start!",
        utils.bcolors.YELLOW)
    estimations = current_estimations()
    build_fig_data_linear_regression(data, estimations, graph_data)
    build_frame_data_linear_regression(data, estimations, 0, graph_data)
    report(
        "[linear_regression][Building initial figure and frame datas...] finished!",
        utils.bcolors.GREEN)

    # Iterate max_iterations times through the dataset to train the model
    report("[linear_regression][Building frame data...] start!",
           utils.bcolors.YELLOW)
    for i in range(1, max_iterations + 1):
        data.norm_theta0, data.norm_theta1 = train_model.train_model(
            ratio, data.norm_theta0, data.norm_theta1, data.norm_tab_x,
            data.norm_tab_y)

        # Only every ``slope``-th iteration becomes a frame.
        if i % slope != 0:
            continue

        # De-normalize our thetas to use them for the estimations
        data.theta0 = utils.de_normalize(data.norm_theta0, data.max_y,
                                         data.min_y)
        denormalized_theta1 = utils.de_normalize(data.norm_theta1, data.max_y,
                                                 data.min_y)
        data.theta1 = denormalized_theta1 / (data.max_x - data.min_x)

        estimations = current_estimations()
        build_frame_data_linear_regression(data, estimations, i, graph_data)
    report("[linear_regression][Building frame data...] finished!",
           utils.bcolors.GREEN)

    report("[linear_regression][Generating figure...] start!",
           utils.bcolors.YELLOW)
    graph_data["figure"]["layout"]["sliders"] = [graph_data["sliders"]]
    figure = go.Figure(graph_data["figure"])
    figure.show()
    report("[linear_regression][Generating figure...] finished!",
           utils.bcolors.GREEN)

    report("[linear_regression][Plotting linear regression...] finished!\n",
           utils.bcolors.GREEN)
def new_data():
    """Extract the data, one-hot encode it and train a model on the result.

    The trained model is not returned.
    """
    extracted = extract_data()
    words, labels, docs_x, docs_y = extracted
    training, output = onehot_encoding(words, labels, docs_x, docs_y)
    train_model(training, output)


# new_data()
# chat()
# print (ask("Tell me about your courses"))
# new_data()
def refresh_model(self):
    """Train the model on button press and report the outcome in a dialog.

    A "Success" box is shown only when ``train_model()`` returns exactly
    ``True``; any other result shows the "Error" box.  Per the original note,
    training downloads all pending cases from the db and fits a KNN
    classifier on them.
    """
    succeeded = train_model() is True
    if succeeded:
        title, message = "Success", "Model is trained"
    else:
        title, message = "Error", "Something went wrong"
    QMessageBox.about(self, title, message)
def main(opt):
    """Load the bag-of-words data, build the NTM model and train it.

    The time spent loading and the time spent training are both logged.

    Args:
        opt: Options object; ``bow_vocab_size`` is set on it from the loaded
            dictionary, and ``opt.device`` is where the model is placed.
    """
    load_started = time.time()
    loaded = load_data_and_vocab(opt, load_train=True)
    (train_bow_loader, valid_bow_loader, word2idx, idx2word, vocab,
     bow_dictionary) = loaded
    opt.bow_vocab_size = len(bow_dictionary)
    load_data_time = time_since(load_started)
    logging.info('Time for loading the data: %.1f' % load_data_time)

    train_started = time.time()
    ntm_model = NTM(opt).to(opt.device)
    optimizer_ntm = init_optimizers(ntm_model, opt)
    train_model.train_model(ntm_model, optimizer_ntm, bow_dictionary,
                            train_bow_loader, valid_bow_loader, opt)
    training_time = time_since(train_started)
    logging.info('Time for training: %.1f' % training_time)
def train():
    """Train a linear-kernel SVC and export it to ``models/fast_rank_model.pkl``.

    The feature-extraction switches and the class map defined below are
    passed to ``train_model.train_model`` together with the module-level
    ``src_path``, ``data_field`` and ``target_field``.
    """
    classifier = SVC(kernel='linear')

    # Classes kept for training; class 2 is disabled.
    class_map = {
        0: 0,
        1: 1,
        # 2: 2,
        3: 3,
    }

    feature_extraction_config = dict([
        ('text_to_vector_uni_vocabulary',
         'vocabularies/text_to_vector_uni_vocabulary_10.txt'),
        ('text_to_vector_bi_vocabulary',
         'vocabularies/text_to_vector_bi_vocabulary.txt'),
        ('tf_idf_vector', False),
        ('counter_vector', True),
        ('binary_vector', False),
        ('best_representing_words_list',
         'vocabularies/fast_rank_best_words_custom.txt'),
        ('surrounding_words', True),
        ('polarity_vocabulary', 'vocabularies/polarity_words.txt'),
        ('positive_words_count', True),
        ('negative_words_count', True),
        ('polarity_count', True),
        ('parts_of_speech', True),
        ('uni_gram', True),
        ('bi_gram', False),
        ('not_count', False),
        ('remove_stop_words', False),
    ])

    train_model.train_model(
        classifier,
        src_path=src_path,
        data_field=data_field,
        target_field=target_field,
        export_path='models/fast_rank_model.pkl',
        class_map=class_map,
        balance_classes=False,
        randomize=False,
        feature_config=feature_extraction_config,
    )
def train_xz_init_model(num_classes, epochs):
    """Train two XZ U-Nets, one on dataset 1 and one on dataset 2.

    The root directory has to be set according to your directory layout (the
    original note asks for ``/rawdata`` and ``/groundtruth`` with equally
    named mhd-files).  Weights are written to ``weights1.hdf5`` and
    ``weights2.hdf5`` in a new sub-folder named after the start time.

    Args:
        num_classes: Number of classes for the network.
        epochs: Number of epochs, forwarded to ``train_model.train_model``.
    """
    model_name = "XZ-Unet-Init"

    # create a new model folder based on name and date
    started = datetime.datetime.now()
    now_str = started.strftime('%Y-%m-%d_%H-%M-%S')
    rootDir = "C:/users/jfauser/IPCAI2019/ModelData/" + model_name + "/"
    print("Started at {}".format(now_str))

    input_shape = (128, 512)
    network_input_shape = input_shape + (1, )
    model = unet(network_input_shape, num_classes)
    batch_size = 8

    loaded_sets = []
    for set_number in (1, 2):
        print("loading data set {}".format(set_number))
        loaded_sets.append(
            get_data_set(rootDir + "dataset{}.h5".format(set_number)))
    (images_1, labels_1), (images_2, labels_2) = loaded_sets

    out_dir = rootDir + now_str + "/"
    filename_weigths1 = out_dir + "weights1.hdf5"
    filename_weigths2 = out_dir + "weights2.hdf5"
    if not os.path.isdir(out_dir):
        os.mkdir(out_dir)

    print("train model 1")
    train_model.train_model([images_1, labels_1], filename_weigths1, model,
                            batch_size, epochs)
    del model

    # The second dataset gets a freshly built network.
    print("train model 2")
    model = unet(network_input_shape, num_classes)
    train_model.train_model([images_2, labels_2], filename_weigths2, model,
                            batch_size, epochs)
    del model
def main(args):
    """Load rating and knowledge-graph data, build MultiKR and train it.

    Args:
        args: Parsed options providing the batch size, shuffle flags, model
            hyper-parameters, optimizer settings and epoch count.
    """
    n_user, n_item, train_rec, eval_rec, test_rec = load_rating()
    n_entity, n_relation, kg = load_kg()

    def first_three_columns(table):
        # Split a 2-D table into a tuple of its first three columns.
        return (table[:, 0], table[:, 1], table[:, 2])

    kg_data = first_three_columns(kg)
    rec_data = first_three_columns(train_rec)
    rec_val = first_three_columns(eval_rec)

    train_data_kg = TrainSet(kg_data)
    train_loader_kg = DataLoader(train_data_kg,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_train)

    train_data_rec = TrainSet(rec_data)
    eval_data_rec = TrainSet(rec_val)
    train_loader_rec = DataLoader(train_data_rec,
                                  batch_size=args.batch_size,
                                  shuffle=args.shuffle_train)
    eval_loader_rec = DataLoader(eval_data_rec,
                                 batch_size=args.batch_size,
                                 shuffle=args.shuffle_test)

    # NOTE(review): embed_dim is set from args.batch_size -- confirm this is
    # intentional and not meant to be a separate embedding-size option.
    model = MultiKR(n_user + 1,
                    n_item + 1,
                    n_entity + 1,
                    n_relation + 1,
                    n_layer=args.n_layer,
                    embed_dim=args.batch_size,
                    hidden_layers=args.hidden_layers,
                    dropouts=args.dropouts,
                    output_rec=args.output_rec)

    optimizer = torch.optim.Adam(model.parameters(),
                                 weight_decay=args.weight_decay,
                                 lr=args.lr)
    loss_function = nn.BCEWithLogitsLoss()

    train_model(model, train_loader_rec, train_loader_kg, eval_loader_rec,
                optimizer, loss_function, args.epochs)
def do_predict(need_train=True, need_save=True):
    """Prepare market data, obtain a model and plot its ETH predictions.

    Market data is read from the pickle at ``base_path + market_info_path``
    when that file exists; otherwise it is queried, cleaned and written there.

    Args:
        need_train: Train a new model when true, else load the one stored at
            ``persistence_path``.
        need_save: Forwarded to ``train_model`` as ``need_save_model``.
    """
    # data preprocessor
    data_path = base_path + market_info_path
    if not os.path.exists(data_path):
        market_info = query_and_clean()
        market_info.to_pickle(data_path)
    else:
        market_info = pd.read_pickle(data_path)

    prepared = data_preprocessor(market_info, split_date, window_len,
                                 currency_type=currency_type)
    (training_set, test_set, lstm_training_inputs, lstm_training_outputs,
     lstm_test_inputs, lstm_test_outputs) = prepared

    # train or load model
    model = (train_model(lstm_training_inputs, lstm_training_outputs,
                         need_save_model=need_save)
             if need_train else load_model(persistence_path))

    # Tick positions; the labels are derived from the same dates.
    training_x_ticks = [
        datetime.date(year, month, 1)
        for year in range(2013, 2019)
        for month in (1, 5, 9)
    ]
    training_x_ticks_labels = [
        tick.strftime('%b %Y') for tick in training_x_ticks
    ]
    test_x_ticks = [datetime.date(2017, month, 1) for month in range(1, 13)]
    test_x_ticks_labels = [
        tick.strftime('%b %d %Y') for tick in test_x_ticks
    ]

    # Eth predict plot
    predict_and_plot(lstm_training_inputs,
                     training_set['eth_Close'],
                     training_set,
                     training_x_ticks,
                     training_x_ticks_labels,
                     model,
                     "[ETH] Training Set: Single Timepoint Prediction",
                     need_zoom=True)
    predict_and_plot(lstm_test_inputs,
                     test_set['eth_Close'],
                     test_set,
                     test_x_ticks,
                     test_x_ticks_labels,
                     model,
                     "[ETH] Test Set: Single Timepoint Prediction",
                     need_zoom=False)
#!/usr/bin/env python
# Train one random forest on the feature files of all subjects, using feature
# columns 11, 5, 4 and 6, and write the predictions to the submission file.

import sys
import os.path

from sklearn.ensemble import RandomForestClassifier

# train_model.py is looked up in the current directory first.
sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_rf_f4imp_all.csv'

features_files = [
    'data/Dog_1/features_02.txt',
    'data/Dog_2/features_01.txt',
    'data/Dog_3/features_01.txt',
    'data/Dog_4/features_01.txt',
    'data/Dog_5/features_01.txt',
    'data/Patient_1/features_01.txt',
    'data/Patient_2/features_01.txt',
]

model, train_prob, test_prob = train_model(
    features_files,
    [11, 5, 4, 6],
    RandomForestClassifier,
    {'n_estimators': 50, 'criterion': 'entropy', 'min_samples_leaf': 10},
    outlier_sigma=2,
    n_cv=20,
    plot=True,
    save_settings=True,
    submission_file=submission_file,
    verbose=True)

# Wait for the user before the script ends.
raw_input('Press Enter to end.')
# Train one logistic-regression model per subject, each with its own feature
# columns and C value, and write all predictions to one submission file.

import sys
import os.path

from sklearn.linear_model import LogisticRegression

# train_model.py is looked up in the current directory first.
sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_log_reg_f1to3_rocslope10.csv'

# The three lists below are aligned by position: one entry per subject.
features_files = [
    ['data/Dog_1/features_02.txt'],
    ['data/Dog_2/features_01.txt'],
    ['data/Dog_3/features_01.txt'],
    ['data/Dog_4/features_01.txt'],
    ['data/Dog_5/features_01.txt'],
    ['data/Patient_1/features_01.txt'],
    ['data/Patient_2/features_01.txt'],
]
features_cols = [
    [8, 21],
    [12, 13],
    [3, 17],
    [11],
    [7, 10, 11],
    [10, 15],
    [5],
]
C_vals = [0.01, 0.1, 1, 0.01, 10, 0.01, 10]

for ff, fc, C in zip(features_files, features_cols, C_vals):
    model, train_prob, test_prob = train_model(
        ff,
        fc,
        LogisticRegression,
        {'C': C, 'class_weight': 'auto'},
        outlier_sigma=2,
        n_cv=100,
        normalize_probs='ROCSlope',
        plot=True,
        save_settings=True,
        submission_file=submission_file,
        verbose=True)

# Wait for the user before the script ends.
raw_input('Press Enter to end.')
def optimize_model(features_files, submission_file,
                   classifier=linear_model.LogisticRegression,
                   feature_columns=range(2, 27),
                   min_features=1, max_features=1,
                   parameters={'C': np.logspace(-3, 1, 5),
                               'class_weight': ['auto']},
                   **kwargs):
    """
    Find the combination of features (selected from columns listed in
    feature_columns in the files listed in features_files, including
    1 to max_features different features) and model parameters that
    optimizes the AUC for the chosen classifier, then update predicted
    test probabilities in submission_file.

    In the parameters dictionary, the keys are the keywords to be used when
    initializing the classifier, and each value is an array of arguments
    to loop over when searching for the best model.

    Additional settings are passed to train_model in kwargs.

    The search is greedy: the first round tries every single column (or
    every combination of min_features columns); each later round extends the
    best column set found so far by one of the remaining columns.  The
    process exits through sys.exit when min_features > max_features.
    """
    # Best candidate so far; 'columns' and 'parameters' are only added once
    # some candidate reaches an AUC above 0.
    best_model = {'AUC': 0.}

    # vary number of features
    if min_features > max_features:
        sys.exit('min_features must be <= max_features')
    for n_features in range(min_features, max_features+1):

        # vary features used in model training
        if n_features == min_features:
            if min_features == 1:
                feature_columns_grid = [[i] for i in feature_columns]
            else:
                feature_columns_grid = list(itertools.combinations( \
                    feature_columns, min_features))
        elif len(best_model['columns']) < n_features-1:
            # exit loop since last iteration didn't add new features
            break
        else:
            # Candidates: the best columns plus one column not yet used.
            remaining_features = list(feature_columns)
            for i in best_model['columns']:
                remaining_features.remove(i)
            feature_columns_grid = [list(best_model['columns']) + [i] \
                                    for i in remaining_features]
        for f_cols in feature_columns_grid:

            # vary model parameters
            # Each model_args is one tuple of (keyword, value) pairs, i.e.
            # one point of the Cartesian product of all parameter values.
            for model_args in list(itertools.product( \
                    *[[(k, v) for v in parameters[k]] \
                      for k in parameters.keys()])):

                # train the classifier and compute AUC
                model, auc_mean, auc_std = train_model(features_files, f_cols,
                                                       classifier,
                                                       dict(model_args),
                                                       **kwargs)
                # '\r' plus the trailing comma keep the progress report on
                # one console line that is overwritten by the next one.
                print '\r' + ', '.join([str(fc) for fc in f_cols]) + \
                    ' AUC = {0:.2f}+/-{1:.2f}'.format(auc_mean, auc_std),
                sys.stdout.flush()
                if auc_mean > best_model['AUC']:
                    print '\n ', model_args
                    best_model = {'AUC': auc_mean, 'columns': f_cols,
                                  'parameters': model_args}
    # Blank out the progress line.
    print '\r' + 70*' ' + '\n'

    # compute predictions for best model, update submission CSV,
    # and plot learning curves and ROC curve
    model, auc_mean, auc_std = train_model(features_files,
                                           best_model['columns'], classifier,
                                           dict(best_model['parameters']),
                                           submission_file=submission_file,
                                           save_settings=True, plot=True,
                                           **kwargs)
# Train one RBF-kernel SVM per subject, each with its own feature columns,
# C and gamma, and write all predictions to one submission file.

# train_model.py is looked up in the current directory first.
sys.path.insert(0, '.')
from train_model import train_model

submission_file = 'submission_svm_f1to2_rocslope5.csv'

# The four lists below are aligned by position: one entry per subject.
features_files = [
    ['data/Dog_1/features_02.txt'],
    ['data/Dog_2/features_01.txt'],
    ['data/Dog_3/features_01.txt'],
    ['data/Dog_4/features_01.txt'],
    ['data/Dog_5/features_01.txt'],
    ['data/Patient_1/features_01.txt'],
    ['data/Patient_2/features_01.txt'],
]
features_cols = [
    [5, 11],
    [13],
    [3, 7],
    [5, 11],
    [14],
    [4, 11],
    [17, 18],
]
C_vals = [10, 10, 0.1, 0.1, 0.1, 10, 10]
gamma_vals = [1, 0.5, 0.25, 0.25, 1, 1, 1]

for ff, fc, C, gamma in zip(features_files, features_cols, C_vals,
                            gamma_vals):
    model, train_prob, test_prob = train_model(
        ff,
        fc,
        SVC,
        {'C': C, 'kernel': 'rbf', 'gamma': gamma, 'probability': True,
         'class_weight': 'auto'},
        outlier_sigma=2,
        n_cv=10,
        normalize_probs='ROCSlope',
        plot=True,
        save_settings=True,
        submission_file=submission_file,
        verbose=True)

# Wait for the user before the script ends.
raw_input('Press Enter to end.')
ssplit90 = json.load(f) srefdf_t = filter_by_filelist(srefdf, ssplit90['train']) ### III. Preprocess: remove relational expressions from training srefdf_tr = filter_relational_expr(srefdf_t) ### IV. Set list of words to train wordlist = wordlist_by_criterion(srefdf_tr, model['wrdl'], model['wprm']) ### V. Get the region features X = np.load('../../ExtractFeats/ExtrFeatsOut/saiapr.npz') X = X['arr_0'] ### VI. And... train away! clsf = train_model(srefdf_tr, X, wordlist, (linear_model.LogisticRegression, {'penalty':'l1'}), nneg=model['nneg'], nsrc=model['nsrc']) with gzip.open('../TrainedModels/' + basename + '.pklz', 'w') as f: pickle.dump(clsf, f) with open('../TrainedModels/' + basename + '.json', 'w') as f: json.dump(model, f) print strftime("%Y-%m-%d %H:%M:%S")