def save_aucs(self, summary_path, AUCs_path, **kwargs):
    """Computes AUCs for every training summary found in summary_path and saves them to CSV files under AUCs_path."""
    summaries = summaryProcessor.get_summaries_from_path(summary_path)

    if not os.path.exists(AUCs_path):
        Path(AUCs_path).mkdir(parents=True, exist_ok=False)

    for _, summary in summaries.df.iterrows():
        filename = summary.training_output_path.split("/")[-1]
        utils.set_random_seed(summary.seed)

        data_processor = DataProcessor(summary=summary)
        data_loader = self.__get_data_loader(summary)

        auc_params = self.model_evaluator.get_aucs(summary=summary,
                                                   AUCs_path=AUCs_path,
                                                   filename=filename,
                                                   data_processor=data_processor,
                                                   data_loader=data_loader,
                                                   **kwargs)
        if auc_params is None:
            continue

        (aucs, auc_path, append, write_header) = auc_params
        self.__save_aucs_to_csv(aucs=aucs,
                                path=auc_path,
                                append=append,
                                write_header=write_header)
def get_qcd_test_data(self, summary, normalize=False):
    """Re-seeds from the summary and returns the (optionally normalized) QCD test sample."""
    utils.set_random_seed(summary.seed)
    data_processor = DataProcessor(summary=summary)
    data_loader = self.__get_data_loader(summary)
    return self.model_evaluator.get_qcd_test_data(summary, data_processor, data_loader,
                                                  normalize=normalize)
def __init__(
        self,
        # general settings of the training
        model_trainer_path,
        validation_data_fraction,
        test_data_fraction,
        include_hlf,
        include_efp,
        hlf_to_drop,
        # arguments that will be passed to the specialized trainer class
        **training_settings):
    """
    Constructor of the general Trainer class, which will delegate
    architecture-specific tasks to a specialized Trainer class.
    """

    # Import correct specialized class
    self.model_class = utils.import_class(model_trainer_path)

    # Save general training arguments
    self.validation_data_fraction = validation_data_fraction
    self.test_data_fraction = test_data_fraction
    self.include_hlf = include_hlf
    self.include_efp = include_efp
    self.hlf_to_drop = hlf_to_drop

    # Draw, set and save random seed
    self.seed = np.random.randint(0, 99999999)
    utils.set_random_seed(self.seed)

    # Save training output path (used to save the model later on)
    self.training_output_path = training_settings["training_output_path"]

    # Prepare data processor and data loader for the specialized class
    data_processor = DataProcessor(validation_fraction=validation_data_fraction,
                                   test_fraction=test_data_fraction,
                                   seed=self.seed)

    data_loader = DataLoader()
    data_loader.set_params(include_hlf=include_hlf,
                           include_eflow=include_efp,
                           hlf_to_drop=hlf_to_drop)

    # Initialize specialized trainer object
    self.model_trainer = self.model_class(data_processor=data_processor,
                                          data_loader=data_loader,
                                          **training_settings)
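# A minimal usage sketch of the general Trainer above. The module path, data
# path and output path are hypothetical placeholders, not values confirmed by
# this repo; only training_output_path is known to be required by __init__:
#
# trainer = Trainer(
#     model_trainer_path="module.architectures.TrainerAutoEncoder",  # assumed path
#     validation_data_fraction=0.15,
#     test_data_fraction=0.15,
#     include_hlf=True,
#     include_efp=True,
#     hlf_to_drop=["Energy", "Flavor"],
#     training_output_path="trainingResults/my_model",  # assumed path
# )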
def __init__(
    self,
    args,
    train_dataloader,
    valid_dataloader,
    model,
    device=torch.device("cpu"),
    patience=5,
):
    self.args = args
    self.device = device

    # data
    self.num_class = train_dataloader.dataset.num_classes
    self.train_dataloader = train_dataloader
    self.valid_dataloader = valid_dataloader

    # model
    self.model = model
    self.model.to(self.device)

    # optimizer
    self.set_optimizer()

    # metric
    self.set_metrics()
    self.best_val = 0.0
    self.best_train = 0.0

    # early stopping
    self.patience = patience
    self.es_count = self.patience

    # save dir
    self.save_path = os.path.join(self.args.model_dir, "model_best.pkl")
    if not os.path.isdir(self.args.model_dir):
        os.mkdir(self.args.model_dir)

    # tensorboardX writer
    self.writer = SummaryWriter(os.path.join(self.args.model_dir, "train_logs"))

    # set random seed
    set_random_seed(self.args.random_seed)
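# A hedged instantiation sketch for the trainer above; MyModel, the
# dataloaders and every args field except model_dir and random_seed are
# illustrative assumptions, not names taken from this repo:
#
# args = argparse.Namespace(model_dir="models/exp1", random_seed=42)
# trainer = Trainer(args, train_dataloader, valid_dataloader, MyModel(),
#                   device=torch.device("cuda"), patience=5)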
def draw_roc_curves(self, summary_path, summary_version, **kwargs):
    """Draws ROC curves for every summary in summary_path matching summary_version."""
    summaries = summaryProcessor.get_summaries_from_path(summary_path)

    plotting_args = {k: v for k, v in kwargs.items()
                     if k not in ["signals", "signals_base_path"]}
    fig, ax_begin, ax_end, plt_end, colors = self.__get_plot_params(n_plots=1,
                                                                    **plotting_args)
    ax = ax_begin(0)

    for _, summary in summaries.df.iterrows():
        version = summaryProcessor.get_version(summary.summary_path)
        if version != summary_version:
            continue

        utils.set_random_seed(summary.seed)
        kwargs["filename"] = summary.training_output_path.split("/")[-1]

        data_processor = DataProcessor(summary=summary)
        data_loader = self.__get_data_loader(summary)

        self.model_evaluator.draw_roc_curves(summary=summary,
                                             data_processor=data_processor,
                                             data_loader=data_loader,
                                             ax=ax, colors=colors, **kwargs)

    # Diagonal reference line corresponding to a random classifier
    x = np.arange(0, 1.1, 0.1)
    ax.plot(x, x, '--', c='black')

    ax_end("false positive rate", "true positive rate")
    plt_end()
    plt.show()
parser.add_argument("target_domain", type=str, help="Target domain.") parser.add_argument("output_path", type=str, help="Output path to store prediction.") parser.add_argument("--batch_size", type=int, default=32, help="Batch size.") parser.add_argument("--random_seed", type=int, default=42, help="random seed.") args = parser.parse_args() set_random_seed(args.random_seed) dataset = DigitTestDataset(args.images_dir) dataloader = DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=8) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") # Model dir if args.target_domain == "svhn": model_dir = os.path.join("models", "dsn_m2s", "model_best.pth.tar") else: model_dir = os.path.join("models", "dsn_s2m", "model_best.pth.tar") # Models
import argparse
import random

import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.manifold import TSNE

from module.utils import set_random_seed
from module.dataset.digit import DigitDataset
from module.da.dann import DANN
from module.dsn.dsn import DSN

set_random_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def plot_tsne(
    source_features_ls,
    source_labels_ls,
    source_domains_ls,
    target_features_ls,
    target_labels_ls,
    target_domains_ls,
    fname,
    sample_size=250,
):
    source_features_ls = np.concatenate(source_features_ls)
    source_labels_ls = np.concatenate(source_labels_ls)
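# set_random_seed is imported from module.utils but not defined in these
# snippets. A typical implementation (a sketch, not necessarily this repo's
# exact code) seeds every RNG the scripts rely on:
#
# def set_random_seed(seed):
#     random.seed(seed)
#     np.random.seed(seed)
#     torch.manual_seed(seed)
#     if torch.cuda.is_available():
#         torch.cuda.manual_seed_all(seed)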
fig_1_path = os.path.join(args.output_dir, "fig1_2.jpg")
fig_2_path = os.path.join(args.output_dir, "fig2_2.jpg")

dcgen = DCGen(latent_dim=128)
dcgen.load_state_dict(torch.load(args.dcgan_model))
dcgen.to(device)

acgen = ACGen(latent_dim=120, n_feature_maps=128, embedding_dim=4, n_attributes=1)
acgen.load_state_dict(torch.load(args.acgan_model))
acgen.to(device)

# prepare dc noise
set_random_seed(int(args.random_seed[0]))
# The commented-out block below documents how fixed_noise.pkl was originally
# generated and pickled; the live code then reloads it so the same latent
# vectors are reused across runs.
# dc_noise = torch.randn(64, 128, 1, 1, device=device)
# dc_noise[1], dc_noise[2], dc_noise[5] = dc_noise[42], dc_noise[50], dc_noise[54]
# dc_noise[9], dc_noise[18], dc_noise[28] = dc_noise[60], dc_noise[32], dc_noise[34]
# dc_noise[27] = dc_noise[63]
# dc_noise = dc_noise[:32]
# tmp_dc_noise = dc_noise.detach().cpu().numpy()
# with open("models/dcgan/fixed_noise.pkl", "wb") as fout:
#     pickle.dump(tmp_dc_noise, fout)
with open(os.path.join("models", "dcgan", "fixed_noise.pkl"), "rb") as fin:
    dc_noise = pickle.load(fin)
dc_noise = torch.tensor(dc_noise).float()
dc_noise = dc_noise.to(device)
def get_error(self, input_data, summary, scaler=None):
    """Re-seeds from the summary and delegates error computation to the model evaluator."""
    utils.set_random_seed(summary.seed)
    data_processor = DataProcessor(summary=summary)
    return self.model_evaluator.get_error(input_data, summary, data_processor, scaler)
def get_signal_test_data(self, name, path, summary):
    """Re-seeds from the summary and returns the test sample for the given signal."""
    utils.set_random_seed(summary.seed)
    data_processor = DataProcessor(summary=summary)
    data_loader = self.__get_data_loader(summary)
    return self.model_evaluator.get_signal_test_data(name, path, summary,
                                                     data_processor, data_loader)
def __init__(
        self,
        qcd_path,
        training_params,
        bottleneck_size,
        intermediate_architecture=(30, 30),
        test_data_fraction=0.15,
        validation_data_fraction=0.15,
        norm_type="",
        norm_args=None,
        hlf_to_drop=None,
):
    """
    Creates an auto-encoder trainer with a random seed, the provided training
    parameters and architecture. Loads the specified data and splits it into
    training, validation and test samples according to the provided fractions.
    Normalizes the data as specified by norm_type and norm_args. High-level
    features listed in hlf_to_drop will not be used for training.
    """

    if hlf_to_drop is None:
        hlf_to_drop = ['Energy', 'Flavor']

    self.seed = np.random.randint(0, 99999999)
    utils.set_random_seed(self.seed)

    self.qcd_path = qcd_path
    self.hlf_to_drop = hlf_to_drop
    self.training_params = training_params
    self.test_data_fraction = test_data_fraction
    self.validation_data_fraction = validation_data_fraction

    data_loader = DataLoader()

    # Load QCD samples
    (self.qcd, qcd_jets, qcd_event, qcd_flavor) = data_loader.load_all_data(
        qcd_path, "qcd background",
        include_hlf=True, include_eflow=True, hlf_to_drop=hlf_to_drop)

    data_processor = DataProcessor(validation_fraction=self.validation_data_fraction,
                                   test_fraction=self.test_data_fraction,
                                   seed=self.seed)

    (train_data, validation_data, test_data, _, _) = \
        data_processor.split_to_train_validate_test(data_table=self.qcd)
    train_data.output_file_prefix = "qcd training data"
    validation_data.output_file_prefix = "qcd validation data"

    # Normalize the input
    self.norm_type = norm_type
    self.norm_args = norm_args

    self.data_ranges = np.asarray([])
    self.means_train, self.stds_train = None, None
    self.means_validation, self.stds_validation = None, None

    if norm_type == "Custom":
        self.data_ranges = utils.percentile_normalization_ranges(
            train_data, norm_args["norm_percentile"])
    elif norm_type == "CustomStandard":
        self.means_train, self.stds_train = train_data.get_means_and_stds()
        self.means_validation, self.stds_validation = validation_data.get_means_and_stds()

    print("Trainer scaler args: ", self.norm_args)

    self.train_data_normalized = data_processor.normalize(
        data_table=train_data,
        normalization_type=self.norm_type,
        norm_args=self.norm_args,
        data_ranges=self.data_ranges,
        means=self.means_train,
        stds=self.stds_train)

    self.validation_data_normalized = data_processor.normalize(
        data_table=validation_data,
        normalization_type=self.norm_type,
        norm_args=self.norm_args,
        data_ranges=self.data_ranges,
        means=self.means_validation,
        stds=self.stds_validation)

    # Build the model
    self.input_size = len(self.qcd.columns)
    self.intermediate_architecture = intermediate_architecture
    self.bottleneck_size = bottleneck_size
    self.model = self.get_auto_encoder_model()
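# A hedged instantiation sketch for the auto-encoder trainer above. The class
# name, qcd_path value and training_params keys are assumptions for
# illustration; the "Custom"/"norm_percentile" pairing follows the branch in
# the constructor above:
#
# trainer = AutoEncoderTrainer(                          # assumed class name
#     qcd_path="data/backgrounds/qcd/*.h5",              # assumed path
#     training_params={"batch_size": 256, "epochs": 100},  # assumed keys
#     bottleneck_size=8,
#     intermediate_architecture=(30, 30),
#     norm_type="Custom",
#     norm_args={"norm_percentile": 25},
# )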