def fit_validate(exp_params, k, data_path, write_path, others=None, custom_tag=''): """Fit model and compute metrics on train and validation set. Intended for hyperparameter search. Only logs final metrics and scatter plot of final embedding. Args: exp_params(dict): Parameter dict. Should at least have keys model_name, dataset_name & random_state. Other keys are assumed to be model parameters. k(int): Fold identifier. data_path(str): Data directory. write_path(str): Where to write temp files. others(dict): Other things to log to Comet experiment. custom_tag(str): Custom tag for comet experiment. """ # Comet experiment exp = Experiment(parse_args=False) exp.disable_mp() custom_tag += '_validate' exp.add_tag(custom_tag) exp.log_parameters(exp_params) if others is not None: exp.log_others(others) # Parse experiment parameters model_name, dataset_name, random_state, model_params = parse_params(exp_params) # Fetch and split dataset. data_train = getattr(grae.data, dataset_name)(split='train', random_state=random_state, data_path=data_path) data_train, data_val = data_train.validation_split(random_state=FOLD_SEEDS[k]) # Model m = getattr(grae.models, model_name)(random_state=FOLD_SEEDS[k], **model_params) m.write_path = write_path m.data_val = data_val with exp.train(): m.fit(data_train) # Log plot m.comet_exp = exp m.plot(data_train, data_val, title=f'{model_name} : {dataset_name}') # Probe embedding prober = EmbeddingProber() prober.fit(model=m, dataset=data_train, mse_only=True) train_z, train_metrics = prober.score(data_train, is_train=True) # Log train metrics exp.log_metrics(train_metrics) with exp.validate(): val_z, val_metrics = prober.score(data_val) # Log train metrics exp.log_metrics(val_metrics) # Log marker to mark successful experiment exp.log_other('success', 1)
def __init__(self, config_file, tag_list):
    """Build a Comet experiment from a JSON config file and tag it.

    Args:
        config_file: Path to a JSON file whose keys/values are forwarded as
            keyword arguments to ``comet_ml.Experiment``.
        tag_list: Iterable of tag strings attached to the experiment.
    """
    self.tag_list = tag_list
    super().__init__()
    # Read the Experiment constructor kwargs from disk.
    with open(config_file, 'r') as cfg_handle:
        experiment_kwargs = json.load(cfg_handle)
    experiment = Experiment(**experiment_kwargs)
    # Tag the run so it can be filtered in the Comet UI.
    for current_tag in tag_list:
        experiment.add_tag(current_tag)
    self.exp = experiment
def main():
    """Train an SRResNet generator, then an SRGAN, on HSC->HST image pairs, logging to Comet."""
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    generator = Generator(n_res_blocks=16, n_ps_blocks=2)
    # Paired low-res (HSC) / high-res (HST) sample directories.
    hst_path = "../data/samples/hst/filtered_restricted"
    hsc_path = "../data/samples/hsc/filtered_restricted"
    api_key = os.environ['COMET_ML_ASTRO_API_KEY']
    # Create an experiment with your api key
    experiment = Experiment(
        api_key=api_key,
        project_name="Super Resolution GAN: HSC->HST",
        workspace="samkahn-astro",
    )
    experiment.add_tag("test tag")
    dataloader = torch.utils.data.DataLoader(
        SR_HST_HSC_Dataset(hst_path=hst_path, hsc_path=hsc_path, hr_size=[600, 600],
                           lr_size=[100, 100], transform_type="log_scale"),
        batch_size=1, pin_memory=True, shuffle=True, collate_fn=collate_fn)
    # Stage 1: pre-train the generator alone (SRResNet objective).
    generator = train_srresnet(generator, dataloader, device, experiment,
                               lr=1e-4, total_steps=1e5, display_step=50)
    torch.save(generator, 'srresnet_median_scale.pt')
    generator = torch.load('srresnet_median_scale.pt')
    # Stage 2: adversarial fine-tuning with a discriminator.
    discriminator = Discriminator(n_blocks=1, base_channels=8)
    generator, discriminator = train_srgan(generator, discriminator, dataloader, device,
                                           experiment, lr=1e-4, total_steps=1e5, display_step=1000)
    # NOTE(review): this overwrites the stage-1 checkpoint saved above with the
    # GAN-trained generator — confirm that is intended.
    torch.save(generator, 'srresnet_median_scale.pt')
    torch.save(discriminator, 'srdiscriminator_median_scale.pt')
def start_comet(args):
    """Create a Comet experiment from CLI arguments, or return ``None``.

    Args:
        args: Parsed arguments providing ``comet`` (a "workspace/project/apikey"
            string, or ``None``/empty to disable logging) and ``comet_tags``
            (comma-separated tag string, possibly empty).

    Returns:
        A configured ``Experiment`` named "td3", or ``None`` when disabled.
    """
    # Guard clause: Comet disabled when the flag is missing or empty.
    if args.comet is None or len(args.comet) == 0:
        return None
    # The --comet value packs credentials as "workspace/project/apikey".
    workspace, project, apikey = args.comet.split("/")
    exp = Experiment(api_key=apikey, project_name=project, workspace=workspace)
    exp.set_name("td3")
    if len(args.comet_tags) > 0:
        for tag in args.comet_tags.split(","):
            exp.add_tag(tag)
    return exp
def run(loss, lr, loss_a1, loss_a2):
    """Run one pose-refinement training configuration and evaluate it.

    Args:
        loss: Loss identifier forwarded to the model config.
        lr: Learning rate.
        loss_a1, loss_a2: Loss-specific coefficients forwarded to the model config.
    """
    exp = Experiment(workspace="pose-refinement", project_name="02-batch-shuffle-pretrained")
    exp.add_tag("no bad frames")

    # Output directory: explicit --output wins, otherwise derive one from the
    # Comet experiment key. NOTE(review): `args` is a module-level global here.
    if args.output is None:
        output_path = f"../models/{exp.get_key()}"
    else:
        output_path = args.output

    params = {
        "num_epochs": 15,
        "preprocess_2d": "DepthposeNormalize2D",
        "preprocess_3d": "SplitToRelativeAbsAndMeanNormalize3D",
        "shuffle": True,
        "ordered_batch": True,

        # training
        "optimiser": "adam",
        "adam_amsgrad": True,
        "learning_rate": lr,
        "sgd_momentum": 0,
        "batch_size": 1024,
        "train_time_flip": False,  # True,
        "test_time_flip": True,
        "lr_scheduler": {
            "type": "multiplicative",
            "multiplier": 0.95,
            "step_size": 1,
        },

        # dataset
        "train_data": "mpii+muco",
        "pose2d_type": "hrnet",
        "pose3d_scaling": "normal",
        "megadepth_type": "megadepth_at_hrnet",
        "cap_25fps": True,
        "stride": 2,
        "simple_aug": False,  # True,  # augments data by duplicating each frame
        # presumably a Comet experiment key for pretrained weights — verify
        "weights": "29cbfa0fc1774b9cbb06a3573b7fb711",
        "model": {
            "loss": loss,
            "loss_a1": loss_a1,
            "loss_a2": loss_a2
        },
    }

    run_experiment(output_path, params, exp)
    eval.main(output_path, False, exp)
def main(cfg):
    """Train an MLP classifier under a Hydra config, logging the run to Comet.

    Args:
        cfg: Hydra config object with ``model.shapes`` and ``optimizer.params``.
    """
    layer_shapes = cfg.model.shapes
    optimizer_kwargs = cfg.optimizer.params

    # Comet setup: disable code auto-capture and register this script instead.
    comet_exp = Experiment(log_code=False)
    comet_exp.set_code(filename=hydra.utils.to_absolute_path(__file__))
    comet_exp.add_tag("with_hydra")
    comet_exp.log_parameters({"hydra-cfg": [cfg]})

    mlp = layers.MLP(layer_shapes)
    adam = optim.Adam(mlp.parameters(), **optimizer_kwargs)
    clf_runner = tasks.ClassificationRunner(
        mlp, optimizer=adam, criterion=nn.CrossEntropyLoss(), experiment=comet_exp
    )
    # NOTE(review): `x` and `y` are not defined in this function — presumably
    # module-level globals holding the training data; verify.
    clf_runner.fit(x, y, epochs=10, checkpoint_path="./checkpoints")
    clf_runner.save()
def setup_comet(args, init_distributed):
    """Create a Comet experiment on the rank-0 process only.

    Args:
        args: Parsed arguments; reads ``comet``, ``distributed_rank``,
            ``comet_project``, ``comet_tag`` and ``comet_real_tag``.
        init_distributed: Whether distributed training is initialized.

    Returns:
        The configured ``Experiment`` on rank 0 with ``--comet`` set,
        otherwise ``None``.
    """
    if init_distributed and args.distributed_rank > 0:
        # Only the rank 0 process handled comet
        args.comet = False
    if args.comet:
        # This will only be true if the user set the --comet flag, and the rank is 0
        print('Activating comet')
        # NOTE(review): `api_key` and `workspace` are not defined in this
        # function — presumably module-level globals; confirm they exist.
        experiment = Experiment(api_key=api_key,
                                project_name=args.comet_project,
                                workspace=workspace,
                                auto_param_logging=False,
                                auto_metric_logging=False,
                                parse_args=True,
                                auto_output_logging=True,
                                log_env_gpu=False
                                )
        # experiment.disable_mp()  # Turn off monkey patching
        experiment.log_parameters(vars(args))
        # experiment.add_tag(args.comet_tag)
        experiment.set_name(args.comet_tag)
        experiment.add_tag(args.comet_real_tag)
        print("* Finished comet setup... ")
        return experiment
    else:
        return None
def log_hyperparameters_to_comet(clf, experiment):
    """Log every CV candidate of a fitted search as its own Comet experiment.

    Args:
        clf: A fitted sklearn search object exposing ``cv_results_``.
        experiment: The currently active Comet experiment; it is re-attached
            as ``globals.comet_logger`` afterwards.
    """
    # One short-lived Comet experiment per hyperparameter candidate.
    for i in range(len(clf.cv_results_["params"])):
        exp = Experiment(
            workspace="s0lvang",
            project_name="ideal-pancake-hyperparameter",
            api_key=globals.flags.comet_api_key,
        )
        exp.add_tag("hp_tuning")
        exp.add_tags(globals.comet_logger.get_tags())
        for k, v in clf.cv_results_.items():
            if k == "params":
                # The candidate's hyperparameters themselves.
                exp.log_parameters(v[i])
            else:
                # Everything else in cv_results_ (scores, timings) is a metric.
                exp.log_metric(k, v[i])
        exp.end()
    # Re-open the original experiment so subsequent logging goes back to it;
    # creating the per-candidate experiments above ended the live one.
    old_experiment = ExistingExperiment(
        api_key=globals.flags.comet_api_key,
        previous_experiment=experiment.get_key(),
    )
    globals.comet_logger = old_experiment
def test_comet(self):
    """Test with a comet hook."""
    from comet_ml import Experiment

    comet = Experiment(project_name="Testing", auto_output_logging="native")
    comet.log_dataset_info(name="Karcher", path="shonan")
    comet.add_tag("GaussNewton")
    comet.log_parameter("method", "GaussNewton")
    # Name the run with a human-readable timestamp (no zero padding).
    now = datetime.now()
    comet.set_name(f"GaussNewton-{now.month}/{now.day} {now.hour}:{now.minute}:{now.second}")

    # Per-iteration callback: push the current Karcher error to Comet.
    def log_error(optimizer, error):
        comet.log_metric("Karcher error", error, optimizer.iterations())

    gtsam_optimize(self.optimizer, self.params, log_error)
    comet.end()

    final_values = self.optimizer.values()
    self.gtsamAssertEquals(final_values.atRot3(KEY), self.expected)
def train_nn(
    dataset: str, batch_size: int, depth: int, epochs: int
) -> Tuple[CNN, Tuple[Union[np.ndarray, np.ndarray], Union[
        np.ndarray, np.ndarray]], Tuple[Union[np.ndarray, np.ndarray], Union[
            np.ndarray, np.ndarray]]]:
    """Train a CNN classifier on a time-series dataset and log the run to Comet.

    Args:
        dataset: Name of the dataset passed to ``fetch_dataset``.
        batch_size: Mini-batch size for both loaders.
        depth: CNN depth, also recorded as a Comet tag.
        epochs: Number of training epochs.

    Returns:
        The trained model in eval mode, the (train, test) inputs after scaling
        and transposition, and the (train, test) targets.
    """
    experiment = Experiment(project_name="cphap", auto_output_logging=False)
    experiment.add_tag(dataset)
    experiment.add_tag("NN-depth-{}".format(depth))
    (x_train, y_train), (x_test, y_test) = fetch_dataset(dataset)
    # Standardize each series; fit statistics on train only.
    scaler = TimeSeriesScalerMeanVariance()
    x_train: np.ndarray = scaler.fit_transform(x_train)
    x_test: np.ndarray = scaler.transform(x_test)
    # assumes (n_samples, n_timesteps, n_channels) -> (n, channels, time) for Conv1d — TODO confirm
    x_train = x_train.transpose((0, 2, 1)).astype(np.float32)
    x_test = x_test.transpose((0, 2, 1)).astype(np.float32)
    n_features = x_train.shape[1]  # channel count after the transpose
    n_targets = len(np.unique(y_train))
    train_ds = get_dataset(x_train, y_train)
    test_ds = get_dataset(x_test, y_test)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)
    model = CNN(n_features, 32, n_targets, depth=depth)
    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    runner = ClassificationRunner(model, optimizer, criterion, experiment)
    runner.add_loader("train", train_loader)
    runner.add_loader("test", test_loader)
    runner.train_config(epochs=epochs)
    runner.run()
    # NOTE(review): `quite()` looks like a typo for `quiet()` but appears to be
    # this runner library's actual method name — verify against its API.
    runner.quite()
    return runner.model.eval(), (x_train, x_test), (y_train, y_test)
if args.Xaugment: args.augment = True if args.Xbatchsize: args.batchsize = 250 if args.Xlrnrate: args.lrnrate = .2 if args.Xoptimizer: args.optimizer = 'sgd' if args.Xnet: args.net = 'ConvNet' args.lrnrate = .05 print(args) if rank == 0: experiment = Experiment(project_name='metapoison-victim', auto_param_logging=False, auto_metric_logging=False) experiment.log_parameters(vars(args)) experiment.log_parameter('nmeta', nmeta) experiment.set_name(args.key) experiment.add_tag(args.tag) experiment.log_model("CIFAR10", "../models/victim_model_1_run") # args.gpu = set_available_gpus(args) # again, hardcode the number of the GPU to avoid the error args.gpu = [1] if args.name == '': args.name = args.net def victim(): def comet_pull_next_poison(): # grab next poison from comet that hasn't been processed impatience = 0 # while not has_exitflag(args.key, api) or impatience < 5: # patience before ending victim process # Get rid off has_exitflag condition and also only keep the impagtience condition while impatience < 5: # patience before ending victim process
# Optional audio logging: one AudioLogger per task, keyed by task name.
if hparams['cometml_log_audio']:
    audio_loggers = {
        'enhancement': cometml_audio_logger.AudioLogger(fs=hparams["fs"],
                                                        bs=1,
                                                        n_sources=hparams['max_num_sources'])
    }

experiment = Experiment(API_KEY, project_name=hparams['project_name'])
experiment.log_parameters(hparams)

# Derive a run name from the tags and make sure its log directory exists.
experiment_name = '_'.join(hparams['cometml_tags'])
log_dir = os.path.join(FED_LOG_DIR, experiment_name)
if not os.path.exists(log_dir):
    os.makedirs(log_dir)
for tag in hparams['cometml_tags']:
    experiment.add_tag(tag)
# Explicit experiment_name wins over the tag-derived one.
if hparams['experiment_name'] is not None:
    experiment.set_name(hparams['experiment_name'])
else:
    experiment.set_name(experiment_name)

# Restrict visible GPUs before any CUDA context is created.
os.environ['CUDA_VISIBLE_DEVICES'] = ','.join(
    [cad for cad in hparams['cuda_available_devices']])

# Pre-build the metric-name bookkeeping for every non-train validation set:
# one SI-SDRi enhancement metric per noise count (1 and 2 noise sources).
val_losses = {}
all_losses = []
for val_set in [x for x in val_generators if not x == 'train']:
    val_losses[val_set] = {}
    for num_noises in [1, 2]:
        metric_name = 'SISDRi_enhancement_{}_noises'.format(num_noises)
        all_losses.append(val_set + '_{}'.format(metric_name))
def train(normal_digit, anomalies, folder, file, p_train, p_test):
    """Train a PCA reconstruction-error anomaly detector on corrupted MNIST.

    Builds train/test sets of one "normal" digit contaminated with anomaly
    digits, fits PCA, flags the samples with the largest reconstruction error
    as anomalies, logs metrics to Comet and appends them to a CSV file.

    Args:
        normal_digit: The digit class treated as normal.
        anomalies: Digit classes used to corrupt the dataset.
        folder: Output directory for the results CSV.
        file: Suffix used to build the results file name.
        p_train: Contamination rate of the training set.
        p_test: Contamination rate of the test set (also the test level ALPHA).
    """
    # Create an experiment
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("mnist_kpca")

    # General parameters
    DOWNLOAD_MNIST = True
    PATH_DATA = os.path.join(os.path.expanduser("~"), 'Downloads/mnist')

    # Define training parameters
    hyper_params = {
        "TRAIN_SIZE": 2000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 800,
        "TEST_NOISE": p_test,
        # on which class we want to learn outliers
        "CLASS_SELECTED": [normal_digit],
        # which class we want to corrupt our dataset with
        "CLASS_CORRUPTED": anomalies,
        "INPUT_DIM": 28 * 28,  # In the case of MNIST
        "ALPHA": p_test,  # level of significance for the test
        # hyperparameters gamma in rbf kPCA
        "GAMMA": [1],
        "N_COMP": [30]
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Load data
    train_data, test_data = load_mnist(PATH_DATA, download=DOWNLOAD_MNIST)

    # Normalize data
    train_data.data = train_data.data / 255.
    test_data.data = test_data.data / 255.

    # Build "train" and "test" datasets: sample majority (normal) and minority
    # (corrupted) indices according to the noise rates, then concatenate.
    # NOTE(review): `train_labels`/`test_labels` are deprecated torchvision
    # aliases of `targets` — verify against the torchvision version in use.
    id_maj_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TRAIN_NOISE"]) * hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_min_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TRAIN_NOISE"] * hyper_params["TRAIN_SIZE"]),
        replace=False)
    id_train = numpy.concatenate((id_maj_train, id_min_train))

    id_maj_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_SELECTED"]))[0],
        int((1 - hyper_params["TEST_NOISE"]) * hyper_params["TEST_SIZE"]),
        replace=False)
    id_min_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]))[0],
        int(hyper_params["TEST_NOISE"] * hyper_params["TEST_SIZE"]),
        replace=False)
    id_test = numpy.concatenate((id_min_test, id_maj_test))

    train_data.data = train_data.data[id_train]
    train_data.targets = train_data.targets[id_train]
    test_data.data = test_data.data[id_test]
    test_data.targets = test_data.targets[id_test]

    # Relabel: target becomes True for anomalous (corrupted-class) samples.
    train_data.targets = numpy.isin(train_data.train_labels,
                                    hyper_params["CLASS_CORRUPTED"])
    test_data.targets = numpy.isin(test_data.test_labels,
                                   hyper_params["CLASS_CORRUPTED"])

    # Flatten the data and transform to numpy array
    train_data.data = train_data.data.view(-1, 28 * 28).numpy()
    test_data.data = test_data.data.view(-1, 28 * 28).numpy()

    # Train kPCA
    # param_grid = [{"gamma": hyper_params["GAMMA"],
    #                "n_components": hyper_params["N_COMP"]}]
    param_grid = [{"n_components": hyper_params["N_COMP"]}]
    # kpca = KernelPCA(fit_inverse_transform=True,
    #                  kernel="rbf",
    #                  remove_zero_eig=True,
    #                  n_jobs=-1)
    # NOTE(review): despite the variable name, plain PCA is fitted here, not
    # kernel PCA — the kPCA/grid-search path is commented out.
    kpca = PCA()
    # my_scorer2 = make_scorer(my_scorer, greater_is_better=True)
    # grid_search = GridSearchCV(kpca, param_grid, cv=ShuffleSplit(
    #     n_splits=3), scoring=my_scorer)
    kpca.fit(train_data.data)
    X_kpca = kpca.transform(train_data.data)
    X_train_back = kpca.inverse_transform(X_kpca)
    X_test_back = kpca.inverse_transform(kpca.transform(test_data.data))

    # Compute the distance between original data and reconstruction
    dist_train = numpy.linalg.norm(train_data.data - X_train_back, ord=2, axis=1)
    dist_test = numpy.linalg.norm(test_data.data - X_test_back, ord=2, axis=1)

    # Test performances on train: flag the top ALPHA fraction of
    # reconstruction errors as anomalies.
    train_anomalies_ind = numpy.argsort(dist_train)[int(
        (1 - hyper_params["ALPHA"]) *
        hyper_params["TRAIN_SIZE"]):int(hyper_params["TRAIN_SIZE"])]
    train_predictions = numpy.zeros(hyper_params["TRAIN_SIZE"])
    train_predictions[train_anomalies_ind] = 1
    train_recall = metrics.recall_score(train_data.targets, train_predictions)
    train_precision = metrics.precision_score(train_data.targets, train_predictions)
    train_f1_score = metrics.f1_score(train_data.targets, train_predictions)
    train_auc = metrics.roc_auc_score(train_data.targets, train_predictions)

    print(f"Train Precision: {train_precision}")
    print(f"Train Recall: {train_recall}")
    print(f"Train F1 Score: {train_f1_score}")
    print(f"Train AUC: {train_auc}")
    experiment.log_metric("train_precision", train_precision)
    experiment.log_metric("train_recall", train_recall)
    experiment.log_metric("train_f1_score", train_f1_score)
    experiment.log_metric("train_auc", train_auc)

    # Test performances on test: empirical p-value of each test distance
    # against the train distance distribution.
    test_probs = numpy.array(
        [numpy.sum(xi >= dist_train) / len(dist_train) for xi in dist_test],
        dtype=float)
    test_anomalies_ind = numpy.argwhere(
        test_probs >= 1 - hyper_params["ALPHA"])
    test_predictions = numpy.zeros(hyper_params["TEST_SIZE"])
    test_predictions[test_anomalies_ind] = 1
    test_recall = metrics.recall_score(test_data.targets, test_predictions)
    test_precision = metrics.precision_score(test_data.targets, test_predictions)
    test_f1_score = metrics.f1_score(test_data.targets, test_predictions)
    test_auc = metrics.roc_auc_score(test_data.targets, test_probs)
    test_average_precision = metrics.average_precision_score(
        test_data.targets, test_predictions)

    print(f"Test Precision: {test_precision}")
    print(f"Test Recall: {test_recall}")
    print(f"Test F1 Score: {test_f1_score}")
    print(f"Test AUC: {test_auc}")
    print(f"Test average Precision: {test_average_precision}")
    experiment.log_metric("test_precision", test_precision)
    experiment.log_metric("test_recall", test_recall)
    experiment.log_metric("test_f1_score", test_f1_score)
    experiment.log_metric("test_auc", test_auc)
    experiment.log_metric("test_average_precision", test_average_precision)

    # Save the results in the output file
    col_names = [
        "timestamp", "precision", "recall", "f1_score", "average_precision",
        "auc"
    ]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)
    # NOTE(review): DataFrame.append is deprecated/removed in recent pandas —
    # verify the pinned pandas version supports it.
    df_results = df_results.append(pandas.DataFrame(numpy.concatenate(
        (numpy.array(
            datetime.datetime.fromtimestamp(
                time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
         test_precision.reshape(1), test_recall.reshape(1),
         test_f1_score.reshape(1), test_average_precision.reshape(1),
         test_auc.reshape(1))).reshape(1, -1),
        columns=col_names),
        ignore_index=True)
    df_results.to_csv(results_file)
'max_steps': MAX_STEPS, 'use_predictors': USE_PREDICTORS, 'batch_size': BATCH_SIZE, 'hidden_sizes': str(HIDDEN_SIZES), 'step_size': STEP_SIZE, 'predictor_lr': PREDICTOR_LR, 'lr': LR, 'epochs': EPOCHS, # 'max_grad_norm': MAX_GRAD_NORM, 'solver': 'MAX_GRAD_NORM', 'predictor_optimizer': 'SGD', # 'exploration_prob': EXPLORATION_PROB, 'nonlinearity_after_predictor': True, 'fixed_zero_grad': True, # 'predictor_hidden': PREDICTOR_HIDDEN, 'stopping_criterion': 'MaxGradNormSolver', # 'predictor_n_opt_steps': PREDICTOR_N_OPT_STEPS } EXP = Experiment(project_name='EqProp', auto_metric_logging=False) EXP.add_tag('linear predictor') EXP.log_parameters(hparams) comment = f'{MAX_STEPS}_steps' if USE_PREDICTORS: comment += '_predictors' main() EXP.end()
def train(config):
    """Train a video-classification model on the GTA crime dataset.

    Sets up a Comet experiment, builds train/validation loaders with the
    configured spatial/temporal transforms, then runs ``epoch_step`` for each
    epoch with SGD and cross-entropy loss.

    Args:
        config: Parsed experiment configuration (argparse-style namespace).
    """
    print(config.n_classes)
    if config.use_quadruplet:
        assert config.use_embeddings, "Cannot use quadruplet loss without Embedding model"
    # NOTE(review): hard-coded Comet API key in source — should move to an
    # environment variable or config file.
    experiment = Experiment(api_key="Cbyqfs9Z8auN5ivKsbv2Z6Ogi",
                            project_name="gta-crime-classification-2",
                            workspace="beardedwhale")
    params = {
        'ft_index': config.ft_begin_index,
        'model': config.base_model,
        'model_type': config.model_type,
        'model_depth': config.model_depth,
        'finetuning_block': config.finetune_block
    }
    experiment.log_parameters(params)
    experiment.log_parameters(vars(config))
    experiment.add_tag(config.model_type)
    if config.use_quadruplet:
        experiment.add_tag('quadruplet_loss')

    # `params` is reused here for the model's parameter groups/info.
    model, params = generate_model(config)
    summary(model, input_size=(3, config.sample_duration, config.sample_size,
                               config.sample_size))
    dataset_path = config.dataset_path
    jpg_path = config.jpg_dataset_path

    # Build the multi-scale crop pyramid: each scale is the previous one
    # multiplied by scale_step.
    config.scales = [config.initial_scale]
    for _ in range(1, config.n_scales):
        config.scales.append(config.scales[-1] * config.scale_step)
    config.arch = '{}-{}'.format(config.base_model, config.model_depth)
    config.mean = get_mean(config.norm_value, dataset=config.mean_dataset)
    config.std = get_std(config.norm_value, 'gta')  # TODO handle gta?

    # Persist the fully-resolved config next to the results.
    with open(os.path.join(config.result_path, 'config.json'), 'w') as opt_file:
        json.dump(vars(config), opt_file)

    # Choose input normalization depending on the mean/std flags.
    if config.no_mean_norm and not config.std_norm:
        norm_method = Normalize([0, 0, 0], [1, 1, 1])
    elif not config.std_norm:
        norm_method = Normalize(config.mean, [1, 1, 1])
    else:
        norm_method = Normalize(config.mean, config.std)

    loaders: Dict[STEP, torch.utils.data.DataLoader] = {}
    loggers: Dict[STEP, Logger] = {}
    steps: [STEP] = []
    metrics = []
    if config.use_quadruplet:
        metrics.append('QUADRUPLET_LOSS')
    metrics.append('CLASSIFICATION_LOSS')

    if not config.no_train:
        assert config.train_crop in ['random', 'corner', 'center']
        if config.train_crop == 'random':
            crop_method = MultiScaleRandomCrop(config.scales, config.sample_size)
        elif config.train_crop == 'corner':
            crop_method = MultiScaleCornerCrop(config.scales, config.sample_size)
        elif config.train_crop == 'center':
            crop_method = MultiScaleCornerCrop(config.scales,
                                               config.sample_size,
                                               crop_positions=['c'])
        spatial_transform = Compose([
            crop_method,
            RandomHorizontalFlip(),
            ToTensor(config.norm_value), norm_method
        ])
        temporal_transform = TemporalRandomCrop(config.sample_duration)
        target_transform = ClassLabel()
        assert os.path.exists(dataset_path)
        training_data = GTA_crime(dataset_path,
                                  jpg_path,
                                  'train',
                                  spatial_transform=spatial_transform,
                                  temporal_transform=temporal_transform,
                                  target_transform=target_transform,
                                  sample_duration=config.sample_duration,
                                  dataset_conf_path=config.dataset_conf_path)
        train_loader = torch.utils.data.DataLoader(
            training_data,
            batch_size=config.batch_size,
            shuffle=True,
            num_workers=config.n_threads,
            pin_memory=True)
        train_logger = Logger(experiment,
                              STEP.TRAIN,
                              n_classes=config.n_finetune_classes,
                              topk=[1, 2, 3],
                              class_map=list(training_data.class_map.keys()),
                              metrics=metrics)
        loaders[STEP.TRAIN] = train_loader
        loggers[STEP.TRAIN] = train_logger
        steps.append(STEP.TRAIN)

    if not config.no_val:
        # NOTE(review): spatial/temporal/target transforms are only defined in
        # the training branch above — running with no_train=True and
        # no_val=False would raise NameError here; confirm intended usage.
        val_data = GTA_crime(dataset_path,
                             jpg_path,
                             'test',
                             spatial_transform=spatial_transform,
                             temporal_transform=temporal_transform,
                             target_transform=target_transform,
                             sample_duration=config.sample_duration,
                             dataset_conf_path=config.dataset_conf_path)
        log.info(f'Loaded validation data: {len(val_data)} samples')
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=config.batch_size,
                                                 shuffle=True,
                                                 num_workers=config.n_threads,
                                                 pin_memory=True)
        val_logger = Logger(experiment,
                            STEP.VAL,
                            n_classes=config.n_finetune_classes,
                            topk=[1, 2, 3],
                            class_map=list(val_data.class_map.keys()),
                            metrics=metrics)
        loaders[STEP.VAL] = val_loader
        loggers[STEP.VAL] = val_logger
        steps.append(STEP.VAL)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(lr=config.learning_rate, params=model.parameters())
    for epoch in range(config.n_epochs):
        epoch_step(epoch,
                   conf=config,
                   criterion=criterion,
                   loaders=loaders,
                   model=model,
                   loggers=loggers,
                   optimizer=optimizer)
class CorefSolver(): def __init__(self, args): self.args = args self.data_utils = data_utils(args) self.disable_comet = args.disable_comet self.model = self.make_model( src_vocab=self.data_utils.vocab_size, tgt_vocab=self.data_utils.vocab_size, N=args.num_layer, dropout=args.dropout, entity_encoder_type=args.entity_encoder_type) print(self.model) if self.args.train: self.outfile = open(self.args.logfile, 'w') self.model_dir = make_save_dir(args.model_dir) # self.logfile = os.path.join(args.logdir, args.exp_name) # self.log = SummaryWriter(self.logfile) self.w_valid_file = args.w_valid_file def make_model(self, src_vocab, tgt_vocab, N=6, dropout=0.1, d_model=512, entity_encoder_type='linear', d_ff=2048, h=8): "Helper: Construct a model from hyperparameters." c = copy.deepcopy attn = MultiHeadedAttention(h, d_model) attn_ner = MultiHeadedAttention(1, d_model, dropout) ff = PositionwiseFeedForward(d_model, d_ff, dropout) position = PositionalEncoding(d_model, dropout) embed = Embeddings(d_model, src_vocab) word_embed = nn.Sequential(embed, c(position)) print('pgen', self.args.pointer_gen) if entity_encoder_type == 'transformer': # entity_encoder = nn.Sequential(embed, Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 1)) print('transformer') entity_encoder = Seq_Entity_Encoder( embed, Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), 2)) elif entity_encoder_type == 'albert': albert_tokenizer = AlbertTokenizer.from_pretrained( 'albert-base-v2') albert = AlbertModel.from_pretrained('albert-base-v2') entity_encoder = Albert_Encoder(albert, albert_tokenizer, d_model) elif entity_encoder_type == 'gru': entity_encoder = RNNEncoder(embed, 'GRU', d_model, d_model, num_layers=1, dropout=0.1, bidirectional=True) print('gru') elif entity_encoder_type == 'lstm': entity_encoder = RNNEncoder(embed, 'LSTM', d_model, d_model, num_layers=1, dropout=0.1, bidirectional=True) print('lstm') if self.args.ner_at_embedding: model = EncoderDecoderOrg( Encoder(EncoderLayer(d_model, 
c(attn), c(ff), dropout), N), DecoderOrg( DecoderLayerOrg(d_model, c(attn), c(attn), c(ff), dropout), N, d_model, tgt_vocab, self.args.pointer_gen), word_embed, word_embed, entity_encoder) else: if self.args.ner_last: decoder = Decoder( DecoderLayer(d_model, c(attn), c(attn), c(ff), dropout), N, d_model, tgt_vocab, self.args.pointer_gen, self.args.ner_last) else: decoder = Decoder( DecoderLayer_ner(d_model, c(attn), c(attn), attn_ner, c(ff), dropout, self.args.fusion), N, d_model, tgt_vocab, self.args.pointer_gen, self.args.ner_last) model = EncoderDecoder( Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N), decoder, word_embed, word_embed, entity_encoder) # This was important from their code. # Initialize parameters with Glorot / fan_avg. for p in model.parameters(): if p.dim() > 1: nn.init.xavier_uniform_(p) # levels = 3 # num_chans = [d_model] * (args.levels) # k_size = 5 # tcn = TCN(embed, d_model, num_channels, k_size, dropout=dropout) return model.cuda() def train(self): if not self.disable_comet: # logging hyper_params = { "num_layer": self.args.num_layer, "pointer_gen": self.args.pointer_gen, "ner_last": self.args.ner_last, "entity_encoder_type": self.args.entity_encoder_type, "fusion": self.args.fusion, "dropout": self.args.dropout, } COMET_PROJECT_NAME = 'summarization' COMET_WORKSPACE = 'timchen0618' self.exp = Experiment( api_key='mVpNOXSjW7eU0tENyeYiWZKsl', project_name=COMET_PROJECT_NAME, workspace=COMET_WORKSPACE, auto_output_logging='simple', auto_metric_logging=None, display_summary=False, ) self.exp.log_parameters(hyper_params) self.exp.add_tags([ '%s entity_encoder' % self.args.entity_encoder_type, self.args.fusion ]) if self.args.ner_last: self.exp.add_tag('ner_last') if self.args.ner_at_embedding: self.exp.add_tag('ner_at_embedding') self.exp.set_name(self.args.exp_name) self.exp.add_tag('coreference') print('ner_last ', self.args.ner_last) print('ner_at_embedding', self.args.ner_at_embedding) # dataloader & optimizer data_yielder = 
self.data_utils.data_yielder(num_epoch=100) optim = torch.optim.Adam(self.model.parameters(), lr=1e-7, betas=(0.9, 0.998), eps=1e-8, amsgrad=True) #get_std_opt(self.model) # entity_optim = torch.optim.Adam(self.entity_encoder.parameters(), lr=1e-7, betas=(0.9, 0.998), eps=1e-8, amsgrad=True) total_loss = [] start = time.time() print('*' * 50) print('Start Training...') print('*' * 50) start_step = 0 # if loading from checkpoint if self.args.load_model: state_dict = torch.load(self.args.load_model)['state_dict'] self.model.load_state_dict(state_dict) print("Loading model from " + self.args.load_model + "...") # encoder_state_dict = torch.load(self.args.entity_encoder)['state_dict'] # self.entity_encoder.load_state_dict(encoder_state_dict) # print("Loading entity_encoder from %s" + self.args.entity_encoder + "...") start_step = int(torch.load(self.args.load_model)['step']) print('Resume training from step %d ...' % start_step) warmup_steps = 10000 d_model = 512 lr = 1e-7 for step in range(start_step, self.args.total_steps): self.model.train() batch = data_yielder.__next__() optim.zero_grad() # entity_optim.zero_grad() #update lr if step % 400 == 1: lr = (1 / (d_model**0.5)) * min( (1 / (step / 4)**0.5), step * (1 / (warmup_steps**1.5))) for param_group in optim.param_groups: param_group['lr'] = lr # for param_group in entity_optim.param_groups: # param_group['lr'] = lr batch['src'] = batch['src'].long() batch['tgt'] = batch['tgt'].long() batch['ner'] = batch['ner'].long() batch['src_extended'] = batch['src_extended'].long() # forward the model if self.args.entity_encoder_type == 'albert': d = self.model.entity_encoder.tokenizer.batch_encode_plus( batch['ner_text'], return_attention_masks=True, max_length=10, add_special_tokens=False, pad_to_max_length=True, return_tensors='pt') ner_mask = d['attention_mask'].cuda().unsqueeze(1) ner = d['input_ids'].cuda() # print('ner', ner.size()) # print('ner_mask', ner_mask.size()) # print('src_mask', batch['src_mask'].size()) if 
self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm': ner_feat = self.model.entity_encoder( batch['ner'].transpose(0, 1), batch['cluster_len'])[1] elif self.args.entity_encoder_type == 'transformer': mask = gen_mask(batch['cluster_len']) ner_feat = self.model.entity_encoder(batch['ner'], mask) ner, ner_mask = self.data_utils.pad_ner_feature( ner_feat.squeeze(), batch['num_clusters'], batch['src'].size(0)) # print('ner', ner.size()) # print('ner_mask', ner_mask.size()) if self.args.ner_at_embedding: out = self.model.forward(batch['src'], batch['tgt'], ner, batch['src_mask'], batch['tgt_mask'], batch['src_extended'], len(batch['oov_list'])) else: out = self.model.forward(batch['src'], batch['tgt'], ner, batch['src_mask'], batch['tgt_mask'], batch['src_extended'], len(batch['oov_list']), ner_mask) # print out info pred = out.topk(1, dim=-1)[1].squeeze().detach().cpu().numpy()[0] gg = batch['src_extended'].long().detach().cpu().numpy()[0][:100] tt = batch['tgt'].long().detach().cpu().numpy()[0] yy = batch['y'].long().detach().cpu().numpy()[0] #compute loss & update loss = self.model.loss_compute(out, batch['y'].long()) loss.backward() optim.step() # entity_optim.step() total_loss.append(loss.detach().cpu().numpy()) # logging information if step % self.args.print_every_steps == 1: elapsed = time.time() - start print("Epoch Step: %d Loss: %f Time: %f lr: %6.6f" % (step, np.mean(total_loss), elapsed, optim.param_groups[0]['lr'])) self.outfile.write("Epoch Step: %d Loss: %f Time: %f\n" % (step, np.mean(total_loss), elapsed)) print( 'src:\n', self.data_utils.id2sent(gg, False, False, batch['oov_list'])) print( 'tgt:\n', self.data_utils.id2sent(yy, False, False, batch['oov_list'])) print( 'pred:\n', self.data_utils.id2sent(pred, False, False, batch['oov_list'])) print('oov_list:\n', batch['oov_list']) if ner_mask != None and not self.args.ner_at_embedding: pp = self.model.greedy_decode( batch['src_extended'].long()[:1], ner[:1], 
batch['src_mask'][:1], 100, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size, True, ner_mask[:1]) else: pp = self.model.greedy_decode( batch['src_extended'].long()[:1], ner[:1], batch['src_mask'][:1], 100, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size, True) pp = pp.detach().cpu().numpy() print( 'pred_greedy:\n', self.data_utils.id2sent(pp[0], False, False, batch['oov_list'])) print() start = time.time() if not self.disable_comet: # self.log.add_scalar('Loss/train', np.mean(total_loss), step) self.exp.log_metric('Train Loss', np.mean(total_loss), step=step) self.exp.log_metric('Learning Rate', optim.param_groups[0]['lr'], step=step) self.exp.log_text('Src: ' + self.data_utils.id2sent( gg, False, False, batch['oov_list'])) self.exp.log_text('Tgt:' + self.data_utils.id2sent( yy, False, False, batch['oov_list'])) self.exp.log_text('Pred:' + self.data_utils.id2sent( pred, False, False, batch['oov_list'])) self.exp.log_text('Pred Greedy:' + self.data_utils.id2sent( pp[0], False, False, batch['oov_list'])) self.exp.log_text('OOV:' + ' '.join(batch['oov_list'])) total_loss = [] ########################## # validation ########################## if step % self.args.valid_every_steps == 2: print('*' * 50) print('Start Validation...') print('*' * 50) self.model.eval() val_yielder = self.data_utils.data_yielder(1, valid=True) total_loss = [] fw = open(self.w_valid_file, 'w') for batch in val_yielder: with torch.no_grad(): batch['src'] = batch['src'].long() batch['tgt'] = batch['tgt'].long() batch['ner'] = batch['ner'].long() batch['src_extended'] = batch['src_extended'].long() ### ner ###### if self.args.entity_encoder_type == 'albert': d = self.model.entity_encoder.tokenizer.batch_encode_plus( batch['ner_text'], return_attention_masks=True, max_length=10, add_special_tokens=False, pad_to_max_length=True, return_tensors='pt') ner_mask = d['attention_mask'].cuda().unsqueeze(1) ner = d['input_ids'].cuda() if 
self.args.entity_encoder_type == 'gru' or self.args.entity_encoder_type == 'lstm': ner_feat = self.model.entity_encoder( batch['ner'].transpose(0, 1), batch['cluster_len'])[1] elif self.args.entity_encoder_type == 'transformer': mask = gen_mask(batch['cluster_len']) ner_feat = self.model.entity_encoder( batch['ner'], mask) ner, ner_mask = self.data_utils.pad_ner_feature( ner_feat.squeeze(), batch['num_clusters'], batch['src'].size(0)) ### ner ###### if self.args.ner_at_embedding: out = self.model.forward(batch['src'], batch['tgt'], ner, batch['src_mask'], batch['tgt_mask'], batch['src_extended'], len(batch['oov_list'])) else: out = self.model.forward(batch['src'], batch['tgt'], ner, batch['src_mask'], batch['tgt_mask'], batch['src_extended'], len(batch['oov_list']), ner_mask) loss = self.model.loss_compute(out, batch['y'].long()) total_loss.append(loss.item()) if self.args.ner_at_embedding: pred = self.model.greedy_decode( batch['src_extended'].long(), ner, batch['src_mask'], self.args.max_len, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size) else: pred = self.model.greedy_decode( batch['src_extended'].long(), ner, batch['src_mask'], self.args.max_len, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size, ner_mask=ner_mask) for l in pred: sentence = self.data_utils.id2sent( l[1:], True, self.args.beam_size != 1, batch['oov_list']) fw.write(sentence) fw.write("\n") fw.close() # files_rouge = FilesRouge() # scores = files_rouge.get_scores(self.w_valid_file, self.args.valid_tgt_file, avg=True) scores = cal_rouge_score(self.w_valid_file, self.args.valid_ref_file) r1_score = scores['rouge1'] r2_score = scores['rouge2'] print('=============================================') print('Validation Result -> Loss : %6.6f' % (sum(total_loss) / len(total_loss))) print(scores) print('=============================================') self.outfile.write( '=============================================\n') self.outfile.write('Validation Result 
-> Loss : %6.6f\n' % (sum(total_loss) / len(total_loss))) self.outfile.write( '=============================================\n') # self.model.train() # self.log.add_scalar('Loss/valid', sum(total_loss)/len(total_loss), step) # self.log.add_scalar('Score/valid', r1_score, step) if not self.disable_comet: self.exp.log_metric('Valid Loss', sum(total_loss) / len(total_loss), step=step) self.exp.log_metric('R1 Score', r1_score, step=step) self.exp.log_metric('R2 Score', r2_score, step=step) #Saving Checkpoint w_step = int(step / 10000) print('Saving ' + str(w_step) + 'w_model.pth!\n') self.outfile.write('Saving ' + str(w_step) + 'w_model.pth\n') model_name = str(w_step) + 'w_' + '%6.6f' % ( sum(total_loss) / len(total_loss) ) + '%2.3f_' % r1_score + '%2.3f_' % r2_score + 'model.pth' state = {'step': step, 'state_dict': self.model.state_dict()} torch.save(state, os.path.join(self.model_dir, model_name)) # entity_encoder_name = str(w_step) + '0w_' + '%6.6f'%(sum(total_loss)/len(total_loss)) + '%2.3f_'%r1_score + 'entity_encoder.pth' # state = {'step': step, 'state_dict': self.entity_encoder.state_dict()} # torch.save(state, os.path.join(self.model_dir, entity_encoder_name)) def test(self): #prepare model path = self.args.load_model # entity_encoder_path = self.args.entity_encoder state_dict = torch.load(path)['state_dict'] max_len = self.args.max_len model = self.model model.load_state_dict(state_dict) # entity_encoder_dict = torch.load(entity_encoder_path)['state_dict'] # self.entity_encoder.load_state_dict(entity_encoder_dict) pred_dir = make_save_dir(self.args.pred_dir) filename = self.args.filename #start decoding data_yielder = self.data_utils.data_yielder(num_epoch=1) total_loss = [] start = time.time() #file f = open(os.path.join(pred_dir, filename), 'w') self.model.eval() # decode_strategy = BeamSearch( # self.beam_size, # batch_size=batch.batch_size, # pad=self._tgt_pad_idx, # bos=self._tgt_bos_idx, # eos=self._tgt_eos_idx, # n_best=self.n_best, # 
global_scorer=self.global_scorer, # min_length=self.min_length, max_length=self.max_length, # return_attention=attn_debug or self.replace_unk, # block_ngram_repeat=self.block_ngram_repeat, # exclusion_tokens=self._exclusion_idxs, # stepwise_penalty=self.stepwise_penalty, # ratio=self.ratio) step = 0 for batch in data_yielder: #print(batch['src'].data.size()) step += 1 if step % 100 == 0: print('%d batch processed. Time elapsed: %f min.' % (step, (time.time() - start) / 60.0)) start = time.time() ### ner ### if self.args.entity_encoder_type == 'albert': d = self.model.entity_encoder.tokenizer.batch_encode_plus( batch['ner_text'], return_attention_masks=True, max_length=10, add_special_tokens=False, pad_to_max_length=True, return_tensors='pt') ner_mask = d['attention_mask'].cuda().unsqueeze(1) ner = d['input_ids'].cuda() else: ner_mask = None ner = batch['ner'].long() with torch.no_grad(): if self.args.beam_size == 1: if self.args.ner_at_embedding: out = self.model.greedy_decode( batch['src_extended'].long(), self.model.entity_encoder(ner), batch['src_mask'], max_len, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size) else: out = self.model.greedy_decode( batch['src_extended'].long(), self.model.entity_encoder(ner), batch['src_mask'], max_len, self.data_utils.bos, len(batch['oov_list']), self.data_utils.vocab_size, ner_mask=ner_mask) else: ret = self.beam_decode(batch, max_len, len(batch['oov_list'])) out = ret['predictions'] for l in out: sentence = self.data_utils.id2sent(l[1:], True, self.args.beam_size != 1, batch['oov_list']) #print(l[1:]) f.write(sentence) f.write("\n") def beam_decode(self, batch, max_len, oov_nums): src = batch['src'].long() src_mask = batch['src_mask'] src_extended = batch['src_extended'].long() bos_token = self.data_utils.bos beam_size = self.args.beam_size vocab_size = self.data_utils.vocab_size batch_size = src.size(0) def rvar(a): return a.repeat(beam_size, 1, 1) def rvar2(a): return a.repeat(beam_size, 1) def 
bottle(m): return m.view(batch_size * beam_size, -1) def unbottle(m): return m.view(beam_size, batch_size, -1) ### ner ### if self.args.entity_encoder_type == 'albert': d = self.model.entity_encoder.tokenizer.batch_encode_plus( batch['ner_text'], return_attention_masks=True, max_length=10, add_special_tokens=False, pad_to_max_length=True, return_tensors='pt') ner_mask = d['attention_mask'].cuda().unsqueeze(1) ner = d['input_ids'].cuda() else: ner_mask = None ner = batch['ner'].long() ner = self.model.entity_encoder(ner) if self.args.ner_at_embedding: memory = self.model.encode(src, src_mask, ner) else: memory = self.model.encode(src, src_mask) assert batch_size == 1 beam = [ Beam(beam_size, self.data_utils.pad, bos_token, self.data_utils.eos, min_length=self.args.min_length) for i in range(batch_size) ] memory = rvar(memory) ner = rvar(ner) src_mask = rvar(src_mask) src_extended = rvar2(src_extended) for i in range(self.args.max_len): if all((b.done() for b in beam)): break # Construct batch x beam_size nxt words. # Get all the pending current beam words and arrange for forward. inp = torch.stack([b.get_current_state() for b in beam]).t().contiguous().view(-1, 1) #inp -> [1, 3] inp_mask = inp < self.data_utils.vocab_size inp = inp * inp_mask.long() decoder_input = inp if self.args.ner_at_embedding: final_dist = self.model.decode(memory, ner, src_mask, decoder_input, None, src_extended, oov_nums) else: final_dist = self.model.decode(memory, ner, src_mask, decoder_input, None, src_extended, oov_nums, ner_mask=ner_mask) # final_dist, decoder_hidden, attn_dist_p, p_gen = self.seq2seq_model.model_copy.decoder( # decoder_input, decoder_hidden, # post_encoder_outputs, post_enc_padding_mask, # extra_zeros, post_enc_batch_extend_vocab # ) # # Run one step. # print('inp', inp.size()) # decoder_outputs: beam x rnn_size # (b) Compute a vector of batch*beam word scores. out = unbottle(final_dist) out[:, :, 2] = 0 #no unk # out.size -> [3, 1, vocab] # (c) Advance each beam. 
for j, b in enumerate(beam): b.advance(out[:, j]) # decoder_hidden = self.beam_update(j, b.get_current_origin(), beam_size, decoder_hidden) # (4) Extract sentences from beam. ret = self._from_beam(beam) return ret def _from_beam(self, beam): ret = {"predictions": [], "scores": []} for b in beam: n_best = self.args.n_best scores, ks = b.sort_finished(minimum=n_best) hyps = [] for i, (times, k) in enumerate(ks[:n_best]): hyp = b.get_hyp(times, k) hyps.append(hyp) ret["predictions"].append(hyps) ret["scores"].append(scores) return ret
def train(normal_digit, anomalies, folder, file, p_train, p_test):
    """Train a conv autoencoder on a contaminated MNIST split and score
    anomaly detection via reconstruction p-values.

    Args:
        normal_digit: Digit class treated as "normal".
        anomalies: Iterable of digit classes used to corrupt the dataset.
        folder: Output directory for figures and the results CSV.
        file: Suffix used in output file names.
        p_train: Contamination proportion of the training set.
        p_test: Contamination proportion of the test set (also used as ALPHA).
    """
    # Create a (disabled) Comet experiment; calls still go through it so the
    # code path is identical when logging is switched on.
    experiment = Experiment(project_name="deep-stats-thesis",
                            workspace="stecaron",
                            disabled=True)
    experiment.add_tag("mnist_conv_ae")

    # General parameters
    DOWNLOAD_MNIST = True
    PATH_DATA = os.path.join(os.path.expanduser("~"), 'Downloads/mnist')
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Define training parameters
    hyper_params = {
        "EPOCH": 75,
        "NUM_WORKERS": 10,
        "BATCH_SIZE": 256,
        "LR": 0.001,
        "TRAIN_SIZE": 4000,
        "TRAIN_NOISE": p_train,
        "TEST_SIZE": 800,
        "TEST_NOISE": p_test,
        # on which class we want to learn outliers
        "CLASS_SELECTED": [normal_digit],
        # which class we want to corrupt our dataset with
        "CLASS_CORRUPTED": anomalies,
        "ALPHA": p_test,
        "MODEL_NAME": "mnist_ae_model",
        "LOAD_MODEL": False,
        "LOAD_MODEL_NAME": "mnist_ae_model"
    }

    # Log experiment parameters
    experiment.log_parameters(hyper_params)

    # Load data
    train_data, test_data = load_mnist(PATH_DATA, download=DOWNLOAD_MNIST)

    # Train the autoencoder
    model = ConvAutoEncoder2()
    optimizer = torch.optim.Adam(model.parameters(), lr=hyper_params["LR"])
    #loss_func = nn.MSELoss()
    loss_func = nn.BCELoss()

    # Build "train" and "test" datasets: a majority of "normal" digits plus a
    # controlled proportion of corrupted (anomalous) digits.
    # NOTE(review): train_labels/test_labels are deprecated torchvision
    # aliases of .targets — verify against the torchvision version in use.
    id_maj_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_SELECTED"]))[0],
                                       int((1 - hyper_params["TRAIN_NOISE"]) *
                                           hyper_params["TRAIN_SIZE"]),
                                       replace=False)
    id_min_train = numpy.random.choice(numpy.where(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_CORRUPTED"]))[0],
                                       int(hyper_params["TRAIN_NOISE"] *
                                           hyper_params["TRAIN_SIZE"]),
                                       replace=False)
    id_train = numpy.concatenate((id_maj_train, id_min_train))

    id_maj_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels,
                   hyper_params["CLASS_SELECTED"]))[0],
                                      int((1 - hyper_params["TEST_NOISE"]) *
                                          hyper_params["TEST_SIZE"]),
                                      replace=False)
    id_min_test = numpy.random.choice(numpy.where(
        numpy.isin(test_data.test_labels,
                   hyper_params["CLASS_CORRUPTED"]))[0],
                                      int(hyper_params["TEST_NOISE"] *
                                          hyper_params["TEST_SIZE"]),
                                      replace=False)
    id_test = numpy.concatenate((id_min_test, id_maj_test))

    # Subset the datasets in place, then relabel: target 1 = anomaly.
    train_data.data = train_data.data[id_train]
    train_data.targets = train_data.targets[id_train]

    test_data.data = test_data.data[id_test]
    test_data.targets = test_data.targets[id_test]

    train_data.targets = torch.from_numpy(
        numpy.isin(train_data.train_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)
    test_data.targets = torch.from_numpy(
        numpy.isin(test_data.test_labels,
                   hyper_params["CLASS_CORRUPTED"])).type(torch.int32)

    train_loader = Data.DataLoader(dataset=train_data,
                                   batch_size=hyper_params["BATCH_SIZE"],
                                   shuffle=True,
                                   num_workers=hyper_params["NUM_WORKERS"])
    # One single batch holding the whole test set.
    test_loader = Data.DataLoader(dataset=test_data,
                                  batch_size=test_data.data.shape[0],
                                  shuffle=False,
                                  num_workers=hyper_params["NUM_WORKERS"])

    model.train()
    if hyper_params["LOAD_MODEL"]:
        model = torch.load(hyper_params["LOAD_MODEL_NAME"])
    else:
        # NOTE(review): the optimizer is passed through a parameter named
        # `criterion` — looks like a naming quirk of train_mnist; confirm.
        train_mnist(train_loader,
                    model,
                    criterion=optimizer,
                    n_epoch=hyper_params["EPOCH"],
                    experiment=experiment,
                    device=device,
                    model_name=hyper_params["MODEL_NAME"],
                    loss_func=loss_func,
                    loss_type="binary")

    # Compute p-values
    model.to(device)
    pval, test_errors = compute_reconstruction_pval(
        train_loader, model, test_loader, device)
    pval_order = numpy.argsort(pval)

    # Plot p-values: full test set on top, zoom on the smallest 20% below.
    x_line = numpy.arange(0, len(test_data), step=1)
    y_line = numpy.linspace(0, 1, len(test_data))
    y_adj = numpy.arange(0, len(test_data),
                         step=1) / len(test_data) * hyper_params["ALPHA"]
    zoom = int(0.2 * len(test_data))  # nb of points to zoom

    #index = numpy.isin(test_data.test_labels, hyper_params["CLASS_CORRUPTED"]).astype(int)
    index = numpy.array(test_data.targets).astype(int)

    fig, (ax1, ax2) = plt.subplots(2, 1)

    ax1.scatter(numpy.arange(0, len(pval), 1),
                pval[pval_order],
                c=index[pval_order].reshape(-1))
    ax1.plot(x_line, y_line, color="green")
    ax1.plot(x_line, y_adj, color="red")
    ax1.set_title(
        f'Entire test dataset with {int(hyper_params["TEST_NOISE"] * 100)}% of noise'
    )
    ax1.set_xticklabels([])

    ax2.scatter(numpy.arange(0, zoom, 1),
                pval[pval_order][0:zoom],
                c=index[pval_order].reshape(-1)[0:zoom])
    ax2.plot(x_line[0:zoom], y_line[0:zoom], color="green")
    ax2.plot(x_line[0:zoom], y_adj[0:zoom], color="red")
    ax2.set_title('Zoomed in')
    ax2.set_xticklabels([])

    experiment.log_figure(figure_name="empirical_test_hypothesis",
                          figure=fig,
                          overwrite=True)
    plt.savefig(os.path.join(folder, "pvalues_" + file + ".png"))
    plt.show()

    # Compute some stats
    precision, recall, f1_score, average_precision, roc_auc = test_performances(
        pval, index, hyper_params["ALPHA"])
    print(f"Precision: {precision}")
    print(f"Recall: {recall}")
    print(f"F1 Score: {f1_score}")
    print(f"AUC: {roc_auc}")
    print(f"Average Precison: {average_precision}")
    experiment.log_metric("precision", precision)
    experiment.log_metric("recall", recall)
    experiment.log_metric("f1_score", f1_score)
    experiment.log_metric("auc", roc_auc)
    experiment.log_metric("average_precision", average_precision)

    # Show the 25 most anomalous examples (smallest p-values).
    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()
    for i in range(25):
        image = test_data.data[pval_order[i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')
    # NOTE(review): "rejetcted" is a typo in the logged figure name — left
    # unchanged so existing Comet dashboards keep matching.
    experiment.log_figure(figure_name="rejetcted_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    # Show the 25 most "normal" examples (largest p-values).
    fig, axs = plt.subplots(5, 5)
    fig.tight_layout()
    axs = axs.ravel()
    for i in range(25):
        image = test_data.data[pval_order[int(len(pval) - 1) - i]]
        axs[i].imshow(image, cmap='gray')
        axs[i].axis('off')
    experiment.log_figure(figure_name="better_observations",
                          figure=fig,
                          overwrite=True)
    plt.show()

    # Save the results in the output file
    col_names = [
        "timestamp", "precision", "recall", "f1_score", "average_precision",
        "auc"
    ]
    results_file = os.path.join(folder, "results_" + file + ".csv")
    if os.path.exists(results_file):
        df_results = pandas.read_csv(results_file, names=col_names, header=0)
    else:
        df_results = pandas.DataFrame(columns=col_names)
    new_row = pandas.DataFrame(
        numpy.concatenate(
            (numpy.array(
                datetime.datetime.fromtimestamp(
                    time.time()).strftime('%Y-%m-%d %H:%M:%S')).reshape(1),
             precision.reshape(1), recall.reshape(1), f1_score.reshape(1),
             average_precision.reshape(1),
             roc_auc.reshape(1))).reshape(1, -1),
        columns=col_names)
    # DataFrame.append was removed in pandas 2.0; concat is the supported API.
    df_results = pandas.concat([df_results, new_row], ignore_index=True)
    df_results.to_csv(results_file)
def train(rank, defparams, hyper):
    """Distributed WGAN-GP training loop for hadron-shower generation.

    Trains a critic (aD) and generator (aG) with gradient penalty, optionally
    a supervised energy regressor (aE) and a post-processing network (aP),
    logging losses and example images to Comet.

    Args:
        rank: Process rank. NOTE(review): immediately overwritten below from
            SLURM_PROCID — the parameter is effectively unused; confirm with
            the launcher code.
        defparams: General run parameters (paths, restore flags, seed, ...).
        hyper: Hyperparameters (learning rates, ndf/ngf/z, loop counts, ...).
    """
    # Defensive copies so the caller's dicts are never mutated.
    params = {}
    for param in defparams.keys():
        params[param] = defparams[param]

    hyperp = {}
    for hp in hyper.keys():
        hyperp[hp] = hyper[hp]

    experiment = Experiment(api_key="keGmeIz4GfKlQZlOP6cit4QOi",
                            project_name="hadron-shower",
                            workspace="engineren")
    experiment.add_tag(params['exp'])

    experiment.log_parameters(hyperp)

    device = torch.device("cuda")
    torch.manual_seed(params["seed"])

    # Topology comes from the SLURM environment; one process per node.
    world_size = int(os.environ["SLURM_NNODES"])
    rank = int(os.environ["SLURM_PROCID"])

    dist.init_process_group(backend='nccl',
                            world_size=world_size,
                            rank=rank,
                            init_method=params["DDP_init_file"])

    # Critic, generator, energy regressor and post-processor.
    aD = DCGAN_D(hyperp["ndf"]).to(device)
    aG = DCGAN_G(hyperp["ngf"], hyperp["z"]).to(device)
    aE = energyRegressor().to(device)
    aP = PostProcess_Size1Conv_EcondV2(48, 13, 3, 128, bias=True,
                                       out_funct='none').to(device)

    optimizer_g = torch.optim.Adam(aG.parameters(),
                                   lr=hyperp["L_gen"],
                                   betas=(0.5, 0.9))
    optimizer_d = torch.optim.Adam(aD.parameters(),
                                   lr=hyperp["L_crit"],
                                   betas=(0.5, 0.9))
    optimizer_e = torch.optim.SGD(aE.parameters(), lr=hyperp["L_calib"])
    optimizer_p = torch.optim.Adam(aP.parameters(),
                                   lr=hyperp["L_post"],
                                   betas=(0.5, 0.9))

    assert torch.backends.cudnn.enabled, "NVIDIA/Apex:Amp requires cudnn backend to be enabled."
    torch.backends.cudnn.benchmark = True

    # Initialize Amp (mixed precision); only G and D are wrapped.
    models, optimizers = amp.initialize([aG, aD],
                                        [optimizer_g, optimizer_d],
                                        opt_level="O1",
                                        num_losses=2)

    #aD = nn.DataParallel(aD)
    #aG = nn.DataParallel(aG)
    #aE = nn.DataParallel(aE)

    aG, aD = models
    optimizer_g, optimizer_d = optimizers

    aG = nn.parallel.DistributedDataParallel(aG, device_ids=[0])
    aD = nn.parallel.DistributedDataParallel(aD, device_ids=[0])
    aE = nn.parallel.DistributedDataParallel(aE, device_ids=[0])
    aP = nn.parallel.DistributedDataParallel(aP, device_ids=[0])

    experiment.set_model_graph(str(aG), overwrite=False)
    experiment.set_model_graph(str(aD), overwrite=False)

    # Optionally restore post-processor / GAN weights from checkpoints.
    if params["restore_pp"]:
        aP.load_state_dict(
            torch.load(params["restore_path_PP"] + params["post_saved"],
                       map_location=torch.device(device)))

    if params["restore"]:
        checkpoint = torch.load(params["restore_path"])
        aG.load_state_dict(checkpoint['Generator'])
        aD.load_state_dict(checkpoint['Critic'])
        optimizer_g.load_state_dict(checkpoint['G_optimizer'])
        optimizer_d.load_state_dict(checkpoint['D_optimizer'])
        itr = checkpoint['iteration']
    else:
        aG.apply(weights_init)
        aD.apply(weights_init)
        itr = 0

    # c0: fresh energy regressor; c1: restore a pre-trained one.
    if params["c0"]:
        aE.apply(weights_init)
    elif params["c1"]:
        aE.load_state_dict(
            torch.load(params["calib_saved"],
                       map_location=torch.device(device)))

    one = torch.tensor(1.0).to(device)
    mone = (one * -1).to(device)

    print('loading data...')
    paths_list = [
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part1.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part2.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part3.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part4.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part5.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part6.hdf5',
        '/beegfs/desy/user/eren/data_generator/pion/hcal_only/pion40part7.hdf5'
    ]
    train_data = PionsDataset(paths_list, core=True)

    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_data, num_replicas=world_size, rank=rank)

    dataloader = DataLoader(train_data,
                            batch_size=hyperp["batch_size"],
                            num_workers=0,
                            shuffle=False,
                            drop_last=True,
                            pin_memory=True,
                            sampler=train_sampler)

    print('done')

    #scheduler_g = optim.lr_scheduler.StepLR(optimizer_g, step_size=1, gamma=params["gamma_g"])
    #scheduler_d = optim.lr_scheduler.StepLR(optimizer_d, step_size=1, gamma=params["gamma_crit"])
    #scheduler_e = optim.lr_scheduler.StepLR(optimizer_e, step_size=1, gamma=params["gamma_calib"])
    #writer = SummaryWriter()

    e_criterion = nn.L1Loss()  # for energy regressor training

    dataiter = iter(dataloader)

    BATCH_SIZE = hyperp["batch_size"]
    LATENT = hyperp["z"]
    EXP = params["exp"]
    KAPPA = hyperp["kappa"]
    LAMBD = hyperp["lambda"]
    ## Post-Processing
    LDP = hyperp["LDP"]
    wMMD = hyperp["wMMD"]
    wMSE = hyperp["wMSE"]

    ## IO paths
    OUTP = params['output_path']

    for iteration in range(50000):
        iteration += itr + 1

        #---------------------TRAIN D------------------------
        for p in aD.parameters():  # reset requires_grad
            p.requires_grad_(True)  # they are set to False below in training G

        for e in aE.parameters():  # reset requires_grad (constrainer)
            e.requires_grad_(True)  # they are set to False below in training G

        for i in range(hyperp["ncrit"]):
            aD.zero_grad()
            aE.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz] --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)
            if batch is None:
                # Epoch exhausted: restart the loader.
                # Fix: iterator .next() was removed in Python 3 / recent
                # PyTorch; use the builtin next() as done two lines above.
                dataiter = iter(dataloader)
                batch = next(dataiter)

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            with torch.no_grad():
                noisev = noise  # totally freeze G, training D
                fake_data = aG(noisev, real_label).detach()

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)
            real_data.requires_grad_(True)

            #### supervised-training for energy regressor!
            if params["train_calib"]:
                output = aE(real_data.float())
                e_loss = e_criterion(output, real_label.view(BATCH_SIZE, 1))
                e_loss.backward()
                optimizer_e.step()
            ######

            # train with real data
            disc_real = aD(real_data.float(), real_label.float())

            # train with fake data
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]
            disc_fake = aD(fake_data, real_label.float())

            # train with interpolated data
            gradient_penalty = calc_gradient_penalty(aD,
                                                     real_data.float(),
                                                     fake_data,
                                                     real_label,
                                                     BATCH_SIZE,
                                                     device,
                                                     DIM=13)

            ## wasserstein-1 distace
            w_dist = torch.mean(disc_fake) - torch.mean(disc_real)
            # final disc cost
            disc_cost = torch.mean(disc_fake) - torch.mean(
                disc_real) + LAMBD * gradient_penalty

            with amp.scale_loss(disc_cost, optimizer_d) as scaled_loss:
                scaled_loss.backward()
            optimizer_d.step()

            #--------------Log to COMET ML ----------
            if i == hyperp["ncrit"] - 1:
                experiment.log_metric("L_crit", disc_cost, step=iteration)
                experiment.log_metric("gradient_pen",
                                      gradient_penalty,
                                      step=iteration)
                experiment.log_metric("Wasserstein Dist",
                                      w_dist,
                                      step=iteration)
                if params["train_calib"]:
                    experiment.log_metric("L_const", e_loss, step=iteration)

        #---------------------TRAIN G------------------------
        for p in aD.parameters():
            p.requires_grad_(False)  # freeze D

        for c in aE.parameters():
            c.requires_grad_(False)  # freeze C

        gen_cost = None
        for i in range(hyperp["ngen"]):
            aG.zero_grad()

            noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
            noise = torch.from_numpy(noise).float()
            noise = noise.view(
                -1, LATENT, 1, 1,
                1)  #[BS, nz] --> [Bs,nz,1,1,1] Needed for Generator
            noise = noise.to(device)

            batch = next(dataiter, None)
            if batch is None:
                dataiter = iter(dataloader)
                batch = next(dataiter)  # fix: builtin next(), not .next()

            real_label = batch['energy']  ## energy label
            real_label = real_label.to(device)

            noise.requires_grad_(True)

            real_data = batch['shower']  # 48x48x48 calo image
            real_data = real_data.to(device)

            fake_data = aG(noise, real_label.float())
            fake_data = fake_data.unsqueeze(
                1)  ## transform to [BS, 1, 48, 48, 48]

            ## calculate loss function
            gen_cost = aD(fake_data.float(), real_label.float())

            ## label conditioning
            #output_g = aE(fake_data)
            #output_r = aE(real_data.float())
            output_g = 0.0  #for now
            output_r = 0.0  #for now

            aux_fake = (output_g - real_label)**2
            aux_real = (output_r - real_label)**2

            aux_errG = torch.abs(aux_fake - aux_real)

            ## Total loss function for generator
            g_cost = -torch.mean(gen_cost) + KAPPA * torch.mean(aux_errG)

            with amp.scale_loss(g_cost, optimizer_g) as scaled_loss_G:
                scaled_loss_G.backward()
            optimizer_g.step()

            #--------------Log to COMET ML ----------
            experiment.log_metric("L_Gen", g_cost, step=iteration)

            ## plot example image: projections along each axis plus E-sum
            ## histogram, every 100 iterations.
            if iteration % 100 == 0.0 or iteration == 1:
                image = fake_data.view(-1, 48, 13, 13).cpu().detach().numpy()
                cmap = mpl.cm.viridis
                cmap.set_bad('white', 1.)

                figExIm = plt.figure(figsize=(6, 6))
                axExIm1 = figExIm.add_subplot(1, 1, 1)
                image1 = np.sum(image[0], axis=0)
                masked_array1 = np.ma.array(image1, mask=(image1 == 0.0))
                im1 = axExIm1.imshow(masked_array1,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm1.set_xlabel('y [cells]', family='serif')
                axExIm1.set_ylabel('x [cells]', family='serif')
                figExIm.colorbar(im1)
                experiment.log_figure(figure=plt, figure_name="x-y")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm2 = figExIm.add_subplot(1, 1, 1)
                image2 = np.sum(image[0], axis=1)
                masked_array2 = np.ma.array(image2, mask=(image2 == 0.0))
                im2 = axExIm2.imshow(masked_array2,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm2.set_xlabel('y [cells]', family='serif')
                axExIm2.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im2)
                experiment.log_figure(figure=plt, figure_name="y-z")

                figExIm = plt.figure(figsize=(6, 6))
                axExIm3 = figExIm.add_subplot(1, 1, 1)
                image3 = np.sum(image[0], axis=2)
                masked_array3 = np.ma.array(image3, mask=(image3 == 0.0))
                im3 = axExIm3.imshow(masked_array3,
                                     filternorm=False,
                                     interpolation='none',
                                     cmap=cmap,
                                     vmin=0.01,
                                     vmax=100,
                                     norm=mpl.colors.LogNorm(),
                                     origin='lower')
                figExIm.patch.set_facecolor('white')
                axExIm3.set_xlabel('x [cells]', family='serif')
                axExIm3.set_ylabel('z [layers]', family='serif')
                figExIm.colorbar(im3)
                #experiment.log_metric("L_aux", aux_errG, step=iteration)
                experiment.log_figure(figure=plt, figure_name="x-z")

                ## E-sum monitoring
                figEsum = plt.figure(figsize=(6, 6 * 0.77 / 0.67))
                axEsum = figEsum.add_subplot(1, 1, 1)
                etot_real = getTotE(real_data.cpu().detach().numpy(),
                                    xbins=13,
                                    ybins=13)
                etot_fake = getTotE(image, xbins=13, ybins=13)

                axEsum.hist(etot_real,
                            bins=25,
                            range=[0, 1500],
                            weights=np.ones_like(etot_real) /
                            (float(len(etot_real))),
                            label="orig",
                            color='blue',
                            histtype='stepfilled')
                axEsum.hist(etot_fake,
                            bins=25,
                            range=[0, 1500],
                            weights=np.ones_like(etot_fake) /
                            (float(len(etot_fake))),
                            label="generated",
                            color='red',
                            histtype='stepfilled')
                axEsum.text(0.25,
                            0.81,
                            "WGAN",
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='red')
                axEsum.text(0.25,
                            0.87,
                            'GEANT 4',
                            horizontalalignment='left',
                            verticalalignment='top',
                            transform=axEsum.transAxes,
                            color='blue')
                experiment.log_figure(figure=plt, figure_name="E-sum")

            #end = timer()
            #print(f'---train G elapsed time: {end - start}')

        if params["train_postP"]:
            #---------------------TRAIN P------------------------
            for p in aD.parameters():
                p.requires_grad_(False)  # freeze D

            for c in aG.parameters():
                c.requires_grad_(False)  # freeze G

            lossP = None
            for i in range(1):
                noise = np.random.uniform(-1, 1, (BATCH_SIZE, LATENT))
                noise = torch.from_numpy(noise).float()
                noise = noise.view(
                    -1, LATENT, 1, 1,
                    1)  #[BS, nz] --> [Bs,nz,1,1,1] Needed for Generator
                noise = noise.to(device)

                batch = next(dataiter, None)
                if batch is None:
                    dataiter = iter(dataloader)
                    batch = next(dataiter)  # fix: builtin next(), not .next()

                real_label = batch['energy']  ## energy label
                real_label = real_label.to(device)

                noise.requires_grad_(True)

                real_data = batch['shower']  # calo image
                real_data = real_data.to(device)

                ## forward pass to generator
                fake_data = aG(noise, real_label.float())
                fake_data = fake_data.unsqueeze(
                    1)  ## transform to [BS, 1, layer, size, size]

                ### first LossD_P
                fake_dataP = aP(fake_data.float(), real_label.float())
                lossD_P = aD(fake_dataP.float(), real_label.float())
                lossD_P = lossD_P.mean()

                ## lossFixP
                real_sorted = real_data.view(BATCH_SIZE, -1)
                fake_sorted = fake_dataP.view(BATCH_SIZE, -1)

                real_sorted, _ = torch.sort(real_sorted, dim=1,
                                            descending=True)  #.view(900,1)
                fake_sorted, _ = torch.sort(fake_sorted, dim=1,
                                            descending=True)  #.view(900,1)

                lossFixPp1 = mmd_hit_sortKernel(real_sorted.float(),
                                                fake_sorted,
                                                kernel_size=100,
                                                stride=50,
                                                cutoff=2000,
                                                alpha=200)

                lossFixPp2 = F.mse_loss(fake_dataP.view(BATCH_SIZE, -1),
                                        fake_data.detach().view(
                                            BATCH_SIZE, -1),
                                        reduction='mean')

                lossFixP = wMMD * lossFixPp1 + wMSE * lossFixPp2

                lossP = LDP * lossD_P - lossFixP

                lossP.backward(mone)
                optimizer_p.step()

        # Periodic console report and checkpointing (rank 0 only).
        if iteration % 100 == 0 or iteration == 1:
            print('iteration: {}, critic loss: {}'.format(
                iteration, disc_cost.cpu().data.numpy()))
            if rank == 0:
                torch.save(
                    {
                        'Generator': aG.state_dict(),
                        'Critic': aD.state_dict(),
                        'G_optimizer': optimizer_g.state_dict(),
                        'D_optimizer': optimizer_d.state_dict(),
                        'iteration': iteration
                    },
                    OUTP + '{0}/wgan_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_calib"]:
                    torch.save(
                        aE.state_dict(),
                        OUTP + '/{0}/netE_itrs_{1}.pth'.format(EXP, iteration))
                if params["train_postP"]:
                    torch.save(
                        aP.state_dict(),
                        OUTP + '{0}/netP_itrs_{1}.pth'.format(EXP, iteration))
#Test main tree module, only run comet experiment locally to debug callbacks import glob import geopandas as gpd import os is_travis = 'TRAVIS' in os.environ if not is_travis: from comet_ml import Experiment experiment = Experiment(project_name="neontrees", workspace="bw4sz") experiment.add_tag("testing") else: experiment = None import numpy as np import pytest import pandas as pd import rasterio import tensorflow as tf from DeepTreeAttention.utils import metrics from DeepTreeAttention import trees from DeepTreeAttention.generators import boxes from matplotlib.pyplot import imshow from tensorflow.keras import metrics as keras_metrics #random label predictions just for testing test_predictions = "data/raw/2019_BART_5_320000_4881000_image_small.shp" #Use a small rgb crop as a example tile test_sensor_tile = "data/raw/2019_BART_5_320000_4881000_image_crop.tif"
# Fit an XGBoost regressor on log(Sales) and log the runs to Comet.
# NOTE(review): relies on names defined earlier in the file (train, test,
# params, features, num_trees, rmspe_xg, train_test_split) — not visible here.
print("Train a XGBoost model")
val_size = 100000
# train = train.sort(['Date'])
print(train.tail(1)["Date"])

# Random 1% holdout; the commented line below is the time-based alternative.
X_train, X_test = train_test_split(train, test_size=0.01)
# X_train, X_test = train.head(len(train) - val_size), train.tail(val_size)

# Targets are log(Sales + 1); presumably rmspe_xg inverts this — confirm.
dtrain = xgb.DMatrix(X_train[features], np.log(X_train["Sales"] + 1))
dvalid = xgb.DMatrix(X_test[features], np.log(X_test["Sales"] + 1))
dtest = xgb.DMatrix(test[features])

watchlist = [(dvalid, "eval"), (dtrain, "train")]

## Experiment 1: everything as normal, using .train():
experiment = Experiment()
experiment.add_tag("metrics")
# Collect per-round eval metrics so Comet's xgboost integration can log them.
results = {}
gbm = xgb.train(
    params,
    dtrain,
    num_trees,
    evals=watchlist,
    early_stopping_rounds=50,
    feval=rmspe_xg,
    verbose_eval=True,
    evals_result=results,
)
experiment.end()

## Experiment 2: no results (thus no metrics), using .train():
experiment = Experiment()
hyper_params = { "sequence_length": 28, "input_size": 28, "hidden_size": 128, "num_layers": 3, "num_classes": 10, "batch_size": 100, "num_epochs": 2, "learning_rate": 0.02 } experiment = Experiment(api_key="YOUR_API_KEY", project_name="YOUR PROJECT", workspace="YOUR WORKSPACE") experiment.add_tag('pytorch') # MNIST Dataset train_dataset = dsets.MNIST(root='./data/', train=True, transform=transforms.ToTensor(), download=True) test_dataset = dsets.MNIST(root='./data/', train=False, transform=transforms.ToTensor()) # Data Loader (Input Pipeline) train_loader = torch.utils.data.DataLoader( dataset=train_dataset, batch_size=hyper_params['batch_size'],
from comet_ml import Experiment import tensorflow as tf from DeepTreeAttention.trees import AttentionModel from DeepTreeAttention.models import metadata from DeepTreeAttention.callbacks import callbacks import pandas as pd model = AttentionModel(config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml") model.create() #Log config experiment = Experiment(project_name="neontrees", workspace="bw4sz") experiment.log_parameters(model.config["train"]) experiment.log_parameters(model.config["evaluation"]) experiment.log_parameters(model.config["predict"]) experiment.add_tag("HSI") ##Train #Train see config.yml for tfrecords path with weighted classes in cross entropy model.read_data() class_weight = model.calc_class_weight() ##Train subnetwork experiment.log_parameter("Train subnetworks", True) with experiment.context_manager("HSI_spatial_subnetwork"): print("Train HSI spatial subnetwork") model.read_data(mode="HSI_submodel") model.train(submodel="spatial", sensor="hyperspectral",class_weight=[class_weight, class_weight, class_weight], experiment=experiment) with experiment.context_manager("HSI_spectral_subnetwork"):
class Solver():
    """Owns training and evaluation of the Sampler classification model.

    Wraps data loading (DataUtils), model construction, the training loop
    (with optional Comet logging) and test-time prediction writing.
    """

    def __init__(self, args):
        # args: parsed CLI namespace; kept for every hyperparameter lookup below.
        self.args = args
        self.data_utils = DataUtils(args)
        if args.save_checkpoints:
            # Checkpoints live under model_dir/<sampler_label>/<exp_name>.
            self.model_dir = make_save_dir(os.path.join(args.model_dir, args.sampler_label, args.exp_name))
        self.disable_comet = args.disable_comet
        # autoreset=True so each colored print returns to the default style.
        colorama.init(autoreset=True)

    def make_model(self, src_vocab, tgt_vocab, N=6, d_model=512, d_ff=2048, h=8, dropout=0.1, g_drop=0.1):
        """Helper: Construct a model from hyperparameters.

        Builds a Transformer-encoder-based Sampler (encoder + embeddings +
        classification Generator) and moves it to GPU.
        NOTE(review): tgt_vocab is accepted but never used in this body.
        """
        c = copy.deepcopy
        attn = MultiHeadedAttention(h, d_model)
        ff = PositionwiseFeedForward(d_model, d_ff, dropout)
        position = PositionalEncoding(d_model, dropout)
        # Token embedding followed by (a copy of) the positional encoding.
        word_embed = nn.Sequential(Embeddings(d_model, src_vocab), c(position))
        model = Sampler(Encoder(EncoderLayer(d_model, c(attn), c(ff), dropout), N),
                        word_embed,
                        Generator(d_model, self.args.num_classes, g_drop))
        print(model)
        # This was important from their code.
        # Initialize parameters with Glorot / fan_avg.
        for p in model.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
        return model.cuda()

    def train(self):
        """Run the full training loop with periodic validation and checkpointing."""
        # logging: create the Comet experiment unless disabled on the CLI.
        if not self.disable_comet:
            COMET_PROJECT_NAME = 'weibo-stc'
            COMET_WORKSPACE = 'timchen0618'
            self.exp = Experiment(project_name=COMET_PROJECT_NAME,
                                  workspace=COMET_WORKSPACE,
                                  auto_output_logging='simple',
                                  auto_metric_logging=None,
                                  display_summary=False,
                                  )
            self.exp.add_tag(self.args.sampler_label)
            self.exp.add_tag('Sampler')
            if self.args.processed:
                self.exp.add_tag('processed')
            else:
                self.exp.add_tag('unprocessed')
            self.exp.set_name(self.args.exp_name)

        ###### loading ....
        ######
        vocab_size = self.data_utils.vocab_size
        print("============================")
        print("=======start to build=======")
        print("============================")
        print("Vocab Size: %d"%(vocab_size))

        #make model (then load pretrained embeddings into it)
        self.model = self.make_model(src_vocab=vocab_size,
                                     tgt_vocab=vocab_size,
                                     N=self.args.num_layer,
                                     dropout=self.args.dropout,
                                     g_drop=self.args.generator_drop
                                     )
        self.model.load_embedding(self.args.pretrain_model)
        # sys.exit(0)

        # Initial LRs are tiny/fixed; both are overwritten by the warmup
        # schedule inside the loop (every 20 steps).
        lr = 1e-7
        generator_lr = 1e-4
        d_model = 512
        warmup_steps = self.args.warmup_steps
        # optim = torch.optim.Adam([
        #     {'params':list(self.model.encoder.parameters())+list(self.model.src_embed.parameters())},
        #     {'params':self.model.generator.parameters(), 'lr':generator_lr}
        # ], lr=lr, betas=(0.9, 0.98), eps=1e-9)
        # Two parameter groups: encoder+embeddings vs. generator head, so the
        # head can use its own learning rate.
        optim = torch.optim.AdamW([
            {'params':list(self.model.encoder.parameters())+list(self.model.src_embed.parameters())},
            {'params':self.model.generator.parameters(), 'lr':generator_lr}
        ], lr=lr, betas=(0.9, 0.98), eps=1e-9)
        # optim = torch.optim.Adam(self.model.parameters(), lr=lr, betas=(0.9, 0.98), eps=1e-9)

        total_loss = []
        train_accs = []
        start = time.time()
        step = 0
        for epoch in range(self.args.num_epoch):
            self.model.train()
            train_data = self.data_utils.data_yielder(epo=epoch)
            for batch in train_data:
                optim.zero_grad()

                # Inverse-sqrt warmup schedule (Transformer-style), refreshed
                # every 20 steps; guard `step % 20 == 1` also avoids step == 0.
                if step % 20 == 1:
                    lr = self.args.lr * (1/(d_model**0.5))*min((1/(step)**0.5), step * (1/(warmup_steps**1.5)))
                    optim.param_groups[0]['lr'] = lr
                    lr2 = self.args.g_lr * (1/(d_model**0.5))*min((1/(step)**0.5), step * (1/(warmup_steps**1.5)))
                    optim.param_groups[1]['lr'] = lr2
                # for param_group in optim.param_groups:
                #     param_group['lr'] = lr

                src = batch['src'].long()
                src_mask = batch['src_mask'].long()
                y = batch['y']

                #forward model
                out = self.model.forward(src, src_mask)

                # Kept only for the periodic debug printout below.
                pred = out.topk(5, dim=-1)[1].squeeze().detach().cpu().numpy()
                gg = batch['src'].long().detach().cpu().numpy()
                yy = batch['y']

                #compute loss
                loss = self.model.loss_compute(out, y, self.args.multi)
                # else:
                #     loss = self.model.loss_compute(out, y.long().unsqueeze(1), self.args.multi)

                #compute acc
                acc = self.model.compute_acc(out, batch['y'], self.args.multi)

                loss.backward()
                # print('emb_size', self.model.src_embed[0].lut.weight.size())
                # print('emb', self.model.src_embed[0].lut.weight.grad.sum())
                # print('enc_out', self.model.encoder.layers[0].feed_forward.w_1.weight.grad)
                optim.step()

                total_loss.append(loss.detach().cpu().numpy())
                train_accs.append(acc)

                # Periodic console + Comet logging of windowed averages.
                if step % self.args.print_every_step == 1:
                    elapsed = time.time() - start
                    print(Fore.GREEN + "[Step: %d]"%step +
                          Fore.WHITE + " Loss: %f | Time: %f | Acc: %4.4f | Lr: %4.6f"
                          %(np.mean(total_loss), elapsed, sum(train_accs)/len(train_accs), optim.param_groups[0]['lr']))
                    print(Fore.RED + 'src:', Style.RESET_ALL, self.id2sent(self.data_utils, gg[0][:150]))
                    print(Fore.RED + 'y:', Style.RESET_ALL, yy[0])
                    print(Fore.RED + 'pred:', Style.RESET_ALL, pred[0])
                    # print(train_accs)
                    # self.log.add_scalar('Loss/train', np.mean(total_loss), step)
                    if not self.disable_comet:
                        self.exp.log_metric('Train Loss', np.mean(total_loss), step=step)
                        self.exp.log_metric('Train Acc', sum(train_accs)/len(train_accs), step=step)
                        # NOTE(review): `lr` here is the last value computed by the
                        # schedule, which may lag the actual group LR by <20 steps.
                        self.exp.log_metric('Learning Rate', lr, step=step)
                    # print('grad', self.model.src_embed.grad)
                    print(Style.RESET_ALL)
                    # Reset the averaging window.
                    start = time.time()
                    total_loss = []
                    train_accs = []

                # Periodic validation pass over the whole validation yielder.
                if step % self.args.valid_every_step == self.args.valid_every_step-1:
                    val_yielder = self.data_utils.data_yielder(epo=0, valid=True)
                    self.model.eval()
                    valid_losses = []
                    valid_accs = []
                    for batch in val_yielder:
                        with torch.no_grad():
                            out = self.model.forward(batch['src'].long(), batch['src_mask'].long())
                            loss = self.model.loss_compute(out, batch['y'], self.args.multi)
                            acc = self.model.compute_acc(out, batch['y'], self.args.multi)
                            valid_accs.append(acc)
                            valid_losses.append(loss.item())
                    print('=============================================')
                    print('Validation Result -> Loss : %6.6f | Acc : %6.6f'
                          %(sum(valid_losses)/len(valid_losses), sum(valid_accs)/ len(valid_accs)))
                    print('=============================================')
                    self.model.train()
                    # self.log.add_scalar('Loss/valid', sum(valid_losses)/len(valid_losses), step)
                    if not self.disable_comet:
                        self.exp.log_metric('Valid Loss', sum(valid_losses)/ len(valid_losses), step=step)
                        self.exp.log_metric('Valid Acc', sum(valid_accs)/ len(valid_accs), step=step)
                    if self.args.save_checkpoints:
                        print('saving!!!!')
                        # Checkpoint name embeds step (in thousands), loss and acc.
                        model_name = str(int(step/1000)) + 'k_' + '%6.6f_%6.6f'%(sum(valid_losses)/len(valid_losses), sum(valid_accs)/ len(valid_accs)) + 'model.pth'
                        state = {'step': step, 'state_dict': self.model.state_dict()}
                        #state = {'step': step, 'state_dict': self.model.state_dict(),
                        #         'optimizer' : optim_topic_gen.state_dict()}
                        torch.save(state, os.path.join(self.model_dir, model_name))
                step += 1

    def test(self):
        """Load a checkpoint and write predictions for the test data to a file."""
        #prepare model
        path = self.args.load_model
        # NOTE(review): max_len is unused — the greedy_decode call that needed it
        # is commented out below.
        max_len = self.args.max_len
        state_dict = torch.load(path)['state_dict']
        vocab_size = self.data_utils.vocab_size
        self.model = self.make_model(src_vocab = vocab_size,
                                     tgt_vocab = vocab_size,
                                     N = self.args.num_layer,
                                     dropout = self.args.dropout,
                                     g_drop = self.args.generator_drop
                                     )
        model = self.model
        model.load_state_dict(state_dict)

        pred_dir = make_save_dir(self.args.pred_dir)
        filename = self.args.filename

        #start decoding
        data_yielder = self.data_utils.data_yielder()
        total_loss = []
        start = time.time()

        #file
        # NOTE(review): f is never closed; consider a `with` block.
        f = open(os.path.join(pred_dir, filename), 'w')

        self.model.eval()
        step = 0
        # NOTE(review): total_loss is re-initialized here (duplicate of above).
        total_loss = []
        corr, total = 0.0, 0.0
        for batch in data_yielder:
            step += 1
            if step % 10 == 1:
                print('Step ', step)
            # out = self.model.greedy_decode(batch['src'].long(), batch['src_mask'], max_len, self.data_utils.bos)
            with torch.no_grad():
                out = self.model.forward(batch['src'].long(), batch['src_mask'].long())
            if self.args.multi:
            # if True:
                print('out', out.size())
                print('y', batch['y'])
                loss = self.model.loss_compute(out, batch['y'], self.args.multi)
                c = self.model.compute_acc(out, batch['y'], self.args.multi)
                corr += c*len(batch['y'])
                total += len(batch['y'])
                # Multi-label path: write the single top prediction per example.
                preds = out.argmax(dim=-1)
                for x in preds:
                    f.write(str(x.item()))
                    f.write('\n')
                    # f.write(str(p.item()))
                    # f.write('\n')
            else:
                loss = self.model.loss_compute(out, batch['y'], self.args.multi)
                # Single-label path: write the top-10 predictions per example.
                # preds = out.argmax(dim = -1)
                _, preds = out.topk(dim = -1, k=10)
                print(preds.size())
                for p in preds:
                    for x in p:
                        f.write(str(x.item()))
                        f.write(' ')
                    f.write('\n')
                    # f.write(str(p.item()))
                    # f.write('\n')
            total_loss.append(loss.item())
        print(total_loss)
        print(sum(total_loss)/ len(total_loss))
        print('acc: ', corr/float(total))
        print(corr, ' ', total)

    def compute_acc(self, out, y):
        """Count predictions whose argmax appears in the target collection.

        Returns (correct_count, batch_size).
        NOTE(review): appears unused here — the loops above call
        self.model.compute_acc instead; verify before removing.
        """
        corr = 0.
        pred = out.argmax(dim = -1)
        for i, target in enumerate(y):
            # print(target)
            # print(pred[i])
            if pred[i] in target:
                corr += 1
        return corr, len(y)

    def id2sent(self, data_utils, indices, test=False):
        """Map token indices back to a space-joined string.

        When test=True, drops the '</s>' token and any repeated index
        (word_dict tracks indices already emitted).
        """
        #print(indices)
        sent = []
        word_dict={}
        for index in indices:
            if test and (index == data_utils.word2id['</s>'] or index in word_dict):
                continue
            sent.append(data_utils.index2word[index])
            word_dict[index] = 1
        return ' '.join(sent)
import tensorflow as tf
from DeepTreeAttention.trees import AttentionModel
from DeepTreeAttention.models import metadata
from DeepTreeAttention.callbacks import callbacks
import pandas as pd

# Build the attention model from the on-disk YAML config.
model = AttentionModel(
    config="/home/b.weinstein/DeepTreeAttention/conf/tree_config.yml")
model.create()

#Log config
# NOTE(review): `Experiment` is not imported in this chunk — presumably
# `from comet_ml import Experiment` exists upstream; verify.
experiment = Experiment(project_name="neontrees", workspace="bw4sz")
experiment.log_parameters(model.config["train"])
experiment.log_parameters(model.config["evaluation"])
experiment.log_parameters(model.config["predict"])
experiment.add_tag("metadata")

##Train
#Train see config.yml for tfrecords path with weighted classes in cross entropy
model.read_data(mode="metadata")

#Create the metadata-only submodel and compile it for classification.
inputs, outputs = metadata.metadata_model(
    classes=model.config["train"]["classes"])
meta_model = tf.keras.Model(inputs=inputs, outputs=outputs, name="DeepTreeAttention")
meta_model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=["acc"])
def run(args, train, sparse_evidences, claims_dict):
    """Train and validate the CDSSM claim-verification model, logging to Comet.

    Args:
        args: parsed CLI namespace (batch_size, learning_rate, epochs, model, ...).
        train: full labeled set; split 80/20 into train/validation below.
        sparse_evidences: precomputed sparse evidence features passed to the dataset.
        claims_dict: mapping used by the dataset to resolve claims.
    """
    BATCH_SIZE = args.batch_size
    LEARNING_RATE = args.learning_rate
    DATA_SAMPLING = args.data_sampling
    NUM_EPOCHS = args.epochs
    MODEL = args.model  # optional path to a pretrained checkpoint
    RANDOMIZE = args.no_randomize
    PRINT = args.print

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    # TensorBoard-style logger; log dir keyed by process start time.
    logger = Logger('./logs/{}'.format(time.localtime()))

    if MODEL:
        print("Loading pretrained model...")
        # NOTE(review): the checkpoint is torch.load()ed twice; the second load
        # only re-applies the same weights and looks redundant.
        model = torch.load(MODEL)
        model.load_state_dict(torch.load(MODEL).state_dict())
    else:
        model = cdssm.CDSSM()
        model = model.cuda()
        model = model.to(device)
    # model = cdssm.CDSSM()
    # model = model.cuda()
    # model = model.to(device)

    if torch.cuda.device_count() > 0:
        print("Let's use", torch.cuda.device_count(), "GPU(s)!")
        model = nn.DataParallel(model)

    print("Created model with {:,} parameters.".format(
        putils.count_parameters(model)))

    # if MODEL:
    #     print("TEMPORARY change to loading!")
    #     model.load_state_dict(torch.load(MODEL).state_dict())

    print("Created dataset...")

    # use an 80/20 train/validate split!
    train_size = int(len(train) * 0.80)
    #test = int(len(train) * 0.5)
    train_dataset = pytorch_data_loader.WikiDataset(
        train[:train_size],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    val_dataset = pytorch_data_loader.WikiDataset(
        train[train_size:],
        claims_dict,
        data_sampling=DATA_SAMPLING,
        sparse_evidences=sparse_evidences,
        randomize=RANDOMIZE)
    # PadCollate pads variable-length batch items to a common size.
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=BATCH_SIZE,
                                  num_workers=0,
                                  shuffle=True,
                                  collate_fn=pytorch_data_loader.PadCollate())
    val_dataloader = DataLoader(val_dataset,
                                batch_size=BATCH_SIZE,
                                num_workers=0,
                                shuffle=True,
                                collate_fn=pytorch_data_loader.PadCollate())

    # Loss and optimizer
    criterion = torch.nn.NLLLoss()
    # criterion = torch.nn.SoftMarginLoss()
    # if torch.cuda.device_count() > 0:
    #     print("Let's parallelize the backward pass...")
    #     criterion = DataParallelCriterion(criterion)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-3)

    # Log/print roughly every 2% of an epoch, but at least every 20 batches.
    OUTPUT_FREQ = max(int((len(train_dataset) / BATCH_SIZE) * 0.02), 20)

    parameters = {
        "batch size": BATCH_SIZE,
        "epochs": NUM_EPOCHS,
        "learning rate": LEARNING_RATE,
        "optimizer": optimizer.__class__.__name__,
        "loss": criterion.__class__.__name__,
        "training size": train_size,
        "data sampling rate": DATA_SAMPLING,
        "data": args.data,
        "sparse_evidences": args.sparse_evidences,
        "randomize": RANDOMIZE,
        "model": MODEL
    }
    # NOTE(review): hard-coded Comet API key committed in source — consider
    # moving it to an environment variable / config file.
    experiment = Experiment(api_key="YLsW4AvRTYGxzdDqlWRGCOhee",
                            project_name="clsm",
                            workspace="moinnadeem")
    experiment.add_tag("train")
    experiment.log_asset("cdssm.py")
    experiment.log_dataset_info(name=args.data)
    experiment.log_parameters(parameters)

    # Build a checkpoint filename that encodes every hyperparameter except
    # the (path-like) model entry; '/' in values would break the path.
    model_checkpoint_dir = "models/saved_model"
    for key, value in parameters.items():
        if type(value) == str:
            value = value.replace("/", "-")
        if key != "model":
            model_checkpoint_dir += "_{}-{}".format(key.replace(" ", "_"), value)

    print("Training...")
    beginning_time = time.time()
    best_loss = torch.tensor(float("inf"), dtype=torch.float)  # begin loss at infinity

    for epoch in range(NUM_EPOCHS):
        beginning_time = time.time()
        mean_train_acc = 0.0
        train_running_loss = 0.0
        train_running_accuracy = 0.0
        model.train()
        experiment.log_current_epoch(epoch)

        with experiment.train():
            for train_batch_num, inputs in enumerate(train_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()
                #claims = claims.to(device).float()
                #evidences = evidences.to(device).float()
                #labels = labels.to(device)

                y_pred = model(claims_tensors, evidences_tensors)
                y = (labels)
                # y = y.unsqueeze(0)
                # y = y.unsqueeze(0)
                # y_pred = parallel.gather(y_pred, 0)
                y_pred = y_pred.squeeze()
                # y = y.squeeze()

                # NLLLoss expects class indices; labels appear to be 2-D
                # (one-hot-like), hence the argmax — confirm against the dataset.
                loss = criterion(y_pred, torch.max(y, 1)[1])
                # loss = criterion(y_pred, y)

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float()
                accuracy = accuracy.mean()
                train_running_accuracy += accuracy.item()
                mean_train_acc += accuracy.item()
                train_running_loss += loss.item()

                if PRINT:
                    for idx in range(len(y)):
                        print(
                            "Claim: {}, Evidence: {}, Prediction: {}, Label: {}"
                            .format(claims_text[0], evidences_text[idx],
                                    torch.exp(y_pred[idx]), y[idx]))

                # Periodic console + Comet + TensorBoard logging. Backprop runs
                # after this block, so the gradient histograms below are from
                # the previous batch's backward pass.
                if (train_batch_num % OUTPUT_FREQ) == 0 and train_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    binary_y = torch.max(y, 1)[1]
                    binary_pred = torch.max(y_pred, 1)[1]
                    print(
                        "[{}:{}:{:3f}s] training loss: {}, training accuracy: {}, training recall: {}"
                        .format(
                            epoch,
                            train_batch_num / (len(train_dataset) / BATCH_SIZE),
                            elapsed_time, train_running_loss / OUTPUT_FREQ,
                            train_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {
                        'train_loss': train_running_loss / OUTPUT_FREQ,
                        'train_accuracy': train_running_accuracy / OUTPUT_FREQ
                    }
                    for tag, value in info.items():
                        # NOTE(review): step=train_batch_num*(epoch+1) is not
                        # globally monotonic across epochs; verify intent.
                        experiment.log_metric(tag, value, step=train_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, train_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag, value.detach().cpu().numpy(), train_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(), train_batch_num + 1)

                    # Reset the running window.
                    train_running_loss = 0.0
                    beginning_time = time.time()
                    train_running_accuracy = 0.0

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            # del loss
            # del accuracy
            # del claims_tensors
            # del claims_text
            # del evidences_tensors
            # del evidences_text
            # del labels
            # del y
            # del y_pred
            # torch.cuda.empty_cache()

        print("Running validation...")
        model.eval()
        pred = []
        true = []
        avg_loss = 0.0
        val_running_accuracy = 0.0
        val_running_loss = 0.0
        beginning_time = time.time()
        # NOTE(review): no torch.no_grad() here — validation builds autograd
        # graphs unnecessarily.
        with experiment.validate():
            for val_batch_num, val_inputs in enumerate(val_dataloader):
                claims_tensors, claims_text, evidences_tensors, evidences_text, labels = val_inputs

                claims_tensors = claims_tensors.cuda()
                evidences_tensors = evidences_tensors.cuda()
                labels = labels.cuda()

                y_pred = model(claims_tensors, evidences_tensors)
                y = (labels)
                # y_pred = parallel.gather(y_pred, 0)
                y_pred = y_pred.squeeze()

                loss = criterion(y_pred, torch.max(y, 1)[1])

                y = y.float()
                binary_y = torch.max(y, 1)[1]
                binary_pred = torch.max(y_pred, 1)[1]
                true.extend(binary_y.tolist())
                pred.extend(binary_pred.tolist())

                accuracy = (binary_y == binary_pred).to("cuda")
                accuracy = accuracy.float().mean()
                val_running_accuracy += accuracy.item()
                val_running_loss += loss.item()
                avg_loss += loss.item()

                if (val_batch_num % OUTPUT_FREQ) == 0 and val_batch_num > 0:
                    elapsed_time = time.time() - beginning_time
                    print(
                        "[{}:{}:{:3f}s] validation loss: {}, accuracy: {}, recall: {}"
                        .format(
                            epoch,
                            val_batch_num / (len(val_dataset) / BATCH_SIZE),
                            elapsed_time, val_running_loss / OUTPUT_FREQ,
                            val_running_accuracy / OUTPUT_FREQ,
                            recall_score(binary_y.cpu().detach().numpy(),
                                         binary_pred.cpu().detach().numpy())))

                    # 1. Log scalar values (scalar summary)
                    info = {'val_accuracy': val_running_accuracy / OUTPUT_FREQ}
                    for tag, value in info.items():
                        experiment.log_metric(tag, value, step=val_batch_num * (epoch + 1))
                        logger.scalar_summary(tag, value, val_batch_num + 1)

                    ## 2. Log values and gradients of the parameters (histogram summary)
                    for tag, value in model.named_parameters():
                        tag = tag.replace('.', '/')
                        logger.histo_summary(tag, value.detach().cpu().numpy(), val_batch_num + 1)
                        logger.histo_summary(tag + '/grad',
                                             value.grad.detach().cpu().numpy(), val_batch_num + 1)

                    val_running_accuracy = 0.0
                    val_running_loss = 0.0
                    beginning_time = time.time()

            # del loss
            # del accuracy
            # del claims_tensors
            # del claims_text
            # del evidences_tensors
            # del evidences_text
            # del labels
            # del y
            # del y_pred
            # torch.cuda.empty_cache()

        accuracy = accuracy_score(true, pred)
        print("[{}] mean accuracy: {}, mean loss: {}".format(
            epoch, accuracy, avg_loss / len(val_dataloader)))
        true = np.array(true).astype("int")
        pred = np.array(pred).astype("int")
        print(classification_report(true, pred))

        # Track best (lowest) mean validation loss and checkpoint on improvement.
        # NOTE(review): best_loss is updated *before* the comparison, so is_best
        # is True every epoch — verify against putils.save_checkpoint semantics.
        best_loss = torch.tensor(
            min(avg_loss / len(val_dataloader), best_loss.cpu().numpy()))
        is_best = bool((avg_loss / len(val_dataloader)) <= best_loss)
        putils.save_checkpoint(
            {
                "epoch": epoch,
                "model": model,
                "best_loss": best_loss
            },
            is_best,
            filename="{}_loss_{}".format(model_checkpoint_dir, best_loss.cpu().numpy()))
from comet_ml import Experiment

from film_test.cifar import qa_cifar

# Comet experiment for the question-answering CIFAR baseline (plain resnet).
experiment = Experiment(
    api_key="ZfKpzyaedH6ajYSiKmvaSwyCs",
    project_name="film-test",
    workspace="fgolemo")
experiment.add_tag("qa")
experiment.add_tag("vanilla-resnet")

from torch import nn, optim
from tqdm import trange
from film_test.resnet import resnet18
from film_test.traintest import train, test, device

EPOCHS = 24

# Binary classifier: resnet18 with two output classes.
net = resnet18(num_classes=2)
net = net.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)

trainloader, testloader = qa_cifar()

for epoch in trange(EPOCHS):
    experiment.log_metric("epoch", epoch)
    # NOTE: this call is truncated at the end of this chunk; its remaining
    # arguments continue elsewhere.
    train(
        net,
# Short alias for numpy.array used throughout this script.
npa = np.array
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Tensor-valued pi kept on the active device so normal() can broadcast with it.
pi = torch.Tensor([np.pi]).float().to(device)

exp_name = "24-newReparam"
exp_dir = "experiments/" + exp_name + "-" + strftime("%Y%m%d%H%M%S")

experiment = Experiment(api_key="ZfKpzyaedH6ajYSiKmvaSwyCs",
                        project_name="rezende",
                        workspace="fgolemo")
# NOTE(review): `params` is not defined in this chunk — presumably a
# module-level hyperparameter dict defined upstream; verify.
experiment.log_parameters(params)
experiment.set_name(exp_name)
experiment.add_tag("random-shape")


def normal(x, mu, sigma_sq):
    """Gaussian density N(mu, sigma_sq) evaluated elementwise at x.

    All arguments are tensors; sigma_sq is the variance (not std).
    """
    a = (-1 * (x - mu).pow(2) / (2 * sigma_sq)).exp()
    b = 1 / (2 * sigma_sq * pi.expand_as(sigma_sq)).sqrt()
    return a * b


class Policy(nn.Module):
    # NOTE: class body is truncated at the end of this chunk and continues elsewhere.
    def __init__(self, num_latents, num_outputs):
        super(Policy, self).__init__()
        self.relu = nn.ReLU()
# (continuation of a parser.add_argument call whose start is outside this chunk)
                    action='store_true', help='resume from checkpoint')
args = parser.parse_args()

# Ideas
# Pretrain network without permuted convolutions. Then train it using permuted/shuffled convolutions

################################################
# Run configuration — edit these constants per experiment.
num_channels_permuted = "5, 10"
# model_name = "DenseNet_reduced_1x1_regularized_conv1-2"
# model_name = "small_CNN_1x1_3x3_no_bias_LBFGS"
model_name = "PermSmallCNN_SGD_LR_0.0001_LRS_no_bias"
gpu_id = 3
# NOTE(review): reg_lambda is defined here but its use is outside this chunk.
reg_lambda = 5e-3
################################################

# Record run configuration on the (upstream-created) Comet experiment.
experiment.add_tag(model_name)
experiment.add_tag(num_channels_permuted)
experiment.log_other("Network", model_name)
experiment.log_other("Dataset", "CIFAR-100")
experiment.log_other("Type", model_name)
# experiment.log_other("Regularizer", reg_lambda)

device = 'cuda:' + str(gpu_id) if torch.cuda.is_available() else 'cpu'
# device = 'cpu'
best_acc = 0  # best test accuracy
start_epoch = 0  # start from epoch 0 or last checkpoint epoch
train_batch_size = 250
test_batch_size = 250

# Data
print('==> Preparing data..')
def main():
    """Train a PPO-based RL flight controller and log the run to Comet.

    Selects PPO1 or PPO2 via the ``-m/--model`` CLI flag, reads hyperparameters
    from TRAINING_CONFIG, sets up checkpoint/TensorBoard directories, trains,
    and saves the final model (including on Ctrl-C).
    """
    # Argument parser to select model type
    parser = argparse.ArgumentParser(
        description="Train a reinforcement learning flight controller.")
    parser.add_argument('-m', '--model', help="RL Agent to train on.")
    args = vars(parser.parse_args())

    # Create a Comet experiment with an API key; environment/git auto-logging
    # is disabled to keep the experiment lightweight.
    experiment = Experiment(api_key="Bq3mQixNCv2jVzq2YBhLdxq9A",
                            project_name="rl-flight-controller",
                            workspace="alexbarnett12",
                            log_env_gpu=False,
                            log_env_cpu=False,
                            log_env_host=False,
                            log_git_metadata=False,
                            log_git_patch=False)

    # Load training parameters
    cfg = configparser.ConfigParser()
    cfg.read(TRAINING_CONFIG)
    params = cfg["PARAMETERS"]

    # Set training parameters (configparser values are strings; cast explicitly)
    learning_rate_max = float(params["learning_rate_max"])
    learning_rate_min = float(params["learning_rate_min"])
    n_steps = int(params["N_steps"])
    noptepochs = int(params["Noptepochs"])
    nminibatches = int(params["Nminibatches"])
    gamma = float(params["Gamma"])
    lam = float(params["Lam"])
    clip = float(params["Clip"])
    ent_coeff = float(params["Ent_coeff"])
    total_timesteps = int(params["Total_timesteps"])

    # Linearly decreasing learning rate (only for PPO2)
    lr_callback = create_lr_callback(learning_rate_max, learning_rate_min)

    # Report hyperparameters to Comet
    hyper_params = {"learning_rate": learning_rate_max,
                    "steps": n_steps,
                    "epochs": noptepochs,
                    "minibatches": nminibatches,
                    "gamma": gamma,
                    "lambda": lam,
                    "clip_range": clip,
                    "ent_coeff": ent_coeff,
                    "total_timesteps": total_timesteps}
    experiment.log_parameters(hyper_params)

    # You can set the level to logger.DEBUG or logger.WARN if you
    # want to change the amount of output.
    logger.set_level(logger.DEBUG)

    # Create save directory and various save paths
    model_log_dir = create_model_log_dir()
    save_path = "./logs/" + model_log_dir + "/ckpts/"
    best_model_save_path = "./logs/" + model_log_dir + "/best_model/"
    log_path = "./logs/" + model_log_dir + "/results/"
    tensorboard_dir = "./logs/" + model_log_dir + "/tensorboard/"
    model_save_path = "./logs/saved_models/" + model_log_dir

    # Save training and reward params to model directory
    shutil.copy("./gymfc/reward_params.config",
                "./logs/" + model_log_dir + "/reward_params.config")
    shutil.copy("./gymfc/training_params.config",
                "./logs/" + model_log_dir + "/training_params.config")

    # Create a callback to save model checkpoints
    checkpoint_callback = CheckpointCallback(save_freq=100000,
                                             save_path=save_path,
                                             name_prefix='rl_model')

    # Create a separate evaluation environment
    #eval_env = gym.make('attitude-fc-v0')

    # Callback to evaluate the model during training
    #eval_callback = EvalCallback(eval_env, best_model_save_path=best_model_save_path,
    #                             log_path=log_path, eval_freq=100000)

    # Create training environment
    env = gym.make('attitude-fc-v0')

    # Callback to add max penalty watchers to Tensorboard
    tb_callback = TensorboardCallback(env)

    # Create the callback list
    #callback = CallbackList([checkpoint_callback, eval_callback, tb_callback])
    callback = CallbackList([checkpoint_callback, tb_callback])

    # RL Agent; Current options are PPO1 or PPO2
    # Note: PPO2 does not work w/o vectorized environments (gymfc is not vectorized)
    if args["model"] == "PPO2":
        print("PPO2!")
        # FIX: policy_kwargs must use the string key 'layers'; the original
        # bare name `layers` was an undefined variable (NameError on this path).
        model = PPO2(MlpPolicy,
                     env,
                     n_steps=n_steps,
                     learning_rate=lr_callback,
                     noptepochs=noptepochs,
                     nminibatches=nminibatches,
                     gamma=gamma,
                     lam=lam,
                     cliprange=clip,
                     ent_coef=ent_coeff,
                     tensorboard_log=tensorboard_dir,
                     policy_kwargs={'layers': [32, 32]})
        experiment.add_tag("PPO2")
    else:
        model = PPO1(MlpPolicy,
                     env,
                     timesteps_per_actorbatch=n_steps,
                     optim_stepsize=learning_rate_max,
                     schedule="linear",
                     optim_epochs=noptepochs,
                     optim_batchsize=nminibatches,
                     gamma=gamma,
                     lam=lam,
                     clip_param=clip,
                     entcoeff=ent_coeff,
                     tensorboard_log=tensorboard_dir)
        experiment.add_tag("PPO1")

    # Train the model. Clean up environment on user cancellation.
    # FIX: the original also called eval_env.close(), but eval_env is commented
    # out above, so every run ended in a NameError; close only what exists.
    try:
        model.learn(total_timesteps=total_timesteps, callback=callback)
    except KeyboardInterrupt:
        print("INFO: Ctrl-C caught. Cleaning up...")

    model.save(model_save_path)
    env.close()
import numpy
import matplotlib.pyplot as plt
from comet_ml import Experiment
from mpl_toolkits.mplot3d import Axes3D
from sklearn.covariance import MinCovDet

from src.utils.pvalues import compute_pvalues

# Comet experiment for the chi-square simulation study.
# NOTE(review): comet_ml is imported after matplotlib here — Comet recommends
# importing it before other ML/plotting libraries; verify logging still works.
experiment = Experiment(project_name="deep-stats-thesis",
                        workspace="stecaron",
                        disabled=False)
experiment.add_tag("simulations_chi2")

# Set distributions parameters
hyper_params = {"N_DIM": 25, "N_OBS": 1000, "NOISE_PRC": 0.01}

# Log experiment parameters
experiment.log_parameters(hyper_params)

# Standard multivariate normal: zero mean vector, identity covariance.
MU = numpy.repeat(0, hyper_params["N_DIM"])
SIGMA = numpy.identity(hyper_params["N_DIM"])

# Simulate data
dt = numpy.random.multivariate_normal(mean=MU,
                                      cov=SIGMA,
                                      size=hyper_params["N_OBS"])

# Plot normal data
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')