def main(arguments):
    # load the features of the dataset
    features = datasets.load_breast_cancer().data
    # standardize the features
    features = StandardScaler().fit_transform(features)
    # get the number of features
    num_features = features.shape[1]
    # load the labels for the features
    labels = datasets.load_breast_cancer().target
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.30, stratify=labels)
    model = MLP(alpha=LEARNING_RATE,
                batch_size=BATCH_SIZE,
                node_size=NUM_NODES,
                num_classes=NUM_CLASSES,
                num_features=num_features)
    model.train(num_epochs=arguments.num_epochs,
                log_path=arguments.log_path,
                train_data=[train_features, train_labels],
                train_size=train_features.shape[0],
                test_data=[test_features, test_labels],
                test_size=test_features.shape[0],
                result_path=arguments.result_path)
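For reference, the same pipeline can be reproduced end to end with scikit-learn's built-in MLPClassifier standing in for the custom MLP class above (the hidden-layer width here is an arbitrary choice, not the NUM_NODES constant):

from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

# Load and standardize the same dataset.
data = datasets.load_breast_cancer()
features = StandardScaler().fit_transform(data.data)
train_features, test_features, train_labels, test_labels = train_test_split(
    features, data.target, test_size=0.30, stratify=data.target)
# Train a small MLP and report held-out accuracy.
clf = MLPClassifier(hidden_layer_sizes=(512,), max_iter=500)
clf.fit(train_features, train_labels)
print('test accuracy: {:.3f}'.format(clf.score(test_features, test_labels)))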
def define_model(self, model_name, ps):
    if model_name == 'catboost':
        return GBDTCatBoost(self.task, **ps)
    elif model_name == 'lightgbm':
        return GBDTLGBM(self.task, **ps)
    elif model_name == 'mlp':
        return MLP(self.task, **ps)
    elif model_name == 'gnn':
        return GNN(self.task, **ps)
    elif model_name == 'resgnn':
        # residual GNN: fit a GBDT first, then feed its predictions to the GNN
        gbdt = GBDTCatBoost(self.task)
        gbdt.fit(self.X, self.y,
                 self.train_mask, self.val_mask, self.test_mask,
                 cat_features=self.cat_features,
                 num_epochs=1000, patience=100,
                 plot=False, verbose=False, loss_fn=None,
                 metric_name='loss' if self.task == 'regression' else 'accuracy')
        return GNN(task=self.task, gbdt_predictions=gbdt.model.predict(self.X), **ps)
    elif model_name == 'bgnn':
        return BGNN(self.task, **ps)
def get_model(format, optimised=True) -> AbstractModel:
    if format == 'LogisticRegression':
        return LogisticRegressionModel(optimised)
    if format == 'RandomForest':
        return RandomForestModel(optimised)
    if format == 'NaiveBayes':
        return NaiveBayes(optimised)
    if format == 'GradientBoosting':
        return GradientBoosting(optimised)
    if format == 'SVM':
        return SVM(optimised)
    if format == 'OneClassSVM':
        return OneClassSVMModel(optimised)
    if format == 'DecisionTree':
        return DecisionTree(optimised)
    if format == 'AdaBoost':
        return AdaBoost(optimised)
    if format == 'GaussianProcess':
        return GaussianProcess(optimised)
    if format == 'MLP':
        return MLP(optimised)
    if format == 'KNeighbors':
        return KNeighbors(optimised)
    if format == 'QuadraticDiscriminant':
        return QuadraticDiscriminant(optimised)
    if format == 'Dummy':
        return Dummy(optimised)
    else:
        raise ValueError(format)
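A dictionary-based registry is a common refactor of long if-chains like this one; the sketch below uses a hypothetical stub class in place of the real model wrappers, so it is illustrative rather than a drop-in replacement:

class StubModel:
    """Hypothetical stand-in for the real AbstractModel subclasses."""
    def __init__(self, optimised=True):
        self.optimised = optimised

# One entry per supported name; adding a model becomes a one-line change.
MODEL_REGISTRY = {
    'MLP': StubModel,
    'SVM': StubModel,
    'Dummy': StubModel,
}

def get_model(name, optimised=True):
    try:
        return MODEL_REGISTRY[name](optimised)
    except KeyError:
        raise ValueError(name) from None

print(get_model('MLP').optimised)  # True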
def __init__(self, params):
    super(BCAgent, self).__init__(params)
    # Initialize policy network
    pol_params = self.params['p-bc']['pol_params']
    pol_params['input_size'] = self.N
    pol_params['output_size'] = self.M
    if 'final_activation' not in pol_params:
        pol_params['final_activation'] = torch.tanh
    self.pol = MLP(pol_params)
    # Create policy optimizer
    ppar = self.params['p-bc']['pol_optim']
    self.pol_optim = torch.optim.Adam(self.pol.parameters(),
                                      lr=ppar['lr'],
                                      weight_decay=ppar['reg'])
    # Use a replay buffer that will save planner actions
    self.pol_buf = ReplayBuffer(self.N, self.M, self.params['p-bc']['buf_size'])
    # Logging (store cum_rew, cum_emp_rew)
    self.hist['pols'] = np.zeros((self.T, 2))
    self.has_pol = True
    self.pol_cache = ()
def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.metadata = exp_info.metadata
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    batch_size = args.batch_size
    args.batch_size = 1
    feature_size, train_loader, val_loader, test_loader, all_loader = exp_info.get_dataset(
        args, save=True)
    label_num = exp_info.get_label_num(args)
    hidden_size = 256
    hidden_layers = 2
    args.resume = os.path.join(
        paths.checkpoint_root,
        'detection_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate))
    if args.model == 'lstm':
        detection_model = BiLSTM(feature_size, hidden_size, hidden_layers, label_num)
    else:
        detection_model = MLP(feature_size, hidden_size, label_num)
    detection_model = torch.nn.DataParallel(detection_model)
    logutils.load_checkpoint(args, detection_model)
    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.using_pred_duration))
    if args.model == 'lstm':
        prediction_model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        prediction_model = MLP(feature_size, hidden_size, label_num)
    prediction_model = torch.nn.DataParallel(prediction_model)
    logutils.load_checkpoint(args, prediction_model)
    validate(test_loader, detection_model, prediction_model, args=args)
def get_model(model):
    """Get Model instance"""
    assert model in ['CNN', 'MLP']
    if model == 'CNN':
        return Char_CNN(config, fc_layers, filter_sizes)
    else:
        return MLP(config, fc_layers)
def main(args):
    exp_info = exp_config.Experiment(args.dataset)
    paths = exp_info.paths
    args.paths = paths
    args.resume = os.path.join(
        paths.checkpoint_root,
        'frame_prediction_{}_{}_e{}_lr{}_b{}_lrd{}_s{}_do{}_pd{}'.format(
            args.task, args.model, args.epochs, args.lr, args.batch_size,
            args.lr_decay, 1 if not args.subsample else args.subsample,
            args.dropout_rate, args.pred_duration))
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    feature_size, train_loader, val_loader, test_loader, _ = exp_info.get_dataset(args)
    label_num = exp_info.get_label_num(args)
    criterion = torch.nn.CrossEntropyLoss()
    hidden_size = 256
    hidden_layers = 2
    if args.model == 'lstm':
        model = LSTM_Pred(feature_size, hidden_size, hidden_layers, label_num)
    else:
        model = MLP(feature_size, hidden_size, label_num)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    scheduler = lr_scheduler.StepLR(optimizer, args.lr_freq, args.lr_decay)
    model = torch.nn.DataParallel(model)
    if args.cuda:
        criterion = criterion.cuda()
        model = model.cuda()
    if args.resume:
        utils.load_checkpoint(args, model, optimizer, scheduler)
    best_prec = 0.0
    if args.eval:
        validate(test_loader, model, args, test=True)
    else:
        for epoch in tqdm(range(args.start_epoch, args.epochs), desc='Epochs Loop'):
            train(train_loader, model, criterion, optimizer, epoch, args)
            prec = validate(val_loader, model, args)
            scheduler.step()
            best_prec = max(prec, best_prec)
            is_best = (best_prec == prec)
            tqdm.write('Best precision: {:.03f}'.format(best_prec))
            if (epoch + 1) % args.save_interval == 0:
                utils.save_checkpoint(
                    {
                        'epoch': epoch + 1,
                        'state_dict': model.state_dict(),
                        'best_prec': best_prec,
                        'optimizer': optimizer.state_dict(),
                        'scheduler': scheduler.state_dict()
                    }, is_best, args)
def main(args):
    features, targets = generate_synthetic_data(args.model_type, args.num_samples)
    # split train/test sets
    x_train, x_val, y_train, y_val = train_test_split(features, targets, test_size=0.2)
    db_train = tf.data.Dataset.from_tensor_slices(
        (x_train, y_train)).batch(args.batch_size_train)
    db_val = tf.data.Dataset.from_tensor_slices(
        (x_val, y_val)).batch(args.batch_size_eval)
    if args.model_type == 'MLP':
        model = MLP(num_inputs=Constants._MLP_NUM_FEATURES,
                    num_layers=Constants._MLP_NUM_LAYERS,
                    num_dims=Constants._MLP_NUM_DIMS,
                    num_outputs=Constants._NUM_TARGETS,
                    dropout_rate=args.dropout)
    elif args.model_type == 'TCN':
        model = TCN(nb_filters=Constants._TCN_NUM_FILTERS,
                    kernel_size=Constants._TCN_KERNEL_SIZE,
                    nb_stacks=Constants._TCN_NUM_STACK,
                    dilations=Constants._TCN_DIALATIONS,
                    padding=Constants._TCN_PADDING,
                    # assumption: dropout comes from args.dropout, as in the
                    # MLP branch (the original passed args.lr here)
                    dropout_rate=args.dropout)
    criterion = keras.losses.MeanSquaredError()
    optimizer = keras.optimizers.Adam(learning_rate=args.lr)
    for epoch in range(args.max_epoch):
        for step, (x, y) in enumerate(db_train):
            with tf.GradientTape() as tape:
                logits = model(x)
                loss = criterion(y, logits)
            grads = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(grads, model.trainable_variables))
            if step % 100 == 0:
                print('Epoch: {}, Step: {}/{}, Loss: {}'.format(
                    epoch, step, int(x_train.shape[0] / args.batch_size_train), loss))
        # Perform inference and measure the speed every epoch
        start_time = time.time()
        for _, (x, _) in enumerate(db_val):
            _ = model.predict(x)
        end_time = time.time()
        print("Inference speed: {} samples/s\n".format(
            x_val.shape[0] / (end_time - start_time)))
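One performance note on loops like the one above: in TensorFlow 2 the gradient step is usually wrapped in tf.function so it runs as a compiled graph. A self-contained toy version of the same GradientTape pattern, using a throwaway Keras model rather than the MLP/TCN above:

import tensorflow as tf
from tensorflow import keras

model = keras.Sequential([keras.layers.Dense(8, activation='relu'),
                          keras.layers.Dense(1)])
loss_fn = keras.losses.MeanSquaredError()
opt = keras.optimizers.Adam(1e-3)

@tf.function  # compiles the step; subsequent calls reuse the traced graph
def train_step(x, y):
    with tf.GradientTape() as tape:
        loss = loss_fn(y, model(x, training=True))
    grads = tape.gradient(loss, model.trainable_variables)
    opt.apply_gradients(zip(grads, model.trainable_variables))
    return loss

x = tf.random.normal((32, 4))
y = tf.random.normal((32, 1))
print(float(train_step(x, y)))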
def __init__(self, params):
    super(VPGAgent, self).__init__(params)
    self.H = self.params['pg']['H']
    self.lam = self.params['pg']['lam']
    # Initialize policy network
    pol_params = self.params['pg']['pol_params']
    pol_params['input_size'] = self.N
    pol_params['output_size'] = self.M
    if 'final_activation' not in pol_params:
        pol_params['final_activation'] = torch.tanh
    self.pol = MLP(pol_params)
    # Std's are not dependent on state
    init_log_std = -0.8 * torch.ones(self.M)  # std ~0.45
    self.log_std = torch.nn.Parameter(init_log_std, requires_grad=True)
    # Create policy optimizer
    ppar = self.params['pg']['pol_optim']
    self.pol_params = list(self.pol.parameters()) + [self.log_std]
    self.pol_optim = torch.optim.Adam(self.pol_params,
                                      lr=ppar['lr'],
                                      weight_decay=ppar['reg'])
    # Create value function and optimizer
    val_params = self.params['pg']['val_params']
    val_params['input_size'] = self.N
    val_params['output_size'] = 1
    self.val = MLP(val_params)
    vpar = self.params['pg']['val_optim']
    self.val_optim = torch.optim.Adam(self.val.parameters(),
                                      lr=vpar['lr'],
                                      weight_decay=vpar['reg'])
    # Logging
    self.hist['ent'] = np.zeros(self.T)
def __init__(self, params):
    self.params = params
    self.kappa = self.params['kappa']
    self.dtype = self.params['dtype']
    self.device = self.params['device']
    self.models = []
    self.priors = []
    self.optims = []
    for i in range(self.params['ens_size']):
        model = MLP(self.params['model_params']).to(device=self.device)
        self.models.append(model)
        self.optims.append(
            torch.optim.Adam(model.parameters(),
                             lr=self.params['lr'],
                             weight_decay=self.params['reg']))
        # untrained prior network, kept fixed (no optimizer is created for it)
        prior = MLP(self.params['model_params']).to(device=self.device)
        prior.eval()
        self.priors.append(prior)
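Nothing in this constructor trains the priors, which matches the randomized-prior-functions pattern for ensembles. A hedged sketch of how such an ensemble is typically queried follows; the kappa-weighted sum is an assumption based on the fields above, not something this snippet confirms, and the linear stand-ins replace the real MLPs:

import torch

def ensemble_predict(models, priors, kappa, x):
    # Each member's prediction is its trainable net plus a fixed random prior.
    outs = []
    with torch.no_grad():
        for model, prior in zip(models, priors):
            outs.append(model(x) + kappa * prior(x))
    preds = torch.stack(outs)           # (ens_size, batch, out_dim)
    return preds.mean(0), preds.std(0)  # mean prediction, epistemic spread

# Toy usage:
models = [torch.nn.Linear(3, 1) for _ in range(5)]
priors = [torch.nn.Linear(3, 1).eval() for _ in range(5)]
mean, std = ensemble_predict(models, priors, kappa=1.0, x=torch.randn(4, 3))
print(mean.shape, std.shape)  # torch.Size([4, 1]) torch.Size([4, 1])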
def __init__(self, cnn_args, mlp_args):
    super(CNN, self).__init__()
    # embedding layer
    self.embedding_dim = cnn_args['emb_dim']
    self.embedding = nn.Embedding(cnn_args['vocab_size'], self.embedding_dim)
    # initialize with pretrained embeddings
    print("Initializing with pretrained embeddings")
    self.embedding.weight.data.copy_(cnn_args['pretrained_emb'])
    # Dropout definition
    self.dropout = nn.Dropout(0.25)
    # CNN parameters definition
    # Kernel sizes
    self.kernel_1 = 2
    self.kernel_2 = 3
    self.kernel_3 = 4
    self.kernel_4 = 5
    # Sequence length (used as the in_channels of each convolution)
    self.seq_len = cnn_args['text_len']
    # Output channels for each convolution
    self.out_channels = cnn_args['num_kernel']
    # Stride for each convolution
    self.stride = cnn_args['stride']
    # Convolution layers definition
    self.conv_1 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_1, self.stride)
    self.conv_2 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_2, self.stride)
    self.conv_3 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_3, self.stride)
    self.conv_4 = nn.Conv1d(self.seq_len, self.out_channels, self.kernel_4, self.stride)
    # Max pooling layers definition
    self.pool_1 = nn.MaxPool1d(self.kernel_1, self.stride)
    self.pool_2 = nn.MaxPool1d(self.kernel_2, self.stride)
    self.pool_3 = nn.MaxPool1d(self.kernel_3, self.stride)
    self.pool_4 = nn.MaxPool1d(self.kernel_4, self.stride)
    # MLP classifier, sized from the flattened convolution outputs
    mlp_input_size = self.in_features_fc()
    self.mlp = MLP(input_size=mlp_input_size,
                   hidden_size=mlp_args['hidden_size'],
                   num_classes=mlp_args['num_classes'])
def get_model(args):
    if args.model == "mlp":
        return MLP(args.input_size * 2, args.hidden_size, args.dropout,
                   args.output_size)
    elif args.model == "attention":
        return Attention(args.input_size * 2, args.hidden_size[0], args.layers,
                         args.dropout, args.output_size, gpu=args.gpu)
    elif args.model == 'linear':
        return Linear(args.input_size * 2, args.output_size)
    else:
        assert False
def get_model(config, args, seq_indexer, label_indexer):
    if config['type'] == 'RNN':
        return TextRNNAttn(embedding_alphabet=seq_indexer,
                           gpu=args.gpu,
                           feat_num=len(label_indexer),
                           **config['model'])
    elif config['type'] == 'CNN':
        return TextCNN(embedding_alphabet=seq_indexer,
                       gpu=args.gpu,
                       feat_num=len(label_indexer),
                       **config['model'])
    elif config['type'] == 'MLP':
        return MLP(embedding_alphabet=seq_indexer,
                   gpu=args.gpu,
                   feat_num=len(label_indexer),
                   **config['model'])
    else:
        raise RuntimeError('no model')
def __init__(self, kernelSize=11, featureSize=1024):
    super().__init__()
    assert kernelSize % 2 == 1, "kernel should be odd"
    # encoder: two conv + pool stages halve the temporal resolution twice
    self.conv1 = nn.Conv1d(featureSize, 64, kernelSize, padding=kernelSize // 2)
    self.maxpool1 = nn.MaxPool1d(2)
    self.conv2 = nn.Conv1d(64, 96, kernelSize, padding=kernelSize // 2)
    self.maxpool2 = nn.MaxPool1d(2)
    # decoder: two upsample + conv stages restore the original resolution
    self.upsample1 = nn.Upsample(scale_factor=2, mode="nearest")
    self.conv3 = nn.Conv1d(96, 64, kernelSize, padding=kernelSize // 2)
    self.upsample2 = nn.Upsample(scale_factor=2, mode="nearest")
    self.conv4 = nn.Conv1d(64, featureSize, kernelSize, padding=kernelSize // 2)
    self.classifier = MLP(featureSize)
    self.featureSize = featureSize
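The padding arithmetic here is easy to verify in isolation: with an odd kernel and stride-1 convolutions, padding of kernelSize // 2 preserves length, each MaxPool1d(2) halves it, and each Upsample doubles it back, so inputs whose length is divisible by 4 come out at their original size. A standalone check of that claim:

import torch
import torch.nn as nn

kernelSize, featureSize = 11, 1024
x = torch.randn(2, featureSize, 32)  # (batch, channels, time); time divisible by 4
encoder = nn.Sequential(
    nn.Conv1d(featureSize, 64, kernelSize, padding=kernelSize // 2),
    nn.MaxPool1d(2),
    nn.Conv1d(64, 96, kernelSize, padding=kernelSize // 2),
    nn.MaxPool1d(2),
)
decoder = nn.Sequential(
    nn.Upsample(scale_factor=2, mode="nearest"),
    nn.Conv1d(96, 64, kernelSize, padding=kernelSize // 2),
    nn.Upsample(scale_factor=2, mode="nearest"),
    nn.Conv1d(64, featureSize, kernelSize, padding=kernelSize // 2),
)
print(decoder(encoder(x)).shape)  # torch.Size([2, 1024, 32])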
def __init__(self, lstm_args, mlp_args):
    super(LSTM, self).__init__()
    # setting hyperparams
    self.hidden_dim = lstm_args['hidden_size']
    self.dropout_prob = lstm_args['dropout']
    self.use_gru = lstm_args['gru']
    self.embedding_dim = lstm_args['emb_dim']
    # embedding layer
    self.embedding = nn.Embedding(lstm_args['vocab_size'], self.embedding_dim)
    # initialize with pretrained word embeddings if provided
    if 'pretrained_emb' in lstm_args:
        print("Initializing with pretrained embeddings")
        self.embedding.weight.data.copy_(lstm_args['pretrained_emb'])
    # biLSTM layer + dropout
    self.lstm = nn.LSTM(input_size=self.embedding_dim,
                        hidden_size=self.hidden_dim,
                        num_layers=2,
                        batch_first=True,
                        bidirectional=True)
    self.drop = nn.Dropout(p=self.dropout_prob)
    # bidirectional GRU layer (used when lstm_args['gru'] is set)
    self.gru = nn.GRU(input_size=self.embedding_dim,
                      hidden_size=self.hidden_dim,
                      num_layers=2,
                      batch_first=True,
                      bidirectional=True,
                      dropout=self.dropout_prob)
    # MLP classifier; input is 2 * hidden_dim because the RNNs are bidirectional
    mlp_input_size = 2 * self.hidden_dim
    self.mlp = MLP(input_size=mlp_input_size,
                   hidden_size=mlp_args['hidden_size'],
                   num_classes=mlp_args['num_classes'])
'dev'], datasets['test']
seq_indexer = SeqIndexerBaseEmbeddings("glove", args.embedding_dir,
                                       args.embedding_dim, ' ')
seq_indexer.load_embeddings_from_file()
label_indexer = SeqIndexerBase("label", False, False)
label_indexer.add_instance(dataset.train_label)
if args.load is not None:
    model = torch.load(args.load)
    if args.gpu >= 0:
        model.cuda(device=args.gpu)
else:
    if args.model == 'MLP':
        model = MLP(embedding_indexer=seq_indexer,
                    gpu=args.gpu,
                    feat_num=len(label_indexer),
                    dropout=args.dropout_rate)
    elif args.model == 'CNN':
        model = TextCNN(embedding_indexer=seq_indexer,
                        gpu=args.gpu,
                        feat_num=len(label_indexer),
                        dropout=args.dropout_rate,
                        kernel_size=[2, 3, 5])
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=args.learning_rate,
                             betas=(0.9, 0.999),
                             eps=1e-08,
                             weight_decay=0,
                             amsgrad=False)
parser.add_argument('-v', '--validation',
                    dest='val',
                    type=float,
                    default=10.0,
                    help='Percent of the data that is used as validation (0-100)')
return parser.parse_args()


if __name__ == '__main__':
    args = get_args()
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    net = MLP(1, 3)
    if args.load:
        net.load_state_dict(torch.load(args.load, map_location=device))
    net.to(device=device)
    try:
        train_net(net=net,
                  epochs=args.epochs,
                  batch_size=args.batchsize,
                  lr=args.lr,
                  device=device,
                  val_percent=args.val / 100)
    except KeyboardInterrupt:
        torch.save(net.state_dict(), 'INTERRUPTED.pth')
def main():
    global opt
    opt = parser.parse_args()
    use_gpu = torch.cuda.is_available()

    # Set up logging
    if opt.savepath is None:
        path = os.path.join('save', datetime.datetime.now().strftime("%d-%H-%M-%S"))
    else:
        path = opt.savepath
    os.makedirs(path, exist_ok=True)
    logger = utils.Logger(path)

    # Keep track of accuracies
    val_accuracies = []
    test_accuracies = []

    # Seed for cross-val split
    seed = random.randint(0, 10000) if opt.seed < 0 else opt.seed
    logger.log('SEED: {}'.format(seed), stdout=False)

    # Load data
    if opt.preloaded_splits.lower() == 'none':
        start = time.time()
        data, label = get_data(opt.data, opt.label)
        logger.log('Data loaded in {:.1f}s\n'.format(time.time() - start))
    else:
        data, label = np.zeros(5), np.zeros(5)  # dummy labels for iterating over
        logger.log('Using preloaded splits\n')

    # Create cross-validation splits
    kf = StratifiedKFold(n_splits=5, random_state=seed, shuffle=True)

    # Cross validate
    for i, (train_index, test_index) in enumerate(kf.split(data, label)):
        # Log split
        logger.log('------------- SPLIT {} --------------\n'.format(i + 1))

        # Train / test split (ignored if opt.preloaded_splits is not 'none')
        X, X_test = data[train_index], data[test_index]
        y, y_test = label[train_index], label[test_index]

        # Perform PCA and generate dataloader or load from saved file
        start = time.time()
        apply_pca_transform = (opt.arch not in ['exp'])
        train_loader, val_loader, test_loader, pca_components, input_size, num_classes, pca_matrix = \
            get_dataloader(opt.preloaded_splits, X, X_test, y, y_test,
                           batch_size=opt.b,
                           val_fraction=opt.val_fraction,
                           pca_components=opt.pca_components,
                           apply_pca_transform=apply_pca_transform,
                           imputation_dim=opt.impute,
                           split=i,
                           save_dataset=(not opt.no_save_dataset))
        logger.log('Dataloader loaded in {:.1f}s\n'.format(time.time() - start))

        # Model
        arch = opt.arch.lower()
        assert arch in ['logreg', 'mlp', 'exp']
        if arch == 'logreg':
            model = LogisticRegression(input_size, opt.pca_components, num_classes)
        elif arch == 'mlp':
            model = MLP(input_size, opt.hidden_size, num_classes, opt.dp)
        elif arch == 'exp':
            model = ExperimentalModel(input_size, opt.pca_components,
                                      opt.hidden_size, num_classes, opt.dp)

        # Pretrained / Initialization
        if opt.model is not None and os.path.isfile(opt.model):
            # Pretrained model
            model.load_state_dict(torch.load(opt.model))
            logger.log('Loaded pretrained model.', stdout=(i == 0))
        else:
            # Initialize model uniformly
            for p in model.parameters():
                p.data.uniform_(-0.1, 0.1)
            logger.log('Initialized model from scratch.', stdout=(i == 0))
        model = model.cuda() if use_gpu else model
        print(model)

        # Initialize first layer with PCA and fix PCA weights if model requires
        if opt.arch in ['exp']:
            model.first_layer.weight.data.copy_(pca_matrix)
            logger.log('Initialized first layer as PCA', stdout=(i == 0))
            if not opt.finetune_pca:
                model.first_layer.weight.requires_grad = False
                logger.log('Fixed PCA weights', stdout=(i == 0))

        # Loss function and optimizer
        criterion = nn.CrossEntropyLoss(size_average=False)
        optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model.parameters()),
                                     lr=opt.lr, weight_decay=opt.wd)
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, 'max',
            patience=opt.lr_decay_patience,
            factor=opt.lr_decay_factor,
            verbose=True,
            cooldown=opt.lr_decay_cooldown)

        # Log parameters
        logger.log('COMMAND LINE ARGS: ' + ' '.join(sys.argv), stdout=False)
        logger.log('ARGS: {}\nOPTIMIZER: {}\nLEARNING RATE: {}\nSCHEDULER: {}\nMODEL: {}\n'.format(
            opt, optimizer, opt.lr, vars(scheduler), model), stdout=False)

        # If specified, only evaluate model
        if opt.evaluate:
            assert opt.model is not None, 'no pretrained model to evaluate'
            total_correct, total, _ = validate(model, val_loader, criterion)
            logger.log('Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
                total_correct / total, total_correct, total))
            return

        # Train model
        start_time = time.time()
        best_acc = train(model, train_loader, val_loader, optimizer, criterion, logger,
                         num_epochs=opt.epochs, print_freq=opt.print_freq, model_id=i)
        logger.log('Best train accuracy: {:.2f}% \t Finished split {} in {:.2f}s\n'.format(
            100 * best_acc, i + 1, time.time() - start_time))
        val_accuracies.append(best_acc)

        # Best evaluation on validation set
        best_model_path = os.path.join(path, 'model_{}.pth'.format(i))
        model.load_state_dict(torch.load(best_model_path))  # load best model
        total_correct, total, _ = validate(model, val_loader, criterion)  # check val set
        logger.log('Val Accuracy: {:.3f} \t Total correct: {} \t Total: {}'.format(
            total_correct / total, total_correct, total))

        # Optionally also evaluate on test set
        if opt.test:
            total_correct, total, visualize = validate(model, test_loader,
                                                       criterion, visualize=True)
            logger.log('Test Accuracy: {:.3f} \t Total correct: {} \t Total: {}\n'.format(
                total_correct / total, total_correct, total))
            logger.save_model(visualize, 'visualize_{}.pth'.format(i))
            test_accuracies.append(total_correct / total)

    # Log after training
    logger.log('Val Accuracies: {}'.format(val_accuracies))
    logger.log('Test Accuracies: {}'.format(test_accuracies))
    logger.log('Run id: {} \t Test Accuracies: {}'.format(opt.id, test_accuracies))
def __init__(self, cfg):
    trainLoader, valLoader = get_dataloaders(
        args.trainNormalFolder, args.trainNormalAnnotations,
        args.trainAbnormalFolder, args.trainAbnormalAnnotations,
        args.trainNormalTopK,
        args.valNormalFolder, args.valNormalAnnotations,
        args.valAbnormalFolder, args.valAbnormalAnnotations,
        args.valNormalTopK,
        args.batchSize, args.numWorkers, args.model, args.windowSize,
        args.subWindows, args.featureSize, args.maxVideoSize)
    self.modelType = args.model
    self.trainLoader = trainLoader
    self.valLoader = valLoader
    self.expFolder = args.expFolder
    self.maskValue = args.maskValue
    self.stepCounter = 0
    self.bestAUC = 0
    self.noNormalSegmentation = args.noNormalSegmentation
    self.lossType = args.loss
    if args.model == "mlp":
        self.model = MLP(featureSize=args.featureSize)
    elif args.model == "tcn":
        self.model = EDTCN(featureSize=args.featureSize, kernelSize=args.kernelSize)
    elif args.model == "mstcn":
        self.model = MultiStageModel(num_stages=args.numStages,
                                     num_layers=args.numLayers,
                                     num_f_maps=args.numFeatureMaps,
                                     dim=args.featureSize,
                                     ssRepeat=args.firstStageRepeat)
        print("[Info] MS-TCN W{}-S{}-L{} has been created".format(
            args.windowSize, args.numStages, args.numLayers))
    # elif args.model == "mcbtcn":
    #     self.model = MultiClassBinaryTCN(numClassStages=args.numClassStages,
    #                                      numBinaryStages=args.numBinaryStages,
    #                                      num_layers=args.numLayers,
    #                                      num_f_maps=args.numFeatureMaps,
    #                                      dim=args.featureSize, numClasses=16)
    self.model = self.model.float()
    # if torch.cuda.is_available():
    #     self.model = self.model.cuda()
    if args.optimizer == "adam":
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=args.learningRate,
                                          betas=(0.5, 0.9),
                                          eps=1e-08,
                                          weight_decay=0,
                                          amsgrad=False)
    self.scheduler = torch.optim.lr_scheduler.StepLR(
        self.optimizer,
        step_size=args.schedulerStepSize,
        gamma=args.schedulerGamma)
    if args.modelPath:
        self.loadCheckpoint(args.modelPath)
        print("[Info] Model has been loaded from {}".format(args.modelPath))
    if torch.cuda.is_available():
        self.model = self.model.cuda()
    self.model = self.model.float()
    self.ceLoss = torch.nn.CrossEntropyLoss(ignore_index=-1)
    self.ASLoss = TemporalHardPairLoss(max_violation=True,
                                       margin=args.adLossMargin,
                                       measure="output")
    self.mseLoss = torch.nn.MSELoss()
    self.lossLambda = args.adLossLambda
    self.writer = None
    if not args.test:
        self.writer = SummaryWriter(log_dir=args.expFolder)
def choose_model(conf, G, features, labels, byte_idx_train, labels_one_hot):
    if conf['model_name'] == 'GCN':
        model = GCN(g=G,
                    in_feats=features.shape[1],
                    n_hidden=conf['hidden'],
                    n_classes=labels.max().item() + 1,
                    n_layers=1,
                    activation=F.relu,
                    dropout=conf['dropout']).to(conf['device'])
    elif conf['model_name'] == 'GAT':
        num_heads = 8
        num_layers = 1
        num_out_heads = 1
        heads = ([num_heads] * num_layers) + [num_out_heads]
        model = GAT(g=G,
                    num_layers=num_layers,
                    in_dim=G.ndata['feat'].shape[1],
                    num_hidden=8,
                    num_classes=labels.max().item() + 1,
                    heads=heads,
                    activation=F.relu,
                    feat_drop=0.6,
                    attn_drop=0.6,
                    negative_slope=0.2,  # negative slope of leaky relu
                    residual=False).to(conf['device'])
    elif conf['model_name'] == 'PLP':
        model = PLP(g=G,
                    num_layers=conf['num_layers'],
                    in_dim=G.ndata['feat'].shape[1],
                    emb_dim=conf['emb_dim'],
                    num_classes=labels.max().item() + 1,
                    activation=F.relu,
                    feat_drop=conf['feat_drop'],
                    attn_drop=conf['attn_drop'],
                    residual=False,
                    byte_idx_train=byte_idx_train,
                    labels_one_hot=labels_one_hot,
                    ptype=conf['ptype'],
                    mlp_layers=conf['mlp_layers']).to(conf['device'])
    elif conf['model_name'] == 'GraphSAGE':
        model = GraphSAGE(in_feats=G.ndata['feat'].shape[1],
                          n_hidden=16,
                          n_classes=labels.max().item() + 1,
                          n_layers=1,
                          activation=F.relu,
                          dropout=0.5,
                          aggregator_type=conf['agg_type']).to(conf['device'])
    elif conf['model_name'] == 'APPNP':
        model = APPNP(g=G,
                      in_feats=G.ndata['feat'].shape[1],
                      hiddens=[64],
                      n_classes=labels.max().item() + 1,
                      activation=F.relu,
                      feat_drop=0.5,
                      edge_drop=0.5,
                      alpha=0.1,
                      k=10).to(conf['device'])
    elif conf['model_name'] == 'LogReg':
        # logistic regression as a one-layer MLP
        model = MLP(num_layers=1,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=None,
                    output_dim=labels.max().item() + 1,
                    dropout=0).to(conf['device'])
    elif conf['model_name'] == 'MLP':
        model = MLP(num_layers=2,
                    input_dim=G.ndata['feat'].shape[1],
                    hidden_dim=conf['hidden'],
                    output_dim=labels.max().item() + 1,
                    dropout=conf['dropout']).to(conf['device'])
    else:
        raise ValueError('Undefined Model.')
    return model
def get_model(self, model_cfg):
    model = MLP(featureSize=model_cfg.feature_size)
    return model
def run_episode(strategies, policy, beta, device, num_worker):
    states, actions = [], []
    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)
    if prop.MODEL == "MLP":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL == "CNN":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL == "RESNET18":
        model = models.resnet.ResNet18().to(device)
    init_weights = deepcopy(model.state_dict())
    # re-init seed was here before
    use_learner = True if np.random.rand(1) > beta else False
    if use_learner:
        policy = policy.to(device)  # load policy only when learner is used for states
    dataset_pool, valid_dataset, test_dataset = get_policy_training_splits()
    # Initial sampling
    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, prop.INIT_SIZE, prop.NUM_CLASSES)
    if prop.SINGLE_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(StrategyClass(dataset_pool, valid_dataset, test_dataset))
    if prop.CLUSTER_EXPERT_HEAD:
        UncertaintyStrategieClasses, DiversityStrategieClasses = strategies
        un_strategies = []
        di_strategies = []
        for StrategyClass in UncertaintyStrategieClasses:
            un_strategies.append(StrategyClass(dataset_pool, valid_dataset, test_dataset))
        for StrategyClass in DiversityStrategieClasses:
            di_strategies.append(StrategyClass(dataset_pool, valid_dataset, test_dataset))
    if prop.CLUSTERING_AUX_LOSS_HEAD:
        my_strategies = []
        for StrategyClass in strategies:
            my_strategies.append(StrategyClass(dataset_pool, valid_dataset, test_dataset))
    init_acc = train_validate_model(model, device, train_dataset,
                                    valid_dataset, test_dataset)
    t = trange(1, prop.NUM_ACQS + 1,
               desc="Acquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        subset_ind = np.random.choice(a=len(pool_dataset), size=prop.K, replace=False)
        pool_subset = make_tensordataset(pool_dataset, subset_ind)
        if prop.CLUSTER_EXPERT_HEAD:
            un_sel_ind = expert(acq_num, model, init_weights, un_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)
            # assumption: the diversity experts select with di_strategies
            # (the original passed un_strategies here as well)
            di_sel_ind = expert(acq_num, model, init_weights, di_strategies,
                                train_dataset, pool_subset, valid_dataset,
                                test_dataset, device)
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             un_sel_ind=un_sel_ind,
                                             di_sel_ind=di_sel_ind)
        if prop.SINGLE_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             sel_ind=sel_ind)
        if prop.CLUSTERING_AUX_LOSS_HEAD:
            sel_ind = expert(acq_num, model, init_weights, my_strategies,
                             train_dataset, pool_subset, valid_dataset,
                             test_dataset, device)
            state, action = get_state_action(model, train_dataset, pool_subset,
                                             sel_ind=sel_ind,
                                             clustering=None)  # not implemented
        states.append(state)
        actions.append(action)
        if use_learner:
            with torch.no_grad():
                if prop.SINGLE_HEAD:
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs, prop.ACQ_SIZE)[1].cpu().numpy()
                if prop.CLUSTER_EXPERT_HEAD:
                    policy_output_uncertainty, policy_output_diversity = policy(state.to(device))
                    # clustering_space = policy_output_diversity.reshape(prop.K, prop.POLICY_OUTPUT_SIZE)
                    # one topk for uncertainty, one topk for diversity
                    diversity_selection = torch.topk(
                        policy_output_diversity.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    uncertainty_selection = torch.topk(
                        policy_output_uncertainty.reshape(prop.K),
                        int(prop.ACQ_SIZE / 2.0))[1].cpu().numpy()
                    sel_ind = (uncertainty_selection, diversity_selection)
                if prop.CLUSTERING_AUX_LOSS_HEAD:  # not implemented
                    policy_outputs = policy(state.to(device)).flatten()
                    sel_ind = torch.topk(policy_outputs, prop.ACQ_SIZE)[1].cpu().numpy()
        if prop.SINGLE_HEAD:
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
        if prop.CLUSTER_EXPERT_HEAD:
            unified_sel_ind = np.concatenate((sel_ind[0], sel_ind[1]))
            q_idxs = subset_ind[unified_sel_ind]  # from subset to full pool
        remaining_ind = list(set(np.arange(len(pool_dataset))) - set(q_idxs))
        sel_dataset = make_tensordataset(pool_dataset, q_idxs)
        train_dataset = concat_datasets(train_dataset, sel_dataset)
        pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)
    return states, actions
    val_size=args.val_size,
    random_seed=args.random_seed,
)
os.makedirs('losses/', exist_ok=True)
if args.model.lower() == 'gbdt':
    from models.GBDT import GBDT
    model = GBDT(depth=args.depth)
    model.fit(X, y, train_mask, val_mask, test_mask,
              cat_features=cat_features,
              # assumption: epochs come from args.num_epochs, as in the other
              # branches (the original passed args.num_features here)
              num_epochs=args.num_epochs,
              patience=args.patience,
              learning_rate=args.learning_rate,
              plot=False, verbose=False,
              loss_fn=args.loss_fn)
elif args.model.lower() == 'mlp':
    from models.MLP import MLP
    model = MLP(task=args.task)
    min_rmse_epoch, accuracies = model.fit(X, y, train_mask, val_mask, test_mask,
                                           cat_features=cat_features,
                                           num_epochs=args.num_epochs,
                                           patience=args.patience,
                                           learning_rate=args.learning_rate,
                                           hidden_dim=args.hidden_dim,
                                           logging_epochs=args.logging_steps,
                                           loss_fn=args.loss_fn)
    model.plot(accuracies, legend=['Train', 'Val', 'Test'],
               title='MLP RMSE', output_fn='mlp_losses.pdf')
elif args.model.lower() == 'gnn':
    from models.GNN import GNN
    model = GNN(heads=args.heads, feat_drop=args.feat_drop, attn_drop=args.attn_drop)
    min_rmse_epoch, accuracies = model.fit(networkx_graph, X, y,
                                           train_mask, val_mask, test_mask,
                                           cat_features=cat_features,
                                           num_epochs=args.num_epochs,
                                           patience=args.patience,
                                           learning_rate=args.learning_rate,
                                           hidden_dim=args.hidden_dim,
                                           logging_epochs=args.logging_steps,
                                           optimize_node_features=args.input_grad,
                                           loss_fn=args.loss_fn)
def classifier_selection(self):
    """
    Function that instantiates classifiers

    :arg self (Trainer): instance of the class
    :return model (Classifier): selected model when self.classifier_type is 1
            model1 (Classifier): first selected model when self.classifier_type is 2
            model2 (Classifier): second selected model when self.classifier_type is 2
            classifier_list (list): list with selected classifier names
    """
    if self.classifier == 'SVM':
        classifier_list = ['SVM']
        if self.classifier_type == 1:
            model = SVMClassifier(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = SVMClassifier(cv=self.cross_validation)
            model2 = SVMClassifier(cv=self.cross_validation)
    elif self.classifier == 'LogisticRegressor':
        classifier_list = ['LogisticRegressor']
        model = LogisticRegressor()
        if self.classifier_type == 1:
            model = LogisticRegressor(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = LogisticRegressor(cv=self.cross_validation)
            model2 = LogisticRegressor(cv=self.cross_validation)
    elif self.classifier == 'MLP':
        classifier_list = ['MLP']
        if self.classifier_type == 1:
            model = MLP(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = MLP(cv=self.cross_validation)
            model2 = MLP(cv=self.cross_validation)
    elif self.classifier == 'RandomForest':
        classifier_list = ['RandomForest']
        model = RandomForest()
        if self.classifier_type == 1:
            model = RandomForest(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = RandomForest(cv=self.cross_validation)
            model2 = RandomForest(cv=self.cross_validation)
    elif self.classifier == 'RBF':
        classifier_list = ['RBF']
        if self.classifier_type == 1:
            model = RBFClassifier(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = RBFClassifier(cv=self.cross_validation)
            model2 = RBFClassifier(cv=self.cross_validation)
    elif self.classifier == 'Fisher':
        classifier_list = ['Fisher']
        if self.classifier_type == 1:
            model = FisherDiscriminant(cv=self.cross_validation)
        elif self.classifier_type == 2:
            model1 = FisherDiscriminant(cv=self.cross_validation)
            model2 = FisherDiscriminant(cv=self.cross_validation)
    elif self.classifier == 'all':
        classifier_list = ['SVM', 'MLP', 'LogisticRegressor',
                           'RandomForest', 'RBF', 'Fischer']
        if self.classifier_type == 1:
            model_SVM = SVMClassifier(cv=self.cross_validation)
            model_MLP = MLP(cv=self.cross_validation)
            model_Logit = LogisticRegressor(cv=self.cross_validation)
            model_Forest = RandomForest(cv=self.cross_validation)
            model_RBF = RBFClassifier(cv=self.cross_validation)
            model_Fischer = FisherDiscriminant(cv=self.cross_validation)
            model = [model_SVM, model_MLP, model_Logit,
                     model_Forest, model_RBF, model_Fischer]
        elif self.classifier_type == 2:
            model_SVM = SVMClassifier(cv=self.cross_validation)
            model_MLP = MLP(cv=self.cross_validation)
            model_Logit = LogisticRegressor(cv=self.cross_validation)
            model_Forest = RandomForest(cv=self.cross_validation)
            model_RBF = RBFClassifier(cv=self.cross_validation)
            model_Fischer = FisherDiscriminant(cv=self.cross_validation)
            model1 = [model_SVM, model_MLP, model_Logit,
                      model_Forest, model_RBF, model_Fischer]
            model2 = copy.deepcopy(model1)
    else:
        raise SyntaxError('Invalid model name')
    if self.classifier_type == 1:
        return model, classifier_list
    elif self.classifier_type == 2:
        return model1, model2, classifier_list
def active_learn(exp_num, StrategyClass, subsample):
    # all strategies use same initial training data and model weights
    reinit_seed(prop.RANDOM_SEED)
    test_acc_list = []
    if prop.MODEL.lower() == "mlp":
        model = MLP().apply(weights_init).to(device)
    if prop.MODEL.lower() == "cnn":
        model = CNN().apply(weights_init).to(device)
    if prop.MODEL.lower() == "resnet18":
        model = models.resnet.ResNet18().to(device)
    init_weights = copy.deepcopy(model.state_dict())
    reinit_seed(exp_num * 10)
    dataset_pool, valid_dataset, test_dataset = get_data_splits()
    # initial data
    train_dataset, pool_dataset = stratified_split_dataset(
        dataset_pool, 2 * prop.NUM_CLASSES, prop.NUM_CLASSES)
    strategy = StrategyClass(dataset_pool, valid_dataset, test_dataset, device)
    # calculate the overlap of strategy with other strategies
    strategies = [MCDropoutSampling, EnsembleSampling, EntropySampling,
                  LeastConfidenceSampling, CoreSetAltSampling, BadgeSampling]
    overlapping_strategies = []
    for StrategyClass in strategies:
        overlapping_strategies.append(StrategyClass(dataset_pool, valid_dataset, test_dataset))
    t = trange(1, prop.NUM_ACQS + 1,
               desc="Acquisitions (size {})".format(prop.ACQ_SIZE),
               leave=True)
    for acq_num in t:
        model.load_state_dict(init_weights)
        test_acc = train_validate_model(model, device, train_dataset,
                                        valid_dataset, test_dataset)
        test_acc_list.append(test_acc)
        if subsample:
            subset_ind = np.random.choice(a=len(pool_dataset), size=prop.K, replace=False)
            pool_subset = make_tensordataset(pool_dataset, subset_ind)
            sel_ind, remain_ind = strategy.query(prop.ACQ_SIZE, model,
                                                 train_dataset, pool_subset)
            q_idxs = subset_ind[sel_ind]  # from subset to full pool
            remaining_ind = list(set(np.arange(len(pool_dataset))) - set(q_idxs))
            sel_dataset = make_tensordataset(pool_dataset, q_idxs)
            train_dataset = concat_datasets(train_dataset, sel_dataset)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
        else:
            # all strategies work on k-sized windows in semi-batch setting
            sel_ind, remaining_ind = strategy.query(prop.ACQ_SIZE, model,
                                                    train_dataset, pool_dataset)
            sel_dataset = make_tensordataset(pool_dataset, sel_ind)
            pool_dataset = make_tensordataset(pool_dataset, remaining_ind)
            train_dataset = concat_datasets(train_dataset, sel_dataset)
        logging.info("Accuracy for {} sampling and {} acquisition is {}".format(
            strategy.name, acq_num, test_acc))
    return test_acc_list
def create_model(mode='train', model_type='transformer'):
    if model_type == 'transformer':
        return SpeechTransformer(mode=mode, drop_rate=hparams.transformer_drop_rate)
    elif model_type == 'mlp':
        return MLP(mode, hparams.mlp_dropout_rate)
def __init__(self, env, args, device='cpu'):
    """
    Instantiate an MFEC Agent
    ----------
    env: gym.Env
        gym environment to train on
    args: args class from argparser
        args are from train.py: see train.py for help with each arg
    device: string
        'cpu' or 'cuda:0' depending on use_cuda flag from train.py
    """
    self.environment_type = args.environment_type
    self.env = env
    self.actions = range(self.env.action_space.n)
    self.frames_to_stack = args.frames_to_stack
    self.Q_train_algo = args.Q_train_algo
    self.use_Q_max = args.use_Q_max
    self.force_knn = args.force_knn
    self.weight_neighbors = args.weight_neighbors
    self.delta = args.delta
    self.device = device
    self.rs = np.random.RandomState(args.seed)

    # Hyperparameters
    self.epsilon = args.initial_epsilon
    self.final_epsilon = args.final_epsilon
    self.epsilon_decay = args.epsilon_decay
    self.gamma = args.gamma
    self.lr = args.lr
    self.q_lr = args.q_lr

    # Autoencoder for state embedding network
    self.vae_batch_size = args.vae_batch_size  # batch size for training VAE
    self.vae_epochs = args.vae_epochs  # number of epochs to run VAE
    self.embedding_type = args.embedding_type
    self.SR_embedding_type = args.SR_embedding_type
    self.embedding_size = args.embedding_size
    self.in_height = args.in_height
    self.in_width = args.in_width
    if self.embedding_type == 'VAE':
        self.vae_train_frames = args.vae_train_frames
        self.vae_loss = VAELoss()
        self.vae_print_every = args.vae_print_every
        self.load_vae_from = args.load_vae_from
        self.vae_weights_file = args.vae_weights_file
        self.vae = VAE(self.frames_to_stack, self.embedding_size,
                       self.in_height, self.in_width)
        self.vae = self.vae.to(self.device)
        self.optimizer = get_optimizer(args.optimizer, self.vae.parameters(), self.lr)
    elif self.embedding_type == 'random':
        self.projection = self.rs.randn(
            self.embedding_size,
            self.in_height * self.in_width * self.frames_to_stack).astype(np.float32)
    elif self.embedding_type == 'SR':
        self.SR_train_algo = args.SR_train_algo
        self.SR_gamma = args.SR_gamma
        self.SR_epochs = args.SR_epochs
        self.SR_batch_size = args.SR_batch_size
        self.n_hidden = args.n_hidden
        self.SR_train_frames = args.SR_train_frames
        self.SR_filename = args.SR_filename
        if self.SR_embedding_type == 'random':
            self.projection = np.random.randn(
                self.embedding_size,
                self.in_height * self.in_width).astype(np.float32)
        if self.SR_train_algo == 'TD':
            self.mlp = MLP(self.embedding_size, self.n_hidden)
            self.mlp = self.mlp.to(self.device)
            self.loss_fn = nn.MSELoss(reduction='mean')
            params = self.mlp.parameters()
            self.optimizer = get_optimizer(args.optimizer, params, self.lr)

    # QEC
    self.max_memory = args.max_memory
    self.num_neighbors = args.num_neighbors
    self.qec = QEC(self.actions, self.max_memory, self.num_neighbors,
                   self.use_Q_max, self.force_knn, self.weight_neighbors,
                   self.delta, self.q_lr)
    # self.state = np.empty(self.embedding_size, self.projection.dtype)
    # self.action = int
    self.memory = []
    self.print_every = args.print_every
    self.episodes = 0