def main():
    n_out = 10
    learning_rate = 0.01
    n_iter = 10000
    batch_size = 20
    validate_frequency = 100
    hiddenlayer_params = [500]
    activate_func = tf.tanh
    mnist = mnist_input_data.read_data_sets("../data/MNIST_data/", one_hot=True)
    model = MLP(mnist, n_out, learning_rate, n_iter, batch_size,
                validate_frequency, hiddenlayer_params, activate_func)
    model.train_mnist()
def test_template_contraction_mlp():
    gaussian = Gaussian([2])
    sList = [MLP(1, 10), MLP(1, 10), MLP(1, 10), MLP(1, 10)]
    tList = [MLP(1, 10), MLP(1, 10), MLP(1, 10), MLP(1, 10)]
    realNVP = RealNVP([2], sList, tList, gaussian)
    x = realNVP.prior(10)
    mask = realNVP.createMask(["channel"] * 4, ifByte=1)
    print("original")
    # print(x)
    z = realNVP._generateWithContraction(x, realNVP.mask, realNVP.mask_, 0, True)
    print("Forward")
    # print(z)
    zp = realNVP._inferenceWithContraction(z, realNVP.mask, realNVP.mask_, 0, True)
    print("Backward")
    # print(zp)
    assert_array_almost_equal(realNVP._generateLogjac.data.numpy(),
                              -realNVP._inferenceLogjac.data.numpy())
    x_data = realNVP.prior(10)
    y_data = realNVP.prior.logProbability(x_data)
    print("logProbability")
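# The flow tests in this file build their s/t networks as `MLP(n, hidden)`.
# A minimal sketch of such a module, assuming one hidden tanh layer and an
# n-to-n mapping (the actual architecture may differ):
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, n, hidden):
        super(MLP, self).__init__()
        self.net = nn.Sequential(
            nn.Linear(n, hidden),
            nn.Tanh(),
            nn.Linear(hidden, n),
        )

    def forward(self, x):
        return self.net(x)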
def train(FLAGS):
    """Train our embeddings."""
    # Get data loaders
    print("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words = process_data(
        data_dir=FLAGS.data_dir,
        data_file=FLAGS.data_file,
        vocab_size=FLAGS.vocab_size,
        window_size=FLAGS.window_size,
        split_ratio=FLAGS.split_ratio,
        batch_size=FLAGS.batch_size,
    )
    print("[COMPLETE]")

    # Initialize model, criterion, optimizer
    print("==> Initializing model components ... ", end="")
    model = MLP(
        D_in=num_unique_words,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
    )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=FLAGS.lr)
    print("[COMPLETE]")

    # Train the model
    print("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
    )
    print("\n[COMPLETE]")

    # Save the model
    print("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print("\n[COMPLETE]")
def test_invertible_2d_cuda():
    Nlayers = 4
    Hs = 10
    Ht = 10
    sList = [MLP(2, Hs) for _ in range(Nlayers)]
    tList = [MLP(2, Ht) for _ in range(Nlayers)]
    masktypelist = ['channel', 'channel'] * (Nlayers // 2)
    # assemble RealNVP blocks into a TEBD layer
    prior = Gaussian([4, 4])
    layers = [
        RealNVP([2, 2], sList, tList, Gaussian([2, 2]), masktypelist)
        for _ in range(4)
    ]
    model = TEBD(2, [2, 2], 4, layers, prior).cuda()
    z = model.prior(10).cuda()
    print("original")
    x = model.generate(z)
    print("Forward")
    zp = model.inference(x)
    print("Backward")
    assert_array_almost_equal(z.data.cpu().numpy(), zp.data.cpu().numpy())

    saveDict = model.saveModel({})
    torch.save(saveDict, './saveNet.testSave')

    sListp = [MLP(2, Hs) for _ in range(Nlayers)]
    tListp = [MLP(2, Ht) for _ in range(Nlayers)]
    masktypelistp = ['channel', 'channel'] * (Nlayers // 2)
    # assemble RealNVP blocks into a TEBD layer
    priorp = Gaussian([4, 4])
    layersp = [
        RealNVP([2, 2], sListp, tListp, Gaussian([2, 2]), masktypelistp)
        for _ in range(4)
    ]
    modelp = TEBD(2, [2, 2], 4, layersp, priorp)
    saveDictp = torch.load('./saveNet.testSave')
    modelp.loadModel(saveDictp)
    xp = modelp.generate(z.cpu())
    assert_array_almost_equal(xp.data.numpy(), x.data.cpu().numpy())
def main():
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)

    data = locate('get_{}'.format(args.dataset))(args)
    train_data, val_data, test_data = data
    if args.dataset == 'mnist':
        model = MLP(args)
    elif args.dataset in ('cifar10', 'cifar100'):
        model = Resnet18(args)
    else:
        raise ValueError('unsupported dataset: {}'.format(args.dataset))

    weight_arch = data_selection(data[0])
    architect = Architect(model, weight_arch, args)

    train_loader = DataLoader(train_data, batch_size=args.batch_size,
                              shuffle=True, drop_last=True)
    val_loader = DataLoader(val_data, batch_size=64, shuffle=True, drop_last=False)
    test_loader = DataLoader(test_data, batch_size=64, shuffle=True, drop_last=False)

    optimizer = torch.optim.SGD(
        model.parameters(),
        args.learning_rate,
        momentum=args.momentum,
        weight_decay=args.weight_decay)
    print(optimizer.state)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, float(args.n_epochs), eta_min=args.learning_rate_min)

    for epoch in range(args.n_epochs):
        scheduler.step()
        lr = scheduler.get_lr()[0]
        logging.info('epoch %d lr %e', epoch, lr)

        train_acc, train_obj = Train(train_loader, val_data, model, args,
                                     architect, weight_arch, optimizer)
        logging.info('train_acc %f', train_acc)

        # validation
        valid_acc, valid_obj = infer(val_loader, model)
        logging.info('valid_acc %f', valid_acc)

        utils.save(model, os.path.join(args.save, 'weights.pt'))
def main(_):
    from tensorflow.examples.tutorials.mnist import input_data
    tf.reset_default_graph()
    mnist = input_data.read_data_sets("MNIST_data/", one_hot=True)

    config = Config()
    sess = tf.Session()
    mlp = MLP(config, sess)
    mlp.fit(mnist.train.images, mnist.train.labels)
    print("[*] Finished Training")
    print("Test accuracy: {}".format(
        get_accuracy([mnist.test.images, mnist.test.labels], mlp)))
def main():
    dim = 3
    net = MLP(input_dim=dim, hidden_dim=args.hidden_dim, output_dim=dim)
    # load the trained model
    net.load_state_dict(torch.load(args.log_dir + '/net_state_dict.pt'),
                        strict=False)

    # center of the neighborhood for the random initial starting points
    y0 = np.array([4, 3, -2])
    y0_l = [initial(y0) for _ in range(args.num_traj)]

    def criterion(y, y_):
        return np.mean((y - y_) ** 2)

    print('test loss:', test_loss(net, criterion, y0_l))
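# `initial` is not defined in this snippet; a plausible sketch, given the
# comments above, is a helper that samples a random starting point in a small
# neighborhood of y0 (the radius value below is an assumption):
import numpy as np

def initial(y0, radius=0.5):
    return y0 + np.random.uniform(-radius, radius, size=y0.shape)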
def load_model(num_layers: int, features: int, num_rel_types: int,
               channels: int, embeddings: int, edge_size: str,
               use_pretrained: str, edges: bool = False) -> Model:
    print('[.] Building model...')
    if use_pretrained:
        model = MLP(num_features=features)
    else:
        model = IRGCNModel(num_layers=num_layers,
                           num_rel_types=num_rel_types,
                           channels=channels,
                           embeddings=embeddings,
                           relation_type=edge_size)

    A_in = [Input(shape=(None, ), sparse=edge_size) for _ in range(num_rel_types)]
    X_in = Input(shape=(features, ))
    inputs = [X_in, A_in]
    if edges:
        E_in = Input(shape=(3, ))
        inputs.append(E_in)

    output = model(inputs)
    print('[+] Model has been built')
    print(output.shape)
    return Model(inputs, output)
def __init__(self, input_shape, vol):
    super(bgrl, self).__init__(500)
    self.input_shape = input_shape
    self.vol = vol
    self.gamma = 1.0  # controls the effect of the softmax
    self.losses = np.zeros((self.max_iter, ))
    self.vals = np.zeros((self.max_iter, ))
    self.device = torch.device("cuda")
    self.mu = MLP(input_shape, hidden_dim=64, num_outputs=1).to(device=self.device)
    self.nu = MLP(input_shape, hidden_dim=64, num_outputs=1).to(device=self.device)
    self.tf_optim = Adam(list(self.mu.parameters()) + list(self.nu.parameters()),
                         lr=0.002)
def __init__(self, model: Model, loss: IRGCNLoss, small: bool = True):
    self.model = model
    self.loss = loss
    self.small = small
    if small:
        self.model2 = MLP(num_features=50)
def main():
    args = arg_parse()
    graphs = load_data.read_graphfile(args.datadir, args.dataset,
                                      max_nodes=args.max_nodes)
    # if 'feat_dim' in graphs[0].graph:
    #     print('Using node features')
    #     input_dim = graphs[0].graph['feat_dim']
    # else:
    #     print('Using constant features')
    #     featgen_const = featgen.ConstFeatureGen(np.ones(args.input_dim, dtype=float))
    #     for G in graphs:
    #         featgen_const.gen_node_features(G)
    train_dataset, val_dataset, test_dataset, max_num_nodes, input_dim, assign_input_dim = \
        prepare_data(graphs, args, max_nodes=args.max_nodes)

    if args.doVAE == 1:
        modelGCV = GATcoarseVAE(input_dim, args.hidden_dim, args.GAT_hid_dim,
                                args.dropout, args.alpha)
        train_embed, train_label, test_embed, test_label = trainGCV(
            train_dataset, test_dataset, modelGCV, args)
        with open('NCI109_VAE.pickle', 'wb') as f:
            pickle.dump([train_embed, train_label, test_embed, test_label], f)
    else:
        with open('NCI109_VAE.pickle', 'rb') as f:
            train_embed, train_label, test_embed, test_label = pickle.load(f)

    modelMLP = MLP(args.hidden_dim, args.num_classes)
    test_acc, test_loss = trainMLP(train_embed, train_label, test_embed,
                                   test_label, modelMLP, args)
    print("Test accuracy:", test_acc)
def test_invertible_1d():
    Nlayers = 4
    Hs = 10
    Ht = 10
    sList = [MLP(2, Hs) for _ in range(Nlayers)]
    tList = [MLP(2, Ht) for _ in range(Nlayers)]
    masktypelist = ['channel', 'channel'] * (Nlayers // 2)
    # assemble RealNVP blocks into a MERA layer
    prior = Gaussian([8])
    layers = [
        RealNVP([2], sList, tList, Gaussian([2]), masktypelist)
        for _ in range(6)
    ]
    model = MERA(1, 2, 8, layers, prior)
    z = prior(4)
    x = model.generate(z, ifLogjac=True)
    zz = model.inference(x, ifLogjac=True)
    assert_array_almost_equal(z.data.numpy(), zz.data.numpy())
    print(model._generateLogjac)
    print(model._inferenceLogjac)
    assert_array_almost_equal(model._generateLogjac.data.numpy(),
                              -model._inferenceLogjac.data.numpy())

    saveDict = model.saveModel({})
    torch.save(saveDict, './saveNet.testSave')

    Nlayersp = 4
    Hsp = 10
    Htp = 10
    sListp = [MLP(2, Hsp) for _ in range(Nlayersp)]
    tListp = [MLP(2, Htp) for _ in range(Nlayersp)]
    masktypelistp = ['channel', 'channel'] * (Nlayersp // 2)
    # assemble RealNVP blocks into a MERA layer
    priorp = Gaussian([8])
    layersp = [
        RealNVP([2], sListp, tListp, Gaussian([2]), masktypelistp)
        for _ in range(6)
    ]
    modelp = MERA(1, 2, 8, layersp, priorp)
    saveDictp = torch.load('./saveNet.testSave')
    modelp.loadModel(saveDictp)
    xp = modelp.generate(z)
    assert_array_almost_equal(xp.data.numpy(), x.data.numpy())
class wgan(Method):
    def __init__(self, input_shape, vol):
        super(wgan, self).__init__(2000)
        self.input_shape = input_shape
        self.vol = vol
        self.clamp_max = 0.01
        self.losses = np.zeros((self.max_iter, ))
        self.vals = np.zeros((self.max_iter, ))
        self.device = torch.device("cuda")
        self.disc = MLP(input_shape, hidden_dim=64,
                        num_outputs=1).to(device=self.device)
        self.disc_optim = Adam(self.disc.parameters(), lr=0.002)

    def update_parameters(self, As, Bs, shuffle=True):
        if shuffle:
            np.random.shuffle(As)
            np.random.shuffle(Bs)
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)
        loss1 = VAs.mean()
        loss2 = -VBs.mean()
        self.disc_optim.zero_grad()
        loss1.backward()
        loss2.backward()
        self.disc_optim.step()
        # weight clipping enforces the Lipschitz constraint on the critic
        for p in self.disc.parameters():
            p.data.clamp_(-self.clamp_max, self.clamp_max)
        return (loss1 + loss2).item()

    def estimate(self, As, Bs):
        As = torch.FloatTensor(As).to(self.device)
        Bs = torch.FloatTensor(Bs).to(self.device)
        VAs = self.disc(As)
        VBs = self.disc(Bs)
        rv = torch.abs(VAs.mean() - VBs.mean())
        return rv.squeeze().detach().cpu().numpy()

    def train(self, As, Bs):
        for i in range(self.max_iter):
            loss = self.update_parameters(As, Bs)
            self.losses[i] = loss
            self.vals[i] = self.estimate(As, Bs)
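# A minimal usage sketch for the `wgan` estimator above; the sample data,
# shapes, and `vol` value are assumptions, not part of the original code.
# Note the class hard-codes torch.device("cuda"), so a GPU is required.
import numpy as np

def wgan_usage_example():
    As = np.random.randn(1000, 2).astype(np.float32)         # samples from P
    Bs = np.random.randn(1000, 2).astype(np.float32) + 1.0   # samples from Q
    est = wgan(input_shape=2, vol=1.0)
    est.train(As, Bs)
    # est.vals records the estimated Wasserstein-1 distance per iteration
    print(est.vals[-1])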
def main(args):
    embedder_hidden_sizes = args.embedder_hidden_sizes
    embedded_dim = args.embedded_dim
    lstm_size = args.lstm_size
    n_shuffle = args.n_shuffle
    clf_hidden_sizes = args.clf_hidden_sizes
    policy_hidden_sizes = args.policy_hidden_sizes
    shared_dim = args.shared_dim
    nsteps = args.nsteps
    n_envs = args.n_envs
    data_type = args.data_type
    r_cost = args.r_cost

    # TODO: load the data first, then define the classifier and declare the envs
    traindata, valdata, testdata = data_load(data_type=args.data_type,
                                             random_seed=args.random_seed)
    input_dim = traindata.n_features + 1
    clf_output_size = traindata.n_classes if traindata.n_classes > 2 else 1

    encoder = SetEncoder(input_dim, traindata.n_features, embedder_hidden_sizes,
                         embedded_dim, lstm_size, n_shuffle,
                         normalize=args.normalize, dropout=args.dropout, p=args.p)
    dfsnet = DFSNet(
        encoder=encoder,
        classifier=MLP(lstm_size + embedded_dim, clf_hidden_sizes,
                       clf_output_size, dropout=args.dropout, p=args.p,
                       batch_norm=args.batchnorm),
        policy=DuelingNet(lstm_size + embedded_dim, policy_hidden_sizes,
                          shared_dim, traindata.n_actions))
    dfsnet.to(args.device)

    step_runner = StepRunner(dfsnet, args)
    env = Env(args, n_envs, r_cost, traindata, step_runner.classify)
    valenv = Env(args, n_envs, r_cost, valdata, step_runner.classify)
    testenv = Env(args, n_envs, r_cost, testdata, step_runner.classify)
    env.classify = step_runner.classify
    valenv.classify = step_runner.classify
    testenv.classify = step_runner.classify

    learn_start = time()
    learn(step_runner, args, env, valenv, nsteps=nsteps,
          total_steps=int(5e6), scheduler=args.scheduler)
    learn_elapsed = time() - learn_start

    dfsnet.eval()
    test_and_record(step_runner, args, env, valenv, testenv)
    print(learn_elapsed)
def main(args):
    # Create model directory
    if not os.path.exists(args.model_path):
        os.makedirs(args.model_path)

    # Build data loader
    # dataset, targets = load_dataset()
    dataset = np.load('dataset.npy')
    targets = np.load('targets.npy')

    # Build the model
    mlp = MLP(args.input_size, args.output_size)
    if torch.cuda.is_available():
        mlp.cuda()

    # Loss and optimizer
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adagrad(mlp.parameters())

    # Train the model
    total_loss = []
    print(len(dataset))
    print(len(targets))
    sm = 100  # start saving models after 100 epochs
    for epoch in range(args.num_epochs):
        print("epoch " + str(epoch))
        avg_loss = 0
        for i in tqdm(range(0, len(dataset), args.batch_size)):
            # Forward, backward and optimize
            mlp.zero_grad()
            bi, bt = get_input(i, dataset, targets, args.batch_size)
            bi = to_var(bi)
            bt = to_var(bt)
            bo = mlp(bi)
            loss = criterion(bo, bt)
            avg_loss = avg_loss + loss.data.item()
            loss.backward()
            optimizer.step()
        print("--average loss:")
        print(avg_loss / (len(dataset) / args.batch_size))
        total_loss.append(avg_loss / (len(dataset) / args.batch_size))

        # Save the model
        if epoch == sm:
            model_path = 'mlp_100_4000_PReLU_ae_dd' + str(sm) + '.pkl'
            torch.save(mlp.state_dict(),
                       os.path.join(args.model_path, model_path))
            sm = sm + 50  # save the model every 50 epochs from epoch 100 onwards

    torch.save(total_loss, 'total_loss.dat')
    model_path = 'mlp_100_4000_PReLU_ae_dd_final.pkl'
    torch.save(mlp.state_dict(), os.path.join(args.model_path, model_path))
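# Plausible sketches of the two helpers assumed above (their real definitions
# are not shown): `to_var` is the common pre-PyTorch-0.4 Variable wrapper, and
# `get_input` presumably slices one mini-batch out of the numpy arrays.
import torch
from torch.autograd import Variable

def to_var(x):
    # move the tensor to the GPU when available, then wrap it
    if torch.cuda.is_available():
        x = x.cuda()
    return Variable(x)

def get_input(i, dataset, targets, batch_size):
    # slice one batch starting at index i and convert to float tensors
    bi = torch.from_numpy(dataset[i:i + batch_size]).float()
    bt = torch.from_numpy(targets[i:i + batch_size]).float()
    return bi, bt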
def __init__(self, test=False):
    # device
    if torch.cuda.is_available():
        self.device = torch.device('cuda')
    else:
        self.device = torch.device('cpu')
    self.model = MLP(state_dim=4, action_num=2, hidden_dim=256).to(self.device)
    if test:
        self.load('./pg_best.cpt')
    # discount factor for rewards
    self.gamma = 0.99
    # optimizer
    self.optimizer = torch.optim.Adam(self.model.parameters(), lr=3e-3)
    # saved rewards and actions
    self.memory = Memory()
    self.tensorboard = TensorboardLogger('./')
def mlp(root, path_train, path_test):
    data_set_train = dataset_MLP(root + path_train, train=True)
    data_set_test = dataset_MLP(root + path_test, train=False)
    trainloader = DataLoader(data_set_train, batch_size=1000, shuffle=True)
    testloader = DataLoader(data_set_test, batch_size=1000)

    model = MLP()
    criterion = t.nn.CrossEntropyLoss()
    lr = 0.01
    optimizer = t.optim.SGD(model.parameters(), lr, momentum=0.4)

    for epoch in range(240):
        for _, (data, label) in enumerate(trainloader):
            model.train()
            optimizer.zero_grad()
            score = model(data)
            loss = criterion(score, label)
            loss.backward()
            optimizer.step()
        print("Epoch:%d loss:%f" % (epoch, loss.item()))

    res = []
    for _, data in enumerate(testloader):
        model.eval()
        predict = model(data)
        predict = predict.detach().numpy().tolist()
        res += predict
    res = np.array(res)
    ans = np.argmax(res, axis=1)
    data_set_test.save_res(ans, "./images/res_MLP.csv")
def main(cfg):
    if cfg['model'] == 'mlp':
        net = MLP(300, 768, cfg['class_num'])
    elif cfg['model'] == 'cnn':
        net = CNN(300, 768, cfg['class_num'])
    elif cfg['model'] == 'lstm':
        net = LSTM(300, cfg['class_num'], cfg['device'])
    elif cfg['model'] == 'gru':
        net = GRU(300, cfg['class_num'], cfg['device'])
    else:
        raise Exception(f"model {cfg['model']} not available")

    if cfg['device'] == 'cuda':
        if len(cfg['gpu_ids']) == 1:
            torch.cuda.set_device(cfg['gpu_ids'][0])
            net = net.cuda()
        else:
            net = net.cuda()
            net = nn.DataParallel(net, device_ids=cfg['gpu_ids'])
        torch.backends.cudnn.benchmark = True

    if cfg['mode'] == 'train':
        train(cfg, net)
    elif cfg['mode'] == 'predict':
        predict(cfg, net, 'checkpoints/{}.pth'.format(cfg['model']))
class PredictionService:
    model = MLP()
    chainer.serializers.load_npz("model.npz", model)

    @staticmethod
    def predict(x: np.ndarray):
        with chainer.using_config("train", False):
            p = PredictionService.model(x[np.newaxis, ...])[0].array.argmax()
        return p
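# A minimal usage sketch for PredictionService; the 784-dim float32 input is
# an assumption (an MNIST-style flattened image), not from the original code.
if __name__ == "__main__":
    x = np.random.rand(784).astype(np.float32)
    print(PredictionService.predict(x))  # predicted class index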
def load_model(
    MODEL_NAME,
    BATCH_SIZE,
    DNN_HIDDEN_UNITS,
    DNN_DROPOUT,
    DNN_ACTIVATION,
    L2_REG,
    INIT_STD,
    SPARSE_EMBEDDING_DIM,
    VARLEN_MODE_LIST,
    feature_index,
    unique_num_dic,
):
    embedding_dict = nn.ModuleDict({
        feat: nn.Embedding(unique_num_dic[feat], SPARSE_EMBEDDING_DIM,
                           sparse=SPARSE_EMBEDDING)
        for feat in sparse_features
    })
    for mode in VARLEN_MODE_LIST:
        for feat in varlen_sparse_features:
            embedding_dict[f'{feat}__{mode}'] = nn.Embedding(
                unique_num_dic[feat], SPARSE_EMBEDDING_DIM,
                sparse=SPARSE_EMBEDDING)

    linear_embedding_dict = nn.ModuleDict({
        feat: nn.Embedding(unique_num_dic[feat], 1, sparse=SPARSE_EMBEDDING)
        for feat in sparse_features
    })
    for mode in VARLEN_MODE_LIST:
        for feat in varlen_sparse_features:
            linear_embedding_dict[f'{feat}__{mode}'] = nn.Embedding(
                unique_num_dic[feat], 1, sparse=SPARSE_EMBEDDING)

    if MODEL_NAME == 'MLP':
        dnn_input_len = len(dense_features) + len(sparse_features) * SPARSE_EMBEDDING_DIM \
            + len(varlen_sparse_features) * len(VARLEN_MODE_LIST) * SPARSE_EMBEDDING_DIM
        model = MLP(
            dnn_input=dnn_input_len,
            dnn_hidden_units=DNN_HIDDEN_UNITS,
            dnn_dropout=DNN_DROPOUT,
            activation=DNN_ACTIVATION,
            use_bn=True,
            l2_reg=L2_REG,
            init_std=INIT_STD,
            device=DEVICE,
            feature_index=feature_index,
            embedding_dict=embedding_dict,
            dense_features=dense_features,
            sparse_features=sparse_features,
            varlen_sparse_features=varlen_sparse_features,
            varlen_mode_list=VARLEN_MODE_LIST,
            embedding_size=SPARSE_EMBEDDING_DIM,
            batch_size=BATCH_SIZE,
        )
    return model
def create_model(graph: Graph, cross_features: Optional[torch.Tensor],
                 config: Config) -> Model:
    """Create an instance of Gretel.

    Args:
        graph (Graph): graph
        cross_features ([type]): available cross features between nodes
            [n_node, n_node, d_cross]. Can be useful to show distance to the target.
        config (Config): configuration

    Returns:
        Model: the Gretel model
    """
    def dimension(tensor, name):
        if tensor is None:
            return 0
        if tensor.dim() == 1:
            return 1
        elif tensor.dim() == 2:
            return tensor.shape[1]
        else:
            raise ValueError(f"{name} features should be scalar or vectors")

    d_node = dimension(graph.nodes, "graph.nodes")
    d_edge = dimension(graph.edges, "graph.edges")
    d_cross = cross_features.shape[1] if cross_features is not None else 0

    diffusion_graph_transformer = None
    if config.initial_edge_transformer and (d_node > 0 or d_edge > 0):
        diffusion_graph_transformer = EdgeTransformer(d_node, d_edge, 1)
    else:
        print("No initial edge transformer.")

    multichannel_diffusion = MultiDiffusion(config.diffusion_k_hops,
                                            config.diffusion_hidden_dimension,
                                            config.parametrized_diffusion)

    double_way_diffusion = 2 if config.double_way_diffusion else 1
    d_in_direction_mlp = (
        2 * config.number_observations * config.diffusion_hidden_dimension *
        double_way_diffusion + 2 * d_node + d_edge +
        (d_node if config.latent_transformer_see_target else 0) +
        (2 * d_cross if config.latent_transformer_see_target else 0))
    direction_edge_mlp = MLP(d_in_direction_mlp, 1)

    return Model(
        diffusion_graph_transformer=diffusion_graph_transformer,
        multichannel_diffusion=multichannel_diffusion,
        direction_edge_mlp=direction_edge_mlp,
        number_observations=config.number_observations,
        rw_expected_steps=config.rw_expected_steps,
        rw_non_backtracking=config.rw_non_backtracking,
        latent_transformer_see_target=config.latent_transformer_see_target,
        double_way_diffusion=config.double_way_diffusion,
        diffusion_self_loops=config.diffusion_self_loops,
    )
def main():
    args = parse_args()
    seed_everything(args.seed)

    if args.onehot:
        app_train = joblib.load('../data/05_onehot/application_train.joblib')
        app_test = joblib.load('../data/05_onehot/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        _, _, cont_dim = dims['application_train']
        n_input = cont_dim
    else:
        app_train = joblib.load(
            '../data/03_powertransform/application_train.joblib')
        app_test = joblib.load(
            '../data/03_powertransform/application_test.joblib')
        dims = get_dims({'application_train': app_train})
        cat_dims, emb_dims, cont_dim = dims['application_train']
        n_input = emb_dims.sum() + cont_dim
    n_hidden = args.n_hidden

    # CV
    skf = StratifiedKFold(n_splits=5)
    folds = skf.split(app_train['SK_ID_CURR'], app_train['TARGET'])
    best_models = []
    for train_index, val_index in folds:
        train_dataloader = make_dataloader(app_train, train_index,
                                           args.batch_size, onehot=args.onehot)
        val_dataloader = make_dataloader(app_train, val_index,
                                         args.batch_size, onehot=args.onehot)
        if args.onehot:
            network = MLPOneHot(n_input, n_hidden)
        else:
            network = MLP(cat_dims, emb_dims, n_input, n_hidden)
        model = LightningModel(network, nn.BCEWithLogitsLoss(),
                               train_dataloader, val_dataloader, args)
        name = '13_mlp-onehot' if args.onehot else '13_mlp-label'
        trainer = HomeCreditTrainer(name, args.n_epochs, args.patience)
        trainer.fit(model)
        best_model = load_model(model, name, trainer.logger.version)
        best_models.append(best_model)

    # Predict
    test_dataloader = make_dataloader(app_test, None, args.batch_size,
                                      train=False, onehot=args.onehot)
    df_submission = predict(best_models, test_dataloader)
    filename = ('../submission/13_mlp-onehot.csv' if args.onehot
                else '../submission/13_mlp-label.csv')
    df_submission.to_csv(filename, index=False)
def main():
    env = gym.make('CartPole-v1')
    obs_dim = env.observation_space.shape[0]
    act_num = env.action_space.n

    mlp = MLP(obs_dim, act_num).to(device)
    if args.load is not None:
        pretrained_model_path = os.path.join('./save_model/' + str(args.load))
        pretrained_model = torch.load(pretrained_model_path)
        mlp.load_state_dict(pretrained_model)

    sum_returns = 0.
    num_episodes = 0
    for episode in range(1, 10001):
        total_reward = 0.
        obs = env.reset()
        done = False
        while not done:
            if args.render:
                env.render()
            action = mlp(torch.Tensor(obs).to(device)).argmax().detach().cpu().numpy()
            next_obs, reward, done, _ = env.step(action)
            total_reward += reward
            obs = next_obs

        sum_returns += total_reward
        num_episodes += 1
        average_return = sum_returns / num_episodes if num_episodes > 0 else 0.0
        if episode % 10 == 0:
            print('---------------------------------------')
            print('Episodes:', num_episodes)
            print('AverageReturn:', average_return)
            print('---------------------------------------')
def create_model(params, input_dim):
    model_type = params['model']
    hidden1 = int(params['hidden'])
    depth = int(params['depth'])
    dropout = float(params['dropout'])
    degree = int(params['max_degree'])

    if model_type == 'dense':
        return MLP(input_dim=input_dim, hidden1=hidden1, dropout=dropout), False
    elif depth != 0:
        return Deep_GCN(input_dim=input_dim, hidden1=hidden1, depth=depth,
                        dropout=dropout, degree=degree), True
    else:
        return GCN(input_dim=input_dim, hidden1=hidden1, dropout=dropout,
                   degree=degree), True
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = MLP(model_info['dim_input'], model_info['dim_hidden'],
                model_info['dim_output'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    # prep for testing
    model.to(device).eval()

    print("Done loading model.")
    return model
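# A minimal sketch of the training-side counterpart assumed by `model_fn`
# above: writing `model_info.pth` and `model.pth` so they can be reloaded.
# The helper name and its arguments are hypothetical, not from the original code.
import os
import torch

def save_model(model, model_dir, dim_input, dim_hidden, dim_output):
    # save the constructor arguments needed to rebuild the model
    model_info = {'dim_input': dim_input,
                  'dim_hidden': dim_hidden,
                  'dim_output': dim_output}
    torch.save(model_info, os.path.join(model_dir, 'model_info.pth'))
    # save the learned weights
    torch.save(model.state_dict(), os.path.join(model_dir, 'model.pth'))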
def maximize_network(x_train, x_test, y_train, y_test):
    model = MLP()
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = chainer.cuda.cupy
        x_train = xp.asarray(x_train)
        x_test = xp.asarray(x_test)
        y_train = xp.asarray(y_train)
        y_test = xp.asarray(y_test)
    else:
        xp = np
    optimizer = chainer.optimizers.Adam()
    optimizer.use_cleargrads()
    optimizer.setup(model)
    for i in range(1, args.epoch + 1):
        for j in range(0, len(x_train), args.batch_size):
            model.cleargrads()
            logit = model(x_train[j: j + args.batch_size])
            # cross-entropy against the one-hot labels; F.log(F.softmax(...))
            # is equivalent to F.log_softmax but less numerically stable
            loss = -F.sum(F.log(F.softmax(logit)) * y_train[j: j + args.batch_size])
            loss.backward()
            optimizer.update()
        accuracy = F.accuracy(model(x_test), y_test)
        print("epoch {0:02d}, accuracy {1}".format(i, accuracy.data))
        # reshuffle the training data between epochs
        indices = np.random.permutation(len(x_train))
        x_train = x_train[indices]
        y_train = y_train[indices]
    return model
def mlp_inference():
    model = MLP()
    model.load_state_dict(torch.load(config.inference_model_path))
    model.eval()

    dataset = FeatureDataset()
    dataloader = DataLoader(dataset, batch_size=1, shuffle=False, num_workers=4)

    counter = 0
    index = 0
    with torch.no_grad():
        for data in dataloader:
            index += 1
            inputs = data['features']
            labels = data['action']
            outputs = model(inputs)
            # probability_distribution = torch.nn.functional.softmax(outputs)
            prediction = np.argmax(outputs.detach().numpy())
            # count and report the misclassified samples
            if labels.detach().numpy()[0] != prediction:
                counter += 1
                print(index)
                print('prediction of MLP model is {}'.format(prediction))
                print('label is {}'.format(labels.detach().numpy()[0]))
                print('----')
    print(counter)
def load_model(save_path):
    # the checkpoint was written with torch.save(to_save, save_path)
    model = MLP(len(vocab), HIDDEN_SIZE, num_classes, device=device)
    checkpoint = torch.load(save_path + '/best_model.pt')
    model.load_state_dict(checkpoint['model_state_dict'])
    epoch = checkpoint['epoch']
    # move the model to the GPU if one is available
    model.to(device)
    # need this so that dropout behaves deterministically
    model.eval()
    return model
def main():
    parser = setup_parser()
    args = parser.parse_args()
    subprocess.run(f"mkdir {args.model}", shell=True)
    torch.manual_seed(42)
    # device = "cuda" if args.gpus == -1 else "cpu"

    k_data_loaders = create_k_splitted_data_loaders(args)

    if args.model == "MLP":
        model = MLP(args)
    elif args.model == "CNN":
        model = CNN(args)
    else:
        raise ValueError(f"unsupported model: {args.model}")
    model.apply(reset_weights)

    acc_results, logs = [], []
    for fold, train_loader, test_loader in k_data_loaders:
        print(f"FOLD {fold}\n-----------------------------")
        print("Starting training...")
        model, log = train_loop(train_loader, model, args)
        logs.append(log)

        print("Training process has finished. Saving trained model.")
        torch.save(model.state_dict(), f"./{args.model}/model_fold_{fold}.pth")

        print("Starting testing...")
        correct_rate = test_loop(test_loader, model, fold)
        acc_results.append(correct_rate)

        print("Resetting the model weights...")
        reset_weights(model)

    print(f"K-FOLD CROSS VALIDATION RESULTS FOR {args.k_folds} FOLDS\n----------------------")
    print(f"Average: {sum(acc_results) / len(acc_results):.3g}%")
def train(config_files, run_number):
    max_accuracy = []
    max_validation_accuracy = []
    for n in range(1, 2):
        X, y = get_data_train(n, config_files, run_number)
        input_size, hidden_size, output_size = X.shape[1], 16, 8
        model = MLP(input_size, hidden_size, output_size)
        model.to(device)
        X, y = X.to(device), y.to(device)

        epochs = 20
        accuracy = []
        test_accuracy = []
        for i in range(epochs):
            output_i, loss = train_optim(model, y, X)
            print("epoch {}".format(i))
            print("accuracy = ", np.sum(output_i == y.cpu().numpy()) / y.size(0))
            print("loss: {}".format(loss))
            accuracy.append(np.sum(output_i == y.cpu().numpy()) / y.size(0))
            if not os.path.isdir('checkpoint'):
                os.mkdir('checkpoint')
            torch.save(model.state_dict(),
                       "checkpoint/MLP_model_{}_train.pwf".format(i))
            test_accuracy.append(validate(n, config_files, run_number, model))
            torch.save(model.state_dict(),
                       "checkpoint/MLP_model_{}_validate.pwf".format(i))
        plot_accuracy_n_print(accuracy, max_accuracy, n, run_number, 'train')
        plot_accuracy_n_print(test_accuracy, max_validation_accuracy, n,
                              run_number, 'validate')
def __init__(self, d, n_msg_layers, n_vote_layers, n_rounds):
    super(NeuroSAT, self).__init__()
    self.d = d
    self.n_rounds = n_rounds

    self.L_init = torch.nn.Parameter(torch.empty([1, d]))
    self.C_init = torch.nn.Parameter(torch.empty([1, d]))

    self.LC_msg = MLP(d, [d for _ in range(n_msg_layers)] + [d])
    self.CL_msg = MLP(d, [d for _ in range(n_msg_layers)] + [d])

    self.L_update = LayerNormBasicLSTMCell(2 * d, d)
    self.C_update = LayerNormBasicLSTMCell(d, d)

    self.L_vote = MLP(d, [d for _ in range(n_vote_layers)] + [1])
    self.vote_bias = torch.nn.Parameter(torch.empty([]))

    self._init_weight()

    # Metrics
    self.train_accuracy = pl.metrics.Accuracy()
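# The NeuroSAT module above assumes an `MLP(d_in, layer_sizes)` flavor that
# takes the input width and a list of layer widths. A minimal sketch, assuming
# ReLU between layers and a linear final layer (the real module may differ):
import torch.nn as nn

class MLP(nn.Module):
    def __init__(self, d_in, layer_sizes):
        super(MLP, self).__init__()
        layers = []
        prev = d_in
        for i, size in enumerate(layer_sizes):
            layers.append(nn.Linear(prev, size))
            if i < len(layer_sizes) - 1:  # no activation after the last layer
                layers.append(nn.ReLU())
            prev = size
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        return self.net(x)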
def train(FLAGS):
    """Train our embeddings."""
    # Get data loaders
    print("==> Reading and processing the data ... ", end="")
    train_loader, test_loader, num_unique_words, \
        num_unique_documents, word_to_idx = process_data(
            data_dir=FLAGS.data_dir,
            vocab_size=FLAGS.vocab_size,
            window_size=FLAGS.window_size,
            split_ratio=FLAGS.split_ratio,
            batch_size=FLAGS.batch_size,
        )
    print("[COMPLETE]")

    # Load pretrained GloVe embeddings for our vocab
    embedding_dir = os.path.join(basedir, "../../../../embeddings/glove")
    embedding_dim = 100
    embeddings = get_embeddings(
        embedding_dir=embedding_dir,
        embedding_dim=embedding_dim,
        words=word_to_idx.keys(),
    )

    # Initialize model, criterion, optimizer
    print("==> Initializing model components ... ", end="")
    model = MLP(
        D_in_words=num_unique_words,
        D_in_documents=num_unique_documents,
        embedding_dim=FLAGS.embedding_dim,
        num_hidden_units=FLAGS.num_hidden_units,
        window_size=FLAGS.window_size,
        embeddings=embeddings,
    )
    # Objective
    criterion = torch.nn.CrossEntropyLoss()
    # Optimizer: only pass the parameters with gradients (the GloVe embeddings are frozen)
    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = torch.optim.Adam(parameters, lr=FLAGS.lr)
    print("[COMPLETE]")

    # Train the model
    print("==> Training the model ... [IN PROGRESS]")
    model = training_procedure(
        model=model,
        criterion=criterion,
        optimizer=optimizer,
        train_loader=train_loader,
        test_loader=test_loader,
        num_epochs=FLAGS.num_epochs,
        learning_rate=FLAGS.lr,
        decay_rate=FLAGS.decay_rate,
        max_grad_norm=FLAGS.max_grad_norm,
        log_every=FLAGS.log_every,
    )
    print("\n[COMPLETE]")

    # Save the model
    print("==> Saving the model ... [IN PROGRESS]")
    torch.save(model, os.path.join(basedir, FLAGS.data_dir, "model.pt"))
    print("\n[COMPLETE]")