def __init__(self, output_dim, embedding_dim, hidden_dim, dropout_rate,
             n_layers, bos, eos, pad, ls_weight, labeldist):
    """Set up the embedding, LSTM and output projection of the language model.

    Args:
        output_dim: Number of rows of the embedding table and width of the
            final linear layer's output.
        embedding_dim: Width of each embedding vector (the LSTM input size).
        hidden_dim: LSTM hidden size (the input size of the output layer).
        dropout_rate: Probability used by ``dropout_layer``; it is also
            handed to the LSTM, but only when ``n_layers > 1``.
        n_layers: Number of stacked LSTM layers.
        bos, eos, pad: Special symbol ids stored on the instance; ``pad`` is
            additionally used as the embedding's ``padding_idx``.
        ls_weight: Label-smoothing weight (only stored here).
        labeldist: Optional label distribution. When it is not ``None`` a
            float32 tensor version is stored as ``self.vlabeldist``.
    """
    super(LM, self).__init__()

    # Plain (non-tensor) configuration kept for later use.
    self.bos = bos
    self.eos = eos
    self.pad = pad
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.dropout_rate = dropout_rate
    self.n_layers = n_layers

    # Sub-modules, created in a fixed order (embedding -> LSTM -> linear).
    self.embedding = torch.nn.Embedding(output_dim, embedding_dim,
                                        padding_idx=pad)
    # The LSTM's own dropout is only requested for stacked LSTMs.
    lstm_dropout = dropout_rate if n_layers > 1 else 0
    self.LSTM = torch.nn.LSTM(embedding_dim,
                              hidden_dim,
                              num_layers=n_layers,
                              batch_first=True,
                              dropout=lstm_dropout)
    # Re-initialise the freshly built LSTM with the project's initialiser.
    weight_init(self.LSTM)
    self.output_layer = torch.nn.Linear(hidden_dim, output_dim)
    self.dropout_layer = torch.nn.Dropout(p=dropout_rate)

    # Label smoothing hyperparameters.
    self.ls_weight = ls_weight
    self.labeldist = labeldist
    if labeldist is not None:
        dist_array = np.array(labeldist, dtype=np.float32)
        # `cc` is a project helper; presumably it moves the tensor to the
        # compute device -- TODO confirm.
        self.vlabeldist = cc(torch.from_numpy(dist_array))
def init_params(model, args):
    """Reset ``model``'s parameters and optionally load pre-trained conv-layers.

    Steps, in order:
      1. apply ``utils.weight_reset`` to every sub-module;
      2. if ``args.init_weight`` exists and is not "standard", run the
         "xavier_normal" weight initialisation;
      3. if ``args.init_bias`` exists and is not "standard", set biases with
         the "constant" strategy (value 0.01);
      4. if requested via ``args.pre_convE`` / ``args.pre_convD``, load
         checkpoints for ``model.convE`` / ``model.convD`` from ``args.m_dir``.

    Returns:
        The same ``model`` object, modified in place.
    """
    # -- back to the default initialisation everywhere
    model.apply(utils.weight_reset)

    # -- custom initialisation, only when explicitly asked for
    if hasattr(args, 'init_weight') and args.init_weight != "standard":
        utils.weight_init(model, strategy="xavier_normal")
    if hasattr(args, 'init_bias') and args.init_bias != "standard":
        utils.bias_init(model, strategy="constant", value=0.01)

    # -- pre-trained encoder conv-layers
    if (utils.checkattr(args, "pre_convE") and hasattr(model, 'depth')
            and model.depth > 0):
        # A missing tag and the literal tag "none" both mean "no suffix".
        tag = getattr(args, 'convE_ltag', "none")
        if tag == "none":
            load_name = model.convE.name
        else:
            load_name = "{}-{}".format(model.convE.name, tag)
        utils.load_checkpoint(model.convE, model_dir=args.m_dir,
                              name=load_name)

    # -- pre-trained decoder conv-layers
    if (utils.checkattr(args, "pre_convD") and hasattr(model, 'convD')
            and model.depth > 0):
        utils.load_checkpoint(model.convD, model_dir=args.m_dir)

    return model


##-------------------------------------------------------------------------------------------------------------------##
def weights_init(self, init_list, vae_list, flow_list=None, pretrained=None,
                 filters_list=None, logvar=-10.):
    """Initialise the network, then optionally override the two conv kernels.

    Args:
        init_list: Per-conv initialisation names. Recognised values are
            'vae', 'flow', 'xavier' and 'filters'; a sole first entry of
            'pretrained' loads a whole checkpoint instead. Positions missing
            from the list fall back to 'xavier'.
        vae_list: Paths indexed by conv position, used for 'vae'.
        flow_list: Paths indexed by conv position, used for 'flow'.
        pretrained: Path given to ``torch.load`` for 'pretrained'.
        filters_list: ``.npy`` paths indexed by conv position, for 'filters'.
        logvar: Constant handed to ``utils.const_init`` for the
            ``bayes.LogScale*`` modules.

    Raises:
        NotImplementedError: For an unrecognised initialisation name.
    """
    # Baseline initialisation applied to the whole module tree.
    xavier = nn.init.xavier_normal_
    self.apply(utils.weight_init(module=nn.Conv2d, initf=xavier))
    self.apply(utils.weight_init(module=nn.Linear, initf=xavier))
    self.apply(utils.weight_init(module=bayes.LogScaleConv2d,
                                 initf=utils.const_init(logvar)))
    self.apply(utils.weight_init(module=bayes.LogScaleLinear,
                                 initf=utils.const_init(logvar)))

    # Whole-model checkpoint: copy every tensor and stop.
    if len(init_list) > 0 and init_list[0] == 'pretrained':
        assert len(init_list) == 1
        own_state = self.state_dict()
        for key, value in torch.load(pretrained).items():
            if key in own_state:
                target = key
            else:
                # Unknown key: redirect to the '<first two tokens>.mean.<last>'
                # entry of this model's state.
                parts = key.split('.')
                target = '.'.join(parts[:2] + ['mean'] + parts[-1:])
            own_state[target].data.copy_(value)
        return

    # Per-layer overrides for the first two convolutions.
    for idx, layer in enumerate((self.features.conv1, self.features.conv2)):
        init = init_list[idx] if idx < len(init_list) else 'xavier'
        # Bayesian layers keep their kernel on the `mean` sub-module.
        if isinstance(layer, bayes._Bayes):
            w = layer.mean.weight
        else:
            w = layer.weight

        if init == 'xavier':
            # Already handled by the baseline initialisation above.
            continue

        if init == 'vae':
            vae = utils.load_vae(vae_list[idx], device=self.device)
            n_kernels = w.size(0) * w.size(1)
            z = torch.randn(n_kernels, vae.encoder.z_dim, 1, 1).to(vae.device)
            decoded = vae.decode(z)[0]
            w.data = decoded.reshape(w.shape)
        elif init == 'flow':
            flow = utils.load_flow(flow_list[idx], device=self.device)
            utils.flow_init(flow)(w)
        elif init == 'filters':
            filters = np.load(filters_list[idx])
            n_kernels = np.prod(w.shape[:2])
            # Random subset (without replacement) of the stored filters.
            chosen = np.random.permutation(len(filters))[:n_kernels]
            filters = filters[chosen]
            w.data = torch.from_numpy(filters.reshape(*w.shape)).to(
                self.device)
        else:
            raise NotImplementedError
def finetune_from(self, path):
    """Re-initialise this model, then copy matching weights from a checkpoint.

    The object stored at ``path`` must expose ``state_dict()`` (i.e. a saved
    module rather than a bare state dict). Entries whose name is unknown to
    this model, or whose name contains ``'head'``, are left at their fresh
    initialisation.

    Args:
        path: Checkpoint location passed to ``torch.load`` (mapped to CPU).
    """
    weight_init(self)

    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    pretrained_state = torch.load(path, map_location='cpu').state_dict()
    own_state = self.state_dict()

    for name, param in pretrained_state.items():
        # Copy only tensors that exist here and do not belong to a head.
        if name in own_state and 'head' not in name:
            own_state[name].copy_(param)
def __init__(self, args):
    """Assemble the FiLM model from its three sub-networks.

    Args:
        args: Configuration object forwarded unchanged to ``CNN``,
            ``FiLM_Generator`` and ``FiLMed_Network``; its ``device``
            attribute selects where the finished model is placed.
    """
    super(FiLM, self).__init__()

    self.cnn = CNN(args)                         # CNN
    self.film_generator = FiLM_Generator(args)   # FiLM generator
    self.filmed_network = FiLMed_Network(args)   # FiLM-ed network

    # Initialise weights across every registered sub-module.
    all_modules = self.modules()
    weight_init(all_modules)

    # Place the model on the configured device.
    target_device = args.device
    self.to(target_device)
def __init__(
    self,
    hidden_dims=[30, 30],
    input_dim=4,
    output_dim=1,
    grad_clip=5,
    reg=0.01,
    dropout=0.7,
    beta=0.05,
    nu=0.05,
):
    """
    Create a simple multi-layer perceptron network with a number of hidden layers.

    Input:
        hidden_dims : A tuple or list of hidden dimensions
        input_dim   : Dimensions of the input (x)
        output_dim  : Dimensions of the output/prediction (y_hat)
        grad_clip   : Clips the gradient for stability in gradient descent.
                      Either False (disable gradient clipping) or a number
                      giving the maximum absolute value of the gradient
        reg         : L2 regularization scale (==0.0 for no regularization)
        dropout     : The probability of keeping each neuron in dropout
        beta        : (β) Adaptation gain (or learning rate)
        nu          : (ν) E-mod gain

    Parameters are stored in ``self.params`` under the keys ``w0..w{n-1}`` /
    ``b0..b{n-1}`` for the hidden layers and ``w_out`` / ``b_out`` for the
    output layer.
    """
    # Work on a private list copy: this makes tuples usable (list + tuple
    # concatenation below would otherwise raise TypeError) and guarantees the
    # shared default argument is never aliased or mutated.
    hidden_dims = list(hidden_dims)

    self.n_hidden = len(hidden_dims)
    self.M = output_dim
    self.D = input_dim
    self.grad_clip = grad_clip
    self.reg = reg  # Strength of L2 regularization
    self.dropout = dropout  # The `keep` probability for dropout
    self.beta = beta
    self.nu = nu

    # For each layer get the input and output dimensions
    input_dims = [input_dim] + hidden_dims  # By default: [4, 30, 30]
    output_dims = hidden_dims + [output_dim]  # By default: [30, 30, 1]

    # Create all of the initial weights and biases, save to dictionary
    self.params = {}
    for l in range(self.n_hidden):
        i, o = input_dims[l], output_dims[l]
        self.params["w" + str(l)] = weight_init(i, o)
        self.params["b" + str(l)] = bias_init(o)
    # The output layer reads from the last hidden layer (or directly from the
    # input when there are no hidden layers).
    self.params["w_out"] = weight_init(input_dims[-1], output_dim)
    self.params["b_out"] = bias_init(output_dim)

    # This part allows for data whitening at the output layer
    self.whiten_mu = np.zeros((output_dim))
    self.whiten_sigma = np.ones((output_dim))
def __init__(self, num_classes: int):
    """Build the detector: MobileNetV1 backbone, extra layers, FPN and SSD head.

    Args:
        num_classes: Number of classes, stored and forwarded to ``SSDHead``.

    ``self.sizes`` and ``self.aspect_ratios`` are read but not set here;
    presumably they are class-level attributes -- TODO confirm.
    """
    super().__init__()
    self.num_classes = num_classes

    self.base_net = MobileNetV1(pretrained=True).model
    self.source_layer_indexes = [12, 14]

    def make_extra(in_ch, mid_ch, out_ch):
        # 1x1 conv to `mid_ch`, then a stride-2 3x3 conv to `out_ch`,
        # each followed by ReLU.
        return Sequential(
            Conv2d(in_channels=in_ch, out_channels=mid_ch, kernel_size=1),
            ReLU(),
            Conv2d(in_channels=mid_ch, out_channels=out_ch, kernel_size=3,
                   stride=2, padding=1),
            ReLU())

    # (in, mid, out) channel counts of the four extra feature blocks.
    extra_specs = (
        (1024, 256, 512),
        (512, 128, 256),
        (256, 128, 256),
        (256, 128, 256),
    )
    self.extras = ModuleList([make_extra(*spec) for spec in extra_specs])
    weight_init(self.extras)

    out_channels = 256
    self.fpn = FPN([512, 1024, 512, 256, 256, 256], out_channels)

    # Every one of the six FPN outputs has `out_channels` channels.
    out_channels_list = [out_channels] * 6
    self.head = SSDHead(
        num_classes,
        out_channels_list,
        self.sizes,
        AnchorCellCreator(self.aspect_ratios, smin=0.2, smax=0.95))
def __init__(self, args):
    """Build the RN model: CNN, positional encoding, g/f networks, classifier.

    Args:
        args: Configuration object. Read here: ``rn_f_chs`` (comma-separated
            layer spec string), ``num_cat`` (classifier output size),
            ``rn_extension`` (adds a ``RefFinder`` when truthy) and
            ``device``. It is also forwarded to every sub-network.
    """
    super(RN, self).__init__()
    self.args = args

    self.cnn = CNN(args)  # (N,C,H,W)
    self.pos = self.get_positional_encoding(args)  # (1,2,H,W)
    self.rn_g = RN_G(args)
    self.rn_f = RN_F(args)

    # The classifier input width is the last entry of `rn_f_chs`; a trailing
    # "d"/"D" marker on that entry is dropped before parsing the number
    # (presumably it flags dropout -- TODO confirm).
    last_spec = args.rn_f_chs.split(",")[-1]
    if last_spec[-1].lower() == "d":
        last_spec = last_spec[:-1]
    cls_ch = int(last_spec)
    self.classifier = nn.Linear(cls_ch, args.num_cat)

    if args.rn_extension:
        self.ref_finder = RefFinder(args)

    # Initialise weights across every registered sub-module, then move.
    all_modules = self.modules()
    weight_init(all_modules)
    self.to(args.device)
def finetune_from(self, path):
    """Re-initialise this model, then copy matching weights from a checkpoint.

    The object stored at ``path`` must expose ``state_dict()`` (i.e. a saved
    module rather than a bare state dict). Entries whose name is unknown to
    this model, or whose name contains ``'head'``, keep their fresh
    initialisation.

    Args:
        path: Checkpoint location passed to ``torch.load`` (mapped to CPU).
    """
    weight_init(self)

    # NOTE(review): torch.load unpickles the file; only use trusted checkpoints.
    source_state = torch.load(path, map_location='cpu').state_dict()
    target_state = self.state_dict()

    transferable = (
        (name, param) for name, param in source_state.items()
        if name in target_state and 'head' not in name
    )
    for name, param in transferable:
        target_state[name].copy_(param)


# Manual smoke test (kept disabled):
# model = Resnet50_FPN_SSD(2)
# x = torch.zeros((1, 3, 300, 300))
#
# o = model(x)
def init_params(model, args):
    """Reset ``model``'s parameters and load pre-trained encoder conv-layers.

    Steps, in order:
      1. apply ``utils.weight_reset`` to every sub-module;
      2. if ``args.init_weight`` exists and is not "standard", run the
         "xavier_normal" weight initialisation;
      3. if ``args.init_bias`` exists and is not "standard", set biases with
         the "constant" strategy (value 0.01);
      4. always load ``model.convE`` from ``./conv_layers`` using the
         checkpoint name ``"<convE.name>-e100"``;
      5. if ``args.bir`` is set (per ``utils.checkattr``), freeze all
         ``model.convE`` parameters.

    Returns:
        The same ``model`` object, modified in place.
    """
    # -- back to the default initialisation everywhere
    model.apply(utils.weight_reset)

    # -- custom initialisation, only when explicitly asked for
    if hasattr(args, 'init_weight') and args.init_weight != "standard":
        utils.weight_init(model, strategy="xavier_normal")
    if hasattr(args, 'init_bias') and args.init_bias != "standard":
        utils.bias_init(model, strategy="constant", value=0.01)

    # -- pre-trained weights for the conv-layers (unconditional)
    encoder = model.convE
    checkpoint_name = "{}-e100".format(encoder.name)
    utils.load_checkpoint(encoder, model_dir='./conv_layers',
                          name=checkpoint_name)

    # -- optionally exclude the conv-layers from training
    if utils.checkattr(args, "bir"):
        for conv_param in model.convE.parameters():
            conv_param.requires_grad = False

    return model
def __init__(self, img_channel, base_channel, wn, bn, conv_dcp, ca,
             upscale_factor, num_blocks, wa_rate=1, residual_rescale=1):
    """
    Build the super-resolution network: base conv, residual blocks, upsampler.

    :param img_channel: input image channels, RGB, YCbCr: 3, grey: 1.
    :param base_channel: base feature channels in the network, int.
        With wa_rate=1 both the narrow and the wide width equal
        'base_channel'.
    :param wn: whether to use weight normalization, bool.
    :param bn: whether to use batch normalization, bool.
    :param conv_dcp: whether to use asymmetric decomposed convolution layers
        in residual blocks, bool.
    :param ca: whether to use channel attention in residual blocks, bool.
    :param upscale_factor: int, any positive integer.
    :param num_blocks: number of residual blocks in the network.
    :param wa_rate: wide-activation rate, float, wa_rate>=1.
        wa_rate=1 disables the mechanism.
    :param residual_rescale: rescaling factor handed to each residual block.
        1 by default.
    :raises Warning: raised (construction is aborted) when the narrow width
        times 1.2 is below img_channel * upscale_factor^2.
    """
    nn.Module.__init__(self)

    # Narrow / wide channel widths derived from the wide-activation rate.
    narrow = round(base_channel / wa_rate)
    wide = round(base_channel * wa_rate)

    # Sanity check of the narrow width against the upsampling requirement.
    required = img_channel * upscale_factor * upscale_factor
    if narrow * 1.2 < required:
        raise Warning(
            'wide activation rate not fit upscale factor or too less base channels.'
        )

    # Trailing positional arguments (3, 1, 1) are presumably kernel size,
    # stride and padding -- TODO confirm against conv_base.
    self.base_feat_extract = conv_base(img_channel, narrow, wn, 3, 1, 1)

    self.res_blocks = nn.Sequential(*[
        res_block(narrow, wide, wn, bn, conv_dcp, ca, residual_rescale)
        for _ in range(num_blocks)
    ])

    self.upsampler = upsampler(narrow, img_channel, upscale_factor, wn)

    weight_init(self)
def __init__(self, opt):
    """Build the MUNIT model: two generators, two discriminators, optimizers.

    Args:
        opt: Options object. Read here: ``ngf``, ``style_dim``, ``mlp_dim``
            (generators), ``ndf`` (discriminators), ``display_size``
            (number of fixed style codes), ``lr``, ``beta1`` and
            ``weight_delay`` (Adam settings; the attribute keeps the
            spelling used by the options object). An optional ``beta2`` is
            honoured; otherwise Adam's default of 0.999 is used.
    """
    super(MUNIT, self).__init__()

    # generators and discriminators
    self.gen_a = Generator(opt.ngf, opt.style_dim, opt.mlp_dim)
    self.gen_b = Generator(opt.ngf, opt.style_dim, opt.mlp_dim)
    self.dis_a = Discriminator(opt.ndf)
    self.dis_b = Discriminator(opt.ndf)

    # random style codes
    # NOTE(review): `.cuda()` hard-codes a GPU and yields a non-leaf tensor,
    # so `requires_grad=True` will not accumulate `.grad` on these
    # attributes -- confirm whether gradients are actually needed here.
    self.s_a = torch.randn(opt.display_size, opt.style_dim, 1, 1,
                           requires_grad=True).cuda()
    self.s_b = torch.randn(opt.display_size, opt.style_dim, 1, 1,
                           requires_grad=True).cuda()

    # optimizers
    dis_params = list(self.dis_a.parameters()) + list(
        self.dis_b.parameters())
    gen_params = list(self.gen_a.parameters()) + list(
        self.gen_b.parameters())
    # torch.optim.Adam takes `betas=(beta1, beta2)` and `weight_decay`; the
    # previous `beta=` / `weight_delay=` keywords raised TypeError.
    adam_kwargs = dict(
        lr=opt.lr,
        betas=(opt.beta1, getattr(opt, 'beta2', 0.999)),
        weight_decay=opt.weight_delay,
    )
    self.dis_opt = torch.optim.Adam(dis_params, **adam_kwargs)
    self.gen_opt = torch.optim.Adam(gen_params, **adam_kwargs)

    # network weight initialization: 'kaiming' everywhere first, then the
    # discriminators are re-initialised with 'gaussian'.
    self.apply(weight_init('kaiming'))
    self.dis_a.apply(weight_init('gaussian'))
    self.dis_b.apply(weight_init('gaussian'))
# NOTE(review): excerpt. It opens inside a branch of an if/elif chain
# (presumably on MODEL) whose head is above the visible source, so the first
# statement below belongs to that earlier branch.
    dis = models.ResNet32Discriminator(N_CHANNEL, 1, N_FILTERS_D,
                                       BATCH_NORM_D)
elif MODEL == "dcgan":
    # DCGAN generator / discriminator pair.
    gen = models.DCGAN32Generator(N_LATENT,
                                  N_CHANNEL,
                                  N_FILTERS_G,
                                  batchnorm=BATCH_NORM_G)
    dis = models.DCGAN32Discriminator(N_CHANNEL,
                                      1,
                                      N_FILTERS_D,
                                      batchnorm=BATCH_NORM_D)

# Move both networks to GPU 0 when CUDA is enabled.
if CUDA:
    gen = gen.cuda(0)
    dis = dis.cuda(0)

# Initialise every sub-module of both networks with the 'normal' mode.
gen.apply(lambda x: utils.weight_init(x, mode='normal'))
dis.apply(lambda x: utils.weight_init(x, mode='normal'))

# Build the optimizers for the selected algorithm.
if ALGORITHM == 'Adam':
    # PyTorch's own optimizer package.
    import torch.optim as optim
    dis_optimizer = optim.Adam(dis.parameters(),
                               lr=LEARNING_RATE_D,
                               betas=(BETA_1, BETA_2))
    gen_optimizer = optim.Adam(gen.parameters(),
                               lr=LEARNING_RATE_G,
                               betas=(BETA_1, BETA_2))
elif ALGORITHM == 'ExtraAdam':
    # A top-level `optim` module (not torch.optim) that provides ExtraAdam.
    import optim
    # NOTE(review): the matching generator optimizer is not visible in this
    # excerpt; presumably it follows -- confirm.
    dis_optimizer = optim.ExtraAdam(dis.parameters(),
                                    lr=LEARNING_RATE_D,
                                    betas=(BETA_1, BETA_2))
test_iter = data.Iterator(dataset=test_data, batch_size=BATCH_SIZE, sort=False) # build model from text_classify.model import RNN, WordAVGModel, TextCNN from text_classify.transformer import Transformer embedding_size = TEXT.vocab.vectors.shape[ 1] if USE_PRE_TRAIN_MODEL else EMBEDDING_SIZE # model = RNN(input_size=len(TEXT.vocab), embedding_size=embedding_size, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS, output_size=len(LABEL.vocab)) # model = TextCNN(input_size=len(TEXT.vocab), embedding_size=embedding_size, output_size=len(LABEL.vocab), pooling_method='avg') model = WordAVGModel(vocab_size=len(TEXT.vocab), embedding_dim=embedding_size, output_dim=len(LABEL.vocab)) # model = Transformer(input_size=len(TEXT.vocab), d_model=embedding_size, num_head=4, d_ff=HIDDEN_SIZE, output_size=len(LABEL.vocab), pad=TEXT.vocab.stoi['<pad>'], use_mask=True) utils.weight_init(model) if USE_PRE_TRAIN_MODEL: model.embedding.weight.data.copy_(TEXT.vocab.vectors) model.to(device) loss_function = nn.CrossEntropyLoss() optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE) scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.98) if TRAIN: for epoch in range(1, 1 + EPOCH_SIZE): torch.cuda.empty_cache() train_loss = [] valid_loss = [] valid_acc = 0 model.train() for batch in tqdm(train_iter): model.zero_grad()
qvalue_node = nengo.Node(size_in=4) # define neurons to encode state representations state = nengo.Ensemble(n_neurons=n_neurons, dimensions=25, intercepts=nengo.dists.Choice([0.15]), radius=2) # define neurons that compute the learning signal learn_signal = nengo.Ensemble(n_neurons=1000, dimensions=4) # connect the sensor to state ensemble nengo.Connection(sensor_node, state, synapse=None) reward_probe = nengo.Probe(reward_node, synapse=fast_tau) # connect state representation to environment interface q_conn = nengo.Connection(state.neurons, update_node, transform=weight_init(shape=(n_actions, n_neurons)), learning_rule_type=nengo.PES(1e-3, pre_tau=slow_tau), synapse=fast_tau) # connect update node to error signal ensemble w/ fast, slow conns to compute prediction error nengo.Connection(update_node[0:n_actions], learn_signal, transform=-1, synapse=slow_tau) nengo.Connection(update_node[n_actions:2 * n_actions], learn_signal, transform=1, synapse=fast_tau) # connect the learning signal to the learning rule nengo.Connection(learn_signal, q_conn.learning_rule, transform=-1, synapse=fast_tau) # for plotting and visualization purposes nengo.Connection(update_node[2 * n_actions:], qvalue_node, synapse=fast_tau) with nengo.Simulator(model) as sim: sim.run(10)