def __init__(self, layer_sizes, activation, weights):
    self.layer_sizes = layer_sizes
    self.activation = activations.get_activation(activation)
    self.weights = weights
    weights = np.array(list(map(float, self.weights)))
    l_bound = 0
    r_bound = 0
    self.matrices = []
    for i in range(len(self.layer_sizes) - 1):
        m = self.layer_sizes[i] + 1
        n = self.layer_sizes[i + 1]
        r_bound += m * n
        self.matrices.append(weights[l_bound:r_bound].reshape(m, n))
        l_bound = r_bound
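# Illustration (a minimal sketch, not part of the original class): how the flat weight
# vector is partitioned into per-layer matrices. For layer_sizes = [2, 3, 1], the first
# matrix needs (2 + 1) * 3 = 9 weights (the +1 row holds the biases) and the second needs
# (3 + 1) * 1 = 4, so a 13-element vector yields a (3, 3) and a (4, 1) matrix.
import numpy as np

layer_sizes = [2, 3, 1]
weights = np.arange(13, dtype=float)
matrices, l_bound = [], 0
for i in range(len(layer_sizes) - 1):
    m, n = layer_sizes[i] + 1, layer_sizes[i + 1]
    matrices.append(weights[l_bound:l_bound + m * n].reshape(m, n))
    l_bound += m * n
print([mat.shape for mat in matrices])  # [(3, 3), (4, 1)]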
def __init__(self, dim_input=3 * 32 * 32, dim_output=10, weight_scale=1e-4, activation='identity'):
    """
    Keyword Arguments:
        dim_input {int} -- input dimension of the layer. (default: {3*32*32})
        dim_output {int} -- number of neurons in the layer. (default: {10})
        weight_scale {float} -- standard deviation of the normal distribution used to
                                initialize the weights. If None, Xavier or He
                                initialization is used. (default: {1e-4})
        activation {str} -- identifier of the layer's activation function (default: {'identity'})
    """
    self.dim_input = dim_input
    self.dim_output = dim_output
    self.weight_scale = weight_scale
    self.activation_id = activation

    if weight_scale is not None:
        # Initialization from a normal distribution with standard deviation = weight_scale
        self.W = np.random.normal(loc=0.0, scale=weight_scale,
                                  size=(dim_input, dim_output))
    elif activation == 'relu':
        # 'He' initialization from a normal distribution
        self.W = np.random.normal(loc=0.0, scale=math.sqrt(2.0 / dim_input),
                                  size=(dim_input, dim_output))
    else:
        # 'Xavier' initialization from a normal distribution
        self.W = np.random.normal(loc=0.0, scale=math.sqrt(2.0 / (dim_input + dim_output)),
                                  size=(dim_input, dim_output))

    self.b = np.zeros(dim_output)
    self.dW = 0
    self.db = 0
    self.reg = 0.0
    self.cache = None
    self.activation = get_activation(activation)
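# Quick check (illustrative only) of the two fallback scales used when weight_scale is
# None: He uses sqrt(2 / dim_input) and Xavier uses sqrt(2 / (dim_input + dim_output)).
import math

dim_input, dim_output = 3 * 32 * 32, 10
he_std = math.sqrt(2.0 / dim_input)                     # ~0.0255, used with 'relu'
xavier_std = math.sqrt(2.0 / (dim_input + dim_output))  # ~0.0255, used otherwise
print(he_std, xavier_std)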
def __init__(self, num_class, eps=1e-5, momentum=0.9, weight_scale=None, activation='identity'):
    """
    Keyword Arguments:
        num_class {int} -- number of classes per sample, i.e. the D in (N, D).
        eps {float} -- hyperparameter used in the normalization computation (default: {1e-5})
        momentum {float} -- hyperparameter controlling how the running mean and variance
                            are accumulated during training so they can be reused at test
                            time. (default: {0.9})
        weight_scale {float} -- standard deviation of the normal distribution used to
                                initialize the gammas. If None, gammas are initialized
                                to 1. (default: {None})
        activation {str} -- identifier of the layer's activation function (default: {'identity'})
    """
    self.num_class = num_class
    self.eps = eps
    self.momentum = momentum
    self.weight_scale = weight_scale
    self.activation_id = activation

    if weight_scale is not None:
        # Initialization from a normal distribution with standard deviation = weight_scale
        self.gamma = np.random.normal(loc=0.0, scale=weight_scale, size=num_class)
    else:
        self.gamma = np.ones(num_class)

    self.beta = np.zeros(num_class)
    self.test_mean = np.zeros(num_class)
    self.test_var = np.zeros(num_class)
    self.dgamma = 0
    self.dbeta = 0
    self.reg = 0.0
    self.cache = None
    self.activation = get_activation(activation)
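# The forward pass is not shown here; as a reminder (standard batch normalization, not
# necessarily the exact code of this class), gamma and beta enter the transform as
#     x_hat = (x - mean) / sqrt(var + eps)    and    out = gamma * x_hat + beta.
# Minimal sketch on a toy batch, assuming per-column (feature-wise) statistics:
import numpy as np

x = np.array([[1.0, 2.0], [3.0, 4.0]])       # (N, D) with D = num_class = 2
gamma, beta, eps = np.ones(2), np.zeros(2), 1e-5
x_hat = (x - x.mean(axis=0)) / np.sqrt(x.var(axis=0) + eps)
print(gamma * x_hat + beta)                  # each column normalized to mean 0, var ~1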
def __init__(self, num_topics, vocab_size, t_hidden_size, rho_size, theta_act,
             train_embeddings=True, enc_drop=0.5):
    super(ETM, self).__init__()

    ## define hyperparameters
    self.num_topics = num_topics
    self.vocab_size = vocab_size
    self.t_hidden_size = t_hidden_size
    self.rho_size = rho_size
    self.enc_drop = enc_drop
    self.theta_act_name = theta_act
    self.train_emb = train_embeddings
    self.t_drop = nn.Dropout(enc_drop)
    self.theta_act = get_activation(theta_act)

    self.rho = nn.Linear(rho_size, vocab_size, bias=False)

    ## define the matrix containing the topic embeddings
    self.alphas = nn.Linear(rho_size, num_topics, bias=False)  # nn.Parameter(torch.randn(rho_size, num_topics))

    ## define variational distribution for \theta_{1:D} via amortization
    self.q_theta = nn.Sequential(
        nn.Linear(vocab_size, t_hidden_size),
        self.theta_act,
        nn.Linear(t_hidden_size, t_hidden_size),
        self.theta_act,
    )
    self.mu_q_theta = nn.Linear(t_hidden_size, num_topics, bias=True)
    self.logsigma_q_theta = nn.Linear(t_hidden_size, num_topics, bias=True)

    self.norm_bow = True
    self.timer = Timer()
    self.save_hyperparameters()
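# Shape sketch (an assumption-labeled illustration, not the class's own forward pass):
# a bag-of-words vector of size vocab_size goes through q_theta, and mu_q_theta /
# logsigma_q_theta each map the hidden representation to num_topics dimensions.
# The hyperparameter values below are assumptions; only the signature above is given.
import torch
import torch.nn as nn

vocab_size, t_hidden_size, num_topics = 5000, 800, 50
q_theta = nn.Sequential(nn.Linear(vocab_size, t_hidden_size), nn.ReLU(),
                        nn.Linear(t_hidden_size, t_hidden_size), nn.ReLU())
mu_q_theta = nn.Linear(t_hidden_size, num_topics)
bow = torch.rand(8, vocab_size)              # batch of 8 documents
print(mu_q_theta(q_theta(bow)).shape)        # torch.Size([8, 50])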
def get_model(config, tag='G'):
    """
    1) Define the architecture.
    2) Append the final activation.
    3) Either (a) load a checkpoint or (b) initialize the parameters.
    4) Move the model to the device.

    :param config: experiment configuration
    :param tag: title of the model in the config (e.g. 'G')
    :return: the model on the selected device
    """
    models_module = importlib.import_module('protocols.{}.models'.format(config.protocol))
    model_config = getattr(config.model, tag)
    model = getattr(models_module, model_config.name)(**model_config.kwargs)
    model = nn.Sequential(model, get_activation(entry=model_config))
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    if model_config.load_state == 0:
        # Fresh model: initialize conv/linear weights with the configured init function.
        def weights_init(m):
            func = get_init_func(model_config)
            if isinstance(m, nn.Conv2d) or isinstance(m, torch.nn.Linear):
                func(m.weight.data)
                if m.bias is not None:
                    torch.nn.init.zeros_(m.bias)

        print('Using initialization function')
        model.apply(weights_init)
    elif model_config.load_state == -1 or model_config.load_state == "latest":
        # Restore the most recent checkpoint for this tag.
        path_to_weights = os.path.join(config.system.checkpoints_root, config.name,
                                       '{}_latest.pth'.format(tag))
        state_dict = torch.load(path_to_weights)
        if "module" in list(state_dict.keys())[0]:
            new_state_dict = {}
            for k, v in state_dict.items():
                name = k[7:]  # remove `module.` prefix left by DataParallel
                new_state_dict[name] = v
            state_dict = new_state_dict
        model.load_state_dict(state_dict)
        print('Restore the model from: {}'.format(path_to_weights))
    else:
        # Otherwise, load_state is interpreted as an explicit path to a weights dump.
        try:
            path_to_weights = model_config.load_state
            state_dict = torch.load(path_to_weights)
            if "module" in list(state_dict.keys())[0]:
                new_state_dict = {}
                for k, v in state_dict.items():
                    name = k[7:]  # remove `module.` prefix left by DataParallel
                    new_state_dict[name] = v
                state_dict = new_state_dict
            model.load_state_dict(state_dict)
            print('Restore the model from: {}'.format(path_to_weights))
        except FileNotFoundError:
            print('Unable to load model weights. Please check the "config.model.load_state" '
                  'field. It must be either a checkpoint number or a path to an existing '
                  'weights dump')
    return model.to(device)
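# Config fields read by get_model (a sketch of the expected config shape; only the
# attribute paths below are actually used above, everything else is an assumption):
#   config.protocol                      -> 'protocols.<protocol>.models' is imported
#   config.model.<tag>.name, .kwargs     -> architecture class and its constructor args
#   config.model.<tag>.load_state        -> 0 (init), -1 / 'latest' (latest checkpoint), or a path
#   config.system.checkpoints_root, config.name -> where '<tag>_latest.pth' is looked up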
def __init__(self, num_filters, filter_size=3, channels=1, stride=1, padding=0,
             weight_scale=1e-3, activation='identity'):
    """
    Keyword Arguments:
        num_filters {int} -- number of activation maps.
        filter_size {int, tuple} -- size of the filters. (default: {3})
        channels {int} -- number of channels; must match the number of channels of the
                          input data. (default: {1})
        stride {int, tuple} -- stride of the filters. (default: {1})
        padding {int, tuple} -- number of zeros added before and after the data; the
                                value counts the zeros on one side only. (default: {0})
        weight_scale {float} -- standard deviation of the normal distribution used to
                                initialize the weights. (default: {1e-3})
        activation {str} -- identifier of the layer's activation function (default: {'identity'})
    """
    self.num_filters = num_filters
    self.filter_size = filter_size
    self.channels = channels
    self.weight_scale = weight_scale
    self.activation_id = activation

    if isinstance(stride, tuple):
        self.stride = stride
    elif isinstance(stride, int):
        self.stride = (stride, stride)
    else:
        raise Exception("Invalid stride format, must be tuple or integer")

    if isinstance(padding, tuple):
        self.pad = padding
    elif isinstance(padding, int):
        self.pad = (padding, padding)
    else:
        raise Exception("Invalid padding format, must be tuple or integer")

    if not isinstance(channels, int):
        raise Exception("Invalid channels format, must be integer")

    if isinstance(filter_size, tuple):
        self.W = np.random.normal(loc=0.0, scale=weight_scale,
                                  size=(num_filters, channels, filter_size[0], filter_size[1]))
    elif isinstance(filter_size, int):
        self.W = np.random.normal(loc=0.0, scale=weight_scale,
                                  size=(num_filters, channels, filter_size, filter_size))
    else:
        raise Exception("Invalid filter format, must be tuple or integer")

    self.b = np.zeros(num_filters)
    self.dW = np.zeros(self.W.shape)
    self.db = np.zeros(self.b.shape)
    self.reg = 0.0
    self.cache = None
    self.activation = get_activation(activation)
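# Worked check (illustrative sketch; the forward pass is not part of this snippet):
# with filter size f, one-sided padding p, and stride s, a standard convolution maps an
# input of height H to an output of height (H + 2*p - f) // s + 1, and likewise for width.
H, f, p, s = 32, 3, 1, 1
print((H + 2 * p - f) // s + 1)   # 32: spatial size preserved for f=3, p=1, s=1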