def __init__(self, args):
    """ENAS controller: an LSTM that samples architectures token by token.

    Builds one embedding over all decision tokens and one linear decoder per
    decision step.  `num_tokens` is only defined for the 'rnn' and 'cnn'
    network types.
    """
    super(Controller, self).__init__()
    self.args = args
    if self.args.network_type == 'rnn':
        # First token picks an activation; each block then adds a
        # previous-node choice (idx + 1 candidates) and an activation choice.
        self.num_tokens = [len(args.shared_rnn_activations)]
        for idx in range(self.args.num_blocks):
            self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
        self.func_names = args.shared_rnn_activations
    elif self.args.network_type == 'cnn':
        self.num_tokens = [len(args.shared_cnn_types), self.args.num_blocks]
        self.func_names = args.shared_cnn_types
    num_total_tokens = sum(self.num_tokens)
    self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = nn.LSTMCell(args.controller_hid, args.controller_hid)
    # One linear head per decision step.  (Removed the unused `pivot` local.)
    self.decoders = [nn.Linear(args.controller_hid, size)
                     for size in self.num_tokens]
    self._decoders = nn.ModuleList(self.decoders)
    self.reset_parameters()
    # Zero hidden states / inputs, created lazily per batch-size key.
    self.static_init_hidden = keydefaultdict(self.init_hidden)

    def _zero_input(key):  # was an assigned lambda (PEP 8 E731)
        return get_variable(t.zeros(key, self.args.controller_hid),
                            self.args.cuda, requires_grad=False)

    self.static_inputs = keydefaultdict(_zero_input)
def __init__(self, args):
    """Augmentation-policy controller: an LSTM that sequentially emits
    sub-policies as (op type, magnitude, probability) triples."""
    torch.nn.Module.__init__(self)
    self.args = args
    # One (op, magnitude, probability) decision triple per operation,
    # repeated for every operation of every sub-policy.
    self.num_tokens = [
        len(args.augment_types),    # number of available augmentation ops
        len(args.magnitude_types),  # number of discretized magnitude bins
        len(args.prob_types)        # number of discretized probability bins
    ] * self.args.op_num_pre_subpolicy * self.args.subpolicy_num
    num_total_tokens = sum(self.num_tokens)  # total token-vocabulary size
    self.encoder = torch.nn.Embedding(num_total_tokens,
                                      args.controller_hid_size)
    # At each step the LSTM decides which op to apply, its magnitude, and
    # its probability.
    self.lstm = torch.nn.LSTMCell(args.controller_hid_size,
                                  args.controller_hid_size)
    # One linear decoder per decision step, sized to that step's choice count.
    self.decoders = []
    for idx, size in enumerate(self.num_tokens):
        decoder = torch.nn.Linear(args.controller_hid_size, size)
        self.decoders.append(decoder)
    self._decoders = torch.nn.ModuleList(self.decoders)
    self._init_parameters()
    # keydefaultdict lazily builds a zero hidden state per batch-size key.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        # Zero LSTM input of shape (key, hid); key is the batch size.
        return utils.get_variable(torch.zeros(
            key, self.args.controller_hid_size),
            self.args.cuda,
            requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args): torch.nn.Module.__init__(self) # args.cuda = True self.args = args self.controller_hid = 100 self.num_tokens = [9, 24, 9] ### num_total_tokens = self.num_tokens[1] self.encoder = torch.nn.Embedding(num_total_tokens, self.controller_hid) self.lstm = torch.nn.LSTMCell(self.controller_hid, self.controller_hid) self.decoders = [] for i in range(self.num_tokens[0]): decoder = torch.nn.Linear(self.controller_hid, self.num_tokens[1]) self.decoders.append(decoder) decoder = torch.nn.Linear(self.controller_hid, self.num_tokens[2]) self.decoders.append(decoder) self._decoders = torch.nn.ModuleList(self.decoders) self.reset_parameters() self.static_init_hidden = utils.keydefaultdict(self.init_hidden) def _get_default_hidden(key): return utils.get_variable(torch.zeros(key, self.controller_hid), self.args.cuda, requires_grad=False) self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args):
    """LSTM controller over a two-slot search space: which op (from OPS)
    and which of the 5 blocks."""
    torch.nn.Module.__init__(self)
    self.args = args
    # [number of candidate ops, number of blocks]
    self.num_tokens = [len(OPS), 5]
    self.func_names = PRIMITIVES
    hid = args.controller_hid
    self.encoder = torch.nn.Embedding(sum(self.num_tokens), hid)
    self.lstm = torch.nn.LSTMCell(hid, hid)
    # One linear decoder per decision slot (decoder weights are unshared).
    self.decoders = [torch.nn.Linear(hid, size) for size in self.num_tokens]
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    # Lazily-created zero states keyed by batch size.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, n_ops, n_nodes, device, controller_hid=None,
             lstm_num_layers=2):
    """Architecture sampler: an attention-augmented LSTM that picks, for
    each node, its predecessor node and operation."""
    super(ArchMaster, self).__init__()
    # Scalar hyper-parameters / bookkeeping.
    self.n_ops = n_ops
    self.n_nodes = n_nodes
    self.device = device
    self.controller_hid = controller_hid
    self.attention_hid = self.controller_hid
    self.lstm_num_layers = lstm_num_layers
    # Total decision count: node i attends over (i + 2) candidates.
    self.K = sum(x + 2 for x in range(n_nodes))

    hid = self.controller_hid
    # Joint embedding for node ids and op ids; per the original note, the
    # (n_nodes + 2)-th node slot exists but is never used.
    self.node_op_hidden = nn.Embedding(n_nodes + 1 + n_ops, hid)
    # Pointer-style attention pieces: score = v_attn(tanh(emb + hid)).
    self.emb_attn = nn.Linear(hid, self.attention_hid, bias=False)
    self.hid_attn = nn.Linear(hid, self.attention_hid, bias=False)
    self.v_attn = nn.Linear(hid, 1, bias=False)
    self.w_soft = nn.Linear(hid, self.n_ops)  # logits over operations
    self.lstm = nn.LSTMCell(hid, hid)
    self.reset_parameters()

    # Lazily-created zero states keyed by batch size.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
    self.static_inputs = utils.keydefaultdict(self._get_default_hidden)
    self.tanh = nn.Tanh()
    self.prev_nodes, self.prev_ops = [], []
    # Indices of candidate predecessor nodes used when querying attention.
    self.query_index = torch.LongTensor(range(0, n_nodes + 1)).to(device)
def __init__(self, args):
    """Controller with `_max_depth` identical steps, one op choice each."""
    torch.nn.Module.__init__(self)
    self.args = args
    # Every decision step chooses among the same operation set.
    self.num_tokens = [len(args.operations)
                       for _ in range(self.args._max_depth)]
    self.op_names = args.operations
    hid = args.controller_hid
    self.encoder = torch.nn.Embedding(sum(self.num_tokens), hid)
    self.lstm = torch.nn.LSTMCell(hid, hid)
    # One decoder per step (weights are not shared across steps).
    self.decoders = [torch.nn.Linear(hid, size) for size in self.num_tokens]
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _zero_input(key):
        # NOTE(review): unlike sibling controllers this returns a plain CPU
        # tensor (no get_variable / cuda transfer) -- preserved as-is.
        return torch.zeros(key, self.args.controller_hid)

    self.static_inputs = utils.keydefaultdict(_zero_input)
def __init__(self, args):
    """Micro-search-space CNN controller.

    Builds token lists for the normal cell and the reduction cell.  Each
    block samples two (input-node, op) pairs; node indices run to idx + 2
    because every block may also use the two cell inputs.
    """
    super(CNNMicroController, self).__init__(args)
    self.args = args
    if self.args.network_type == 'micro_cnn':
        # First node always take the input to the Cell.
        # For normal cell
        # self.num_normal_tokens = [len(args.shared_cnn_normal_types)]
        self.num_normal_tokens = []
        for idx in range(self.args.num_blocks):
            # NOTE for CNN, the node have two input and op rather than 1.
            self.num_normal_tokens += [
                idx + 2, len(args.shared_cnn_normal_types)
            ] * 2
        self.normal_func_names = args.shared_cnn_normal_types
        # Reduction-cell tokens, built the same way as the normal cell's.
        # self.num_reduce_tokens = [len(args.shared_cnn_reduce_types)]
        self.num_reduce_tokens = []
        for idx in range(self.args.num_blocks):
            self.num_reduce_tokens += [
                idx + 2, len(args.shared_cnn_reduce_types)
            ] * 2
        self.reduce_func_names = args.shared_cnn_reduce_types
        # Combine the num tokens as a full list: normal first, then reduce.
        self.num_tokens = self.num_normal_tokens + self.num_reduce_tokens
        self.func_names = [self.normal_func_names, self.reduce_func_names]
    else:
        raise NotImplementedError(
            f'{self.args.network_type} is not supported yet')
    num_total_tokens = sum(self.num_tokens)
    self.encoder = torch.nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)
    # One linear decoder per decision step.
    self.decoders = []
    for idx, size in enumerate(self.num_tokens):
        decoder = torch.nn.Linear(args.controller_hid, size)
        self.decoders.append(decoder)
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    # Lazily-created zero states keyed by batch size.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args):
    """ENAS controller: an LSTM emitting architecture decisions token by
    token for the RNN or macro-CNN search space."""
    torch.nn.Module.__init__(self)
    self.args = args
    if self.args.network_type == 'rnn':
        # NOTE(brendan): `num_tokens` here is just the activation function
        # for every even step,
        self.num_tokens = [len(args.shared_rnn_activations)
                           ]  # ['tanh', 'ReLU', 'identity', 'sigmoid']
        for idx in range(self.args.num_blocks):  # num_blocks: default=12
            self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
        # After the loop num_tokens is [4, 1, 4, 2, 4, ..., 12, 4]: each 4
        # selects an activation, each 1-12 selects a previous-node index.
        self.func_names = args.shared_rnn_activations  # the activation names
    elif self.args.network_type == 'cnn':
        self.num_tokens = [
            len(args.shared_cnn_types), self.args.num_blocks
        ]
        self.func_names = args.shared_cnn_types
    num_total_tokens = sum(self.num_tokens)  # 130 for the default RNN space
    self.encoder = torch.nn.Embedding(num_total_tokens,
                                      args.controller_hid)  # (130, 100)
    self.lstm = torch.nn.LSTMCell(args.controller_hid,
                                  args.controller_hid)  # (100, 100)
    # TODO(brendan): Perhaps these weights in the decoder should be
    # shared? At least for the activation functions, which all have the
    # same size.
    self.decoders = []  # len(self.decoders) == len(self.num_tokens) == 25
    for idx, size in enumerate(self.num_tokens):
        decoder = torch.nn.Linear(args.controller_hid, size)
        self.decoders.append(decoder)
    # ModuleList can be indexed like a regular Python list, but modules it
    # contains are properly registered and visible to all Module methods --
    # it lets us manage many decoders without naming each one.
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    # keydefaultdict calls init_hidden(key) for a missing key, producing a
    # zero Variable of shape (key, controller_hid).
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    # static_inputs is a keydefaultdict whose __missing__ values come from
    # _get_default_hidden, i.e. one zero tensor per batch-size key.
    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args):
    """Controller LSTM supporting the RNN search space and a macro-CNN
    space sized by `cnn_num_blocks`."""
    torch.nn.Module.__init__(self)
    self.args = args
    if self.args.network_type == 'rnn':
        # Activation choice first, then (prev-node, activation) per block.
        n_act = len(args.shared_rnn_activations)
        self.num_tokens = [n_act]
        for block in range(self.args.num_blocks):
            self.num_tokens.extend([block + 1, n_act])
        self.func_names = args.shared_rnn_activations
    elif self.args.network_type == 'cnn':
        n_types = len(args.shared_cnn_types)
        self.num_tokens = [n_types]
        # idx + 2 rather than idx + 1: the original input image is also a
        # valid skip-connection source.
        for idx in range(sum(args.cnn_num_blocks) - 1):
            self.num_tokens.extend([idx + 2, n_types])
        self.func_names = args.shared_cnn_types
    num_total_tokens = sum(self.num_tokens)
    self.encoder = torch.nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)
    # One linear head per decision step.
    self.decoders = [torch.nn.Linear(args.controller_hid, size)
                     for size in self.num_tokens]
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _zero_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_zero_hidden)
def filter_deleted_structure(self, responses):
    """Drop responses whose qnode_measure has a deleted ancestor.

    Returns (kept_responses, []) -- the second element is always empty.
    """
    def _has_deleted_ancestor(thing):
        return thing.closest_deleted_ancestor() is not None

    # Cache the per-measure deletion check across responses.
    deletion_cache = keydefaultdict(_has_deleted_ancestor)
    kept = [resp for resp in responses
            if not deletion_cache[resp.qnode_measure]]
    return kept, []
def __init__(self, args, corpus):
    """Shared ('child') RNN model for ENAS language modelling.

    Holds every weight any sampled DAG can use: shared input transforms
    plus one w_h/w_c linear map per block-to-block connection.
    """
    models.shared_base.SharedModel.__init__(self)
    self.args = args
    self.corpus = corpus
    self.forward_eval = 0
    # hid -> vocab projection, and vocab -> embed lookup with embedding dropout.
    self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
    self.encoder = EmbeddingDropout(corpus.num_tokens,
                                    args.shared_embed,
                                    dropout=args.shared_dropoute)
    self.lockdrop = LockedDropout()
    if self.args.tie_weights:
        # Weight tying: the decoder reuses the embedding matrix.
        self.decoder.weight = self.encoder.weight
    # NOTE(brendan): Since W^{x, c} and W^{h, c} are always summed, there
    # is no point duplicating their bias offset parameter. Likewise for
    # W^{x, h} and W^{h, h}.
    self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)
    self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)
    # The raw weights are stored here because the hidden-to-hidden weights
    # are weight dropped on the forward pass.
    self.w_hc_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    self.w_hh_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    # Filled during forward() from the *_raw tensors after weight drop.
    self.w_hc = None
    self.w_hh = None
    # Upper-triangular dict-of-dicts: connection weights from block idx to jdx.
    self.w_h = collections.defaultdict(dict)
    self.w_c = collections.defaultdict(dict)
    for idx in range(args.num_blocks):
        for jdx in range(idx + 1, args.num_blocks):
            self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
            self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
    # Re-register the dict contents so Module tracks their parameters.
    self._w_h = nn.ModuleList(
        [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
    self._w_c = nn.ModuleList(
        [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
    if args.mode == 'train':
        self.batch_norm = nn.BatchNorm1d(args.shared_hid)
    else:
        self.batch_norm = None
    self.reset_parameters()
    # Zero hidden state per batch-size key, created lazily.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
    logger.info(f'# of parameters: {format(self.num_parameters, ",d")}')
def __init__(self, args):
    """ENAS controller with a REINFORCE baseline slot.

    `baseline` holds the moving-average reward baseline; it starts unset.
    """
    torch.nn.Module.__init__(self)
    self.args = args
    self.baseline = None
    if self.args.network_type == 'rnn':
        # NOTE(brendan): `num_tokens` here is just the activation function
        # for every even step,
        n_act = len(args.shared_rnn_activations)
        self.num_tokens = [n_act]
        for block in range(self.args.num_blocks):
            self.num_tokens.extend([block + 1, n_act])
        self.func_names = args.shared_rnn_activations
    elif self.args.network_type == 'cnn':
        self.num_tokens = [
            len(args.shared_cnn_types), self.args.num_blocks
        ]
        self.func_names = args.shared_cnn_types
    hid = args.controller_hid
    self.encoder = torch.nn.Embedding(sum(self.num_tokens), hid)
    self.lstm = torch.nn.LSTMCell(hid, hid)
    # One linear head per decision step (decoder weights are unshared).
    self.decoders = [torch.nn.Linear(hid, size) for size in self.num_tokens]
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _zero_input(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_zero_input)
def __init__(self, args):
    """Controller that samples one to three DAGs per step.

    `num_dags` starts at 1 and grows by one for each auxiliary controller
    (dual / attention) enabled alongside the single controller.
    Raises Exception for unknown network types.
    """
    super(Controller, self).__init__()
    self.args = args
    self.num_dags = 1
    if self.args.use_single_controller and self.args.use_dual_controller:
        self.num_dags += 1
    if self.args.use_single_controller and self.args.use_atten_controller:
        self.num_dags += 1
    if self.args.network_type in ['rnn', 'seq2seq', 'classification']:
        # Activation choice first, then (prev-node, activation) per block.
        self.num_tokens = [len(args.rnn_activations)]
        for idx in range(self.args.num_blocks):
            self.num_tokens += [idx + 1, len(args.rnn_activations)]
        self.func_names = args.rnn_activations
    elif self.args.network_type == 'cnn':
        self.num_tokens = [len(args.cnn_types), self.args.num_blocks]
        self.func_names = args.cnn_types
    else:
        # BUG FIX: the message was a plain string missing its f-prefix, so
        # the placeholder was never interpolated.
        raise Exception(f'Unknown network type: {self.args.network_type}')
    num_total_tokens = sum(self.num_tokens)
    self.encoder = nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = nn.LSTMCell(args.controller_hid, args.controller_hid)
    # One linear head per decision step.  (Removed the unused `pivot` local.)
    self.decoders = [nn.Linear(args.controller_hid, size)
                     for size in self.num_tokens]
    self._decoders = nn.ModuleList(self.decoders)
    self.reset_parameters()
    self.static_init_hidden = keydefaultdict(self.init_hidden)

    def _zero_input(key):  # was an assigned lambda (PEP 8 E731)
        return get_variable(t.zeros(key, self.args.controller_hid),
                            self.args.cuda, requires_grad=False)

    self.static_inputs = keydefaultdict(_zero_input)
def __init__(self, args, corpus):
    """Shared RNN child model: embedding + searched recurrent-cell weights
    + decoder back to the vocabulary."""
    shared_base.SharedModel.__init__(self)
    self.args = args
    self.corpus = corpus
    # hid -> vocab projection and vocab -> embed lookup (with dropout).
    self.decoder = torch.nn.Linear(in_features=args.shared_hid,
                                   out_features=corpus.num_tokens)
    self.encoder = EmbeddingDropout(corpus.num_tokens,
                                    args.shared_embed,
                                    dropout=args.shared_dropoute)
    self.lockdrop = LockedDropout()
    # Weight tying: the decoder reuses the embedding matrix.
    # NOTE(review): presumably requires shared_embed == shared_hid -- confirm.
    if self.args.tie_weights:
        self.decoder.weight = self.encoder.weight
    self.w_xc = torch.nn.Linear(args.shared_embed, args.shared_hid)
    self.w_xh = torch.nn.Linear(args.shared_embed, args.shared_hid)
    # The raw weights are stored here because the hidden-to-hidden weights
    # are weight dropped on the forward pass.
    self.w_hc_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    self.w_hh_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    # Filled during forward() from the *_raw tensors after weight drop.
    self.w_hc = None
    self.w_hh = None
    # Upper-triangular dict-of-dicts of block-to-block connection weights.
    self.w_h = collections.defaultdict(dict)
    self.w_c = collections.defaultdict(dict)
    for idx in range(args.num_blocks):
        for jdx in range(idx + 1, args.num_blocks):
            self.w_h[idx][jdx] = torch.nn.Linear(args.shared_hid,
                                                 args.shared_hid,
                                                 bias=False)
            self.w_c[idx][jdx] = torch.nn.Linear(args.shared_hid,
                                                 args.shared_hid,
                                                 bias=False)
    # Re-register the dict contents so Module tracks their parameters.
    self._w_h = torch.nn.ModuleList(
        [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
    self._w_c = torch.nn.ModuleList(
        [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
    if args.mode == 'train':
        self.batch_norm = torch.nn.BatchNorm1d(args.shared_hid)
    else:
        self.batch_norm = None
    self.reset_parameters()
    # Zero hidden state per batch-size key, created lazily.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
def __init__(self, args):
    """Controller for the RNN search space.

    For 'micro_cnn' the constructor returns early and builds nothing; a
    subclass is expected to finish initialization after calling this.
    """
    torch.nn.Module.__init__(self)
    self.args = args
    if self.args.network_type == 'rnn':
        # Activation choice first, then (prev-node, activation) per block.
        self.num_tokens = [len(args.shared_rnn_activations)]
        for idx in range(self.args.num_blocks):
            self.num_tokens += [idx + 1, len(args.shared_rnn_activations)]
        self.func_names = args.shared_rnn_activations
    elif self.args.network_type == 'micro_cnn':
        # First node always take the input to the Cell.
        # NOTE(review): deliberate early return -- the micro-CNN subclass
        # builds its own tokens/decoders after this constructor runs.
        return
    else:
        raise NotImplementedError(
            f'{self.args.network_type} is not supported yet')
    num_total_tokens = sum(self.num_tokens)
    self.encoder = torch.nn.Embedding(num_total_tokens, args.controller_hid)
    self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)
    # One linear decoder per decision step.
    self.decoders = []
    for idx, size in enumerate(self.num_tokens):
        decoder = torch.nn.Linear(args.controller_hid, size)
        self.decoders.append(decoder)
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    # Lazily-created zero states keyed by batch size.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args, corpus):
    """Shared RNN model over the DAG search space.

    Embeds corpus tokens, applies the searched recurrent cell built from
    the w_h/w_c connection matrices, and decodes back to the vocabulary.
    """
    super(RNN, self).__init__()
    self.args = args
    self.corpus = corpus
    self.encoder = nn.Embedding(corpus.num_tokens, args.shared_embed)
    self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
    self.lockdrop = LockedDropout()
    if self.args.tie_weights:
        # Weight tying: the decoder reuses the embedding matrix.
        self.decoder.weight = self.encoder.weight
    # Input transforms take [embedding; previous hidden] concatenated.
    self.w_xh = nn.Linear(args.shared_embed + args.shared_hid,
                          args.shared_hid)
    self.w_xc = nn.Linear(args.shared_embed + args.shared_hid,
                          args.shared_hid)
    # Upper-triangular dict-of-dicts of block-to-block connection weights.
    self.w_h, self.w_c = defaultdict(dict), defaultdict(dict)
    for idx in range(args.num_blocks):
        for jdx in range(idx + 1, args.num_blocks):
            self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
            self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
    # Re-register the dict contents so Module tracks their parameters.
    self._w_h = nn.ModuleList(
        [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
    self._w_c = nn.ModuleList(
        [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
    if args.mode == 'train':
        self.batch_norm = nn.BatchNorm1d(args.shared_hid)
    else:
        self.batch_norm = None
    self.reset_parameters()
    self.static_init_hidden = keydefaultdict(self.init_hidden)
    # BUG FIX: was `logger.info(f("..."))` -- calling a nonexistent function
    # `f` on a plain string; an f-string literal was intended.
    logger.info(f"# of parameters: {format(self.num_parameters, ',d')}")
def __init__(self, args):
    """Controller for a CNN search space with optional per-layer
    (`multi_layer`) or per-cell (`every_cell`) architectures and a stacked
    LSTM/RNN backbone of `rnn_layer` cells."""
    torch.nn.Module.__init__(self)
    self.args = args
    self.num_tokens = []
    self.arch_layer = args.layers + 1
    self.multi_layer = args.multi_layer
    self.every_cell = args.every_cell
    # One architecture per layer when multi_layer/every_cell is set,
    # otherwise a single shared architecture.
    if self.multi_layer or self.every_cell:
        layers = self.arch_layer
    else:
        layers = 1
    for _ in range(layers):
        for idx in range(self.args.num_blocks):
            # Each block picks two input nodes and two ops.
            self.num_tokens += [
                idx + 1, idx + 1,
                len(args.shared_cnn_types),
                len(args.shared_cnn_types)
            ]
    if self.every_cell:
        # NOTE(review): this appends another (layers - 1) copies on top of
        # the `layers` copies above -- presumably extra per-cell decisions;
        # confirm against the sampling code.
        for _ in range(layers - 1):
            for idx in range(self.args.num_blocks):
                self.num_tokens += [
                    idx + 1, idx + 1,
                    len(args.shared_cnn_types),
                    len(args.shared_cnn_types)
                ]
    self.func_names = args.shared_cnn_types
    num_total_tokens = sum(self.num_tokens)
    self.encoder = torch.nn.Embedding(num_total_tokens, args.controller_hid)
    # Stacked recurrent backbone: rnn_layer LSTM cells or plain RNN cells.
    if self.args.rnn_type == 'lstm':
        self.lstm = []
        for _ in range(self.args.rnn_layer):
            self.lstm.append(
                torch.nn.LSTMCell(args.controller_hid, args.controller_hid))
        self._lstm = torch.nn.ModuleList(self.lstm)
    if self.args.rnn_type == 'rnn':
        self.rnn = []
        for _ in range(self.args.rnn_layer):
            self.rnn.append(
                torch.nn.RNNCell(args.controller_hid, args.controller_hid))
        self._rnn = torch.nn.ModuleList(self.rnn)
    #self.lstm = torch.nn.LSTMCell(args.controller_hid, args.controller_hid)
    # TODO(brendan): Perhaps these weights in the decoder should be
    # shared? At least for the activation functions, which all have the
    # same size.
    # NOTE(review): num_tokens already spans all layers, so these loops
    # build layers * len(num_tokens) decoders (every_cell adds even more)
    # -- verify this multiplicity is intended.
    self.decoders = []
    for _ in range(layers):
        for idx, size in enumerate(self.num_tokens):
            decoder = torch.nn.Linear(args.controller_hid, size)
            self.decoders.append(decoder)
    if self.every_cell:
        for _ in range(layers - 1):
            for idx, size in enumerate(self.num_tokens):
                decoder = torch.nn.Linear(args.controller_hid, size)
                self.decoders.append(decoder)
    self._decoders = torch.nn.ModuleList(self.decoders)
    self.reset_parameters()
    # Lazily-created zero states keyed by batch size.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(torch.zeros(key, self.args.controller_hid),
                                  self.args.cuda,
                                  requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args, corpus):
    """Shared RNN model over the corpus.

    Args:
        args: command-line arguments.
        corpus: dataset wrapper providing `num_tokens` (vocabulary size).

    Attributes built here:
        decoder: Linear map, hidden size (shared_hid) -> vocabulary size.
        encoder: custom EmbeddingDropout layer, vocabulary -> shared_embed,
            with configurable dropout.
        lockdrop: a standalone LockedDropout layer.
        w_xc/w_xh/w_hc/w_hh/w_hc_raw/w_hh_raw/w_h/w_c: the RNN parameter
            matrices; only initialization happens here.
        static_init_hidden: lazily supplies a zero hidden state in forward()
            when none is passed.
    """
    models.shared_base.SharedModel.__init__(self)  # initialize the base class
    self.args = args
    self.corpus = corpus
    # Linear projection from the hidden size back to vocabulary logits.
    self.decoder = nn.Linear(
        args.shared_hid, corpus.num_tokens
    )  # e.g. shared_hid=1000 -> corpus.num_tokens=10000 distinct words
    # Embedding from the vocabulary into the shared embedding space.
    self.encoder = EmbeddingDropout(
        corpus.num_tokens,
        args.shared_embed,  # embedding size, e.g. 1000
        dropout=args.shared_dropoute)  # e.g. shared_dropoute=0.1
    self.lockdrop = LockedDropout()  # standalone dropout layer
    # Weight tying: the decoder reuses the embedding matrix.
    if self.args.tie_weights:
        self.decoder.weight = self.encoder.weight
    # NOTE(brendan): Since W^{x, c} and W^{h, c} are always summed, there
    # is no point duplicating their bias offset parameter. Likewise for
    # W^{x, h} and W^{h, h}.
    self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)  # (1000, 1000)
    self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)
    # The raw weights are stored here because the hidden-to-hidden weights
    # are weight dropped on the forward pass.
    self.w_hc_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    self.w_hh_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    # These two are produced in forward() from the *_raw weights (dropout).
    self.w_hc = None
    self.w_hh = None
    # collections.defaultdict(dict): each entry is itself a dict, giving a
    # two-level lookup table.
    self.w_h = collections.defaultdict(
        dict
    )
    self.w_c = collections.defaultdict(dict)
    for idx in range(args.num_blocks):
        for jdx in range(idx + 1, args.num_blocks):
            # Upper-triangular matrix of per-connection block weights.
            self.w_h[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
            self.w_c[idx][jdx] = nn.Linear(args.shared_hid,
                                           args.shared_hid,
                                           bias=False)
    # Re-store the dict matrices in ModuleLists so Module tracks them.
    self._w_h = nn.ModuleList(
        [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
    self._w_c = nn.ModuleList(
        [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
    if args.mode == 'train':
        self.batch_norm = nn.BatchNorm1d(args.shared_hid)
    else:
        self.batch_norm = None
    # Reset all parameters.
    self.reset_parameters()
    # keydefaultdict (subclass of defaultdict implementing __missing__)
    # fills an absent key via init_hidden(key), i.e. an all-zero Variable,
    # so a zero hidden state is produced on demand per batch size.
    self.static_init_hidden = utils.keydefaultdict(
        self.init_hidden)  # init_hidden returns an all-zero Variable
    logger.info('# of parameters: {0}'.format(
        format(self.num_parameters, ",d")))
from utils import keydefaultdict

input_path = sys.argv[1]
input_filename = os.path.splitext(input_path)[0]
reader = csv.DictReader(open(input_path))


def make_csv_writer(solution_depth):
    """Create a CSV writer for one solution depth.

    Uses input_filename and reader.fieldnames globals.
    """
    path = "%s_depth%s.csv" % (input_filename, solution_depth)
    # BUG FIX: was open(path, 'wb') -- the csv module writes str, so binary
    # mode raises TypeError on Python 3.  Text mode with newline='' is the
    # documented requirement for csv writers.
    writer = csv.DictWriter(open(path, 'w', newline=''), reader.fieldnames)
    writer.writeheader()
    return writer


# Writers are created lazily, one output file per first-solved depth.
gridname_to_csv_writer = keydefaultdict(make_csv_writer)

solution_depth_columns = [
    col_name for col_name in reader.fieldnames
    if col_name.endswith(" Solution Depth")
]

# Route each row to the writer for its first non-(-1) depth column; rows
# unsolved at every depth fall through to the depth -1 file.
for row in reader:
    for col_name in solution_depth_columns:
        depth = int(row[col_name])
        if depth != -1:
            gridname_to_csv_writer[depth].writerow(row)
            break
    else:
        gridname_to_csv_writer[-1].writerow(row)
def __init__(self, args, num_layers=3, skip_conn=False, controller_hid=100,
             cuda=True, mode="train", softmax_temperature=5.0, tanh_c=2.5):
    """Controller LSTM over `state_space`, optionally with per-layer
    skip-connection decisions.

    Args override the keyword defaults for softmax_temperature / tanh_c
    when provided.
    """
    torch.nn.Module.__init__(self)
    self.mode = mode
    self.num_layers = num_layers
    self.skip_conn = skip_conn
    self.controller_hid = controller_hid
    self.is_cuda = cuda
    if args and args.softmax_temperature:
        self.softmax_temperature = args.softmax_temperature
    else:
        self.softmax_temperature = softmax_temperature
    if args and args.tanh_c:
        self.tanh_c = args.tanh_c
    else:
        self.tanh_c = tanh_c

    self.num_tokens = []
    # Per-key choice counts, needed by BOTH branches.
    # BUG FIX: previously only the no-skip branch populated this list, so in
    # skip_conn mode the shared state decoders below were never created.
    state_space_length = [len(state_space[key]) for key in state_space.keys()]
    if not skip_conn:
        for _ in range(self.num_layers):
            self.num_tokens += state_space_length
    else:
        # Layer idx additionally chooses one of `idx` earlier layers to
        # skip-connect from.
        for idx in range(1, self.num_layers + 1):
            self.num_tokens += [idx]
        # BUG FIX: was `self.num_tokens += len(state_space[key])`, which
        # raises TypeError (cannot concatenate int to list); the per-key
        # counts must be appended as list elements.  Layout mirrors the
        # decoder order below (all skip slots, then the shared state slots).
        self.num_tokens += state_space_length
    num_total_tokens = sum(self.num_tokens)

    self.encoder = torch.nn.Embedding(num_total_tokens, controller_hid)
    self.lstm = torch.nn.LSTMCell(controller_hid, controller_hid)

    self.decoders = []
    if not skip_conn:
        # Share the same decoder per state-space key across layers.
        for idx, size in enumerate(state_space_length):
            decoder = torch.nn.Linear(controller_hid, size)
            self.decoders.append(decoder)
    else:
        # Shared decoders for the common (state-space) actions.
        state_decoder = []
        for idx, size in enumerate(state_space_length):
            decoder = torch.nn.Linear(controller_hid, size)
            state_decoder.append(decoder)
        # One skip-connection decoder per layer: layer idx picks among idx
        # earlier layers.
        for idx in range(1, self.num_layers + 1):
            decoder = torch.nn.Linear(controller_hid, idx)
            self.decoders.append(decoder)
        # Common actions follow the skip decoders.
        for decoder in state_decoder:
            self.decoders.append(decoder)
    self._decoders = torch.nn.ModuleList(self.decoders)

    self.reset_parameters()
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)

    def _get_default_hidden(key):
        return utils.get_variable(
            torch.zeros(key, controller_hid),
            cuda,
            requires_grad=False)

    self.static_inputs = utils.keydefaultdict(_get_default_hidden)
def __init__(self, args, corpus):
    """Shared RNN model whose per-connection weights are also registered in
    `connections` (keyed via node_to_key) for WPL/Fisher monitoring."""
    super(RNN, self).__init__(args)
    self.args = args
    self.corpus = corpus
    # hid -> vocab projection and vocab -> embed lookup (with dropout).
    self.decoder = nn.Linear(args.shared_hid, corpus.num_tokens)
    self.encoder = EmbeddingDropout(corpus.num_tokens,
                                    args.shared_embed,
                                    dropout=args.shared_dropoute)
    self.lockdrop = LockedDropout()
    if self.args.tie_weights:
        # Weight tying: the decoder reuses the embedding matrix.
        self.decoder.weight = self.encoder.weight
    self.w_xc = nn.Linear(args.shared_embed, args.shared_hid)
    self.w_xh = nn.Linear(args.shared_embed, args.shared_hid)
    # The raw weights are stored here because the hidden-to-hidden weights
    # are weight dropped on the forward pass.
    self.w_hc_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    self.w_hh_raw = torch.nn.Parameter(
        torch.Tensor(args.shared_hid, args.shared_hid))
    # Filled during forward() from the *_raw tensors after weight drop.
    self.w_hc = None
    self.w_hh = None
    # should remove? or keep as it? since it is in the module list.
    self.w_h = collections.defaultdict(dict)
    self.w_c = collections.defaultdict(dict)
    # keep connections for WPL module
    self.connections = dict()
    self.w_h_opt = collections.defaultdict(dict)
    self.w_c_opt = collections.defaultdict(dict)
    # Per-connection Fisher-information slots, keyed like w_h / w_c.
    self.fisher = {}
    self.fisher['w_h'] = collections.defaultdict(dict)
    self.fisher['w_c'] = collections.defaultdict(dict)
    for idx in range(args.num_blocks):
        for jdx in range(idx + 1, args.num_blocks):
            # Each connection weight is stored both in the dict-of-dicts
            # and, under a node_to_key string key, in `connections`.
            self.w_h[idx][jdx] = self.connections[node_to_key((idx, jdx, 'h'))] = \
                nn.Linear(args.shared_hid, args.shared_hid, bias=False)
            self.w_c[idx][jdx] = self.connections[node_to_key((idx, jdx, 'c'))] = \
                nn.Linear(args.shared_hid, args.shared_hid, bias=False)
    # Re-register the dict contents so Module tracks their parameters.
    self._w_h = nn.ModuleList(
        [self.w_h[idx][jdx] for idx in self.w_h for jdx in self.w_h[idx]])
    self._w_c = nn.ModuleList(
        [self.w_c[idx][jdx] for idx in self.w_c for jdx in self.w_c[idx]])
    if args.mode == 'train':
        self.batch_norm = nn.BatchNorm1d(args.shared_hid)
    else:
        self.batch_norm = None
    self.reset_parameters()
    # Zero hidden state per batch-size key, created lazily.
    self.static_init_hidden = utils.keydefaultdict(self.init_hidden)
    # WPL monitors exactly the connection weights.
    self.wpl_monitored_modules = self.connections
    logger.info(f'# of parameters: {format(self.num_parameters, ",d")}')