def __init__(self, composition, device, context=None): if not torch_available: raise Exception( 'Pytorch python module (torch) is not installed. Please install it with ' '`pip install torch` or `pip3 install torch`') super(PytorchModelCreator, self).__init__() # Maps Mechanism -> PytorchMechanismWrapper self.nodes = [] self.component_map = {} # Maps Projections -> PytorchProjectionWrappers self.projections = [] self.projection_map = {} self.params = nn.ParameterList() self.device = device self._composition = composition # Instantiate pytorch mechanisms for node in set(composition.nodes) - set( composition.get_nodes_by_role(NodeRole.LEARNING)): pytorch_node = PytorchMechanismWrapper( node, self._composition._get_node_index(node), device, context=context) self.component_map[node] = pytorch_node self.nodes.append(pytorch_node) # Instantiate pytorch projections for projection in composition.projections: if projection.sender.owner in self.component_map and projection.receiver.owner in self.component_map: proj_send = self.component_map[projection.sender.owner] proj_recv = self.component_map[projection.receiver.owner] port_idx = projection.sender.owner.output_ports.index( projection.sender) new_proj = PytorchProjectionWrapper( projection, list(self._composition._inner_projections).index( projection), port_idx, device, sender=proj_send, receiver=proj_recv, context=context) proj_send.add_efferent(new_proj) proj_recv.add_afferent(new_proj) self.projection_map[projection] = new_proj self.projections.append(new_proj) self.params.append(new_proj.matrix) # Setup execution sets # 1) Remove all learning-specific nodes self.execution_sets = [ x - set(composition.get_nodes_by_role(NodeRole.LEARNING)) for x in composition.scheduler.run(context=context) ] # 2) Convert to pytorchcomponent representation self.execution_sets = [{ self.component_map[comp] for comp in s if comp in self.component_map } for s in self.execution_sets] # 3) Remove empty execution sets self.execution_sets = [x for x in self.execution_sets if len(x) > 0]
def __init__(self, use_spatial_model=True, gpu_cuda=True): super(PoseDetector, self).__init__() self.model_size = cfg.MODEL_SIZE self.output_shape = (60, 90, 10) self.use_spatial_model = use_spatial_model self.gpu_cuda = gpu_cuda self._test_dataloader = self.test_dataloader() # Model joints: self.joint_names = [ 'lsho', 'lelb', 'lwri', 'rsho', 'relb', 'rwri', 'lhip', 'rhip', 'nose', 'torso' ] self.joint_dependence = {} ## Assuming there is co-dependence between EVERY joint pairs for joint in self.joint_names: self.joint_dependence[joint] = [ joint_cond for joint_cond in self.joint_names if joint_cond != joint ] ## Initializing pairwise energies and bias between Joints self.pairwise_energies, self.pairwise_biases = {}, {} for joint in self.joint_names: #[:n_joints]: for cond_joint in self.joint_dependence[joint]: #TODO : manage dynamic sizing (in-place of 120,180) joint_key = joint + '_' + cond_joint if self.gpu_cuda: self.pairwise_energies[joint_key] = nn.Parameter( torch.ones([1, 119, 179, 1], dtype=torch.float32, requires_grad=True, device="cuda") / (119 * 179)) self.pairwise_biases[joint_key] = nn.Parameter( torch.ones([1, 60, 90, 1], dtype=torch.float32, requires_grad=True, device="cuda") * 1e-5) else: self.pairwise_energies[joint_key] = nn.Parameter( torch.ones([1, 119, 179, 1], dtype=torch.float32, requires_grad=True) / (119 * 179)) self.pairwise_biases[joint_key] = nn.Parameter( torch.ones([1, 60, 90, 1], dtype=torch.float32, requires_grad=True) * 1e-5) #This line is needed in order to pass all pairwise parameters to the optimizer self.pairwise_parameters = nn.ParameterList([ self.pairwise_energies[joint_key] for joint_key in self.pairwise_energies.keys() ] + [ self.pairwise_biases[joint_key] for joint_key in self.pairwise_biases.keys() ]) # Layers for full resolution image self.fullres_layer1 = nn.Sequential( nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 1), nn.MaxPool2d(2, stride=2)) self.fullres_layer2 = nn.Sequential( nn.Conv2d(self.model_size * 1, self.model_size * 2, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 2), nn.MaxPool2d(2, stride=2)) self.fullres_layer3 = nn.Sequential( nn.Conv2d(self.model_size * 2, self.model_size * 4, 9, stride=1, padding=4), nn.ReLU(), nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2)) # Layers for half resolution image self.halfres_layer1 = nn.Sequential( nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 1), nn.MaxPool2d(2, stride=2)) self.halfres_layer2 = nn.Sequential( nn.Conv2d(self.model_size * 1, self.model_size * 2, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 2), nn.MaxPool2d(2, stride=2)) self.halfres_layer3 = nn.Sequential( nn.Conv2d(self.model_size * 2, self.model_size * 4, 9, stride=1, padding=4), nn.ReLU(), nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2)) # Layers for quarter resolution image self.quarterres_layer1 = nn.Sequential( nn.Conv2d(3, self.model_size * 1, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 1), nn.MaxPool2d(2, stride=2)) self.quarterres_layer2 = nn.Sequential( nn.Conv2d(self.model_size * 1, self.model_size * 2, 5, stride=1, padding=2), nn.ReLU(), nn.BatchNorm2d(self.model_size * 2), nn.MaxPool2d(2, stride=2, padding=1) #Adding padding so upsample dimension fit ) self.quarterres_layer3 = nn.Sequential( nn.Conv2d(self.model_size * 2, self.model_size * 4, 9, stride=1, padding=4), nn.ReLU(), 
nn.BatchNorm2d(self.model_size * 4), nn.MaxPool2d(2, stride=2)) # Last common layers self.last_layers = nn.Sequential( nn.Conv2d(self.model_size * 4, self.model_size * 4, 9, stride=1, padding=4), nn.ReLU(), nn.BatchNorm2d(self.model_size * 4), nn.Conv2d(self.model_size * 4, self.output_shape[2], 9, stride=1, padding=4)) ## Upsampling and downsampling self.conv_downsample = nn.Sequential( nn.Conv2d(3, 3, 3, stride=2, padding=1), nn.Conv2d(3, 3, 1, stride=1, padding=0)) self.conv_upsample = nn.ConvTranspose2d(self.model_size * 4, self.model_size * 4, 3, stride=2, padding=1) self.conv1_1 = nn.Conv2d(self.model_size * 4, self.model_size * 4, 1, stride=1, padding=0) ## Softplus for spatial model self.softplus = nn.Softplus(beta=5) ## Batchnorm for spatial model self.BN_SM = nn.BatchNorm2d(self.output_shape[2])
def __init__(self, params, placedb): """ @brief initialization @param params parameter @param placedb placement database """ torch.manual_seed(params.random_seed) super(BasicPlace, self).__init__() tt = time.time() self.init_pos = np.zeros(placedb.num_nodes * 2, dtype=placedb.dtype) # x position self.init_pos[0:placedb.num_physical_nodes] = placedb.node_x if params.global_place_flag and params.random_center_init_flag: # move to center of layout logging.info( "move cells to the center of layout with random noise") self.init_pos[0:placedb.num_movable_nodes] = np.random.normal( loc=(placedb.xl * 1.0 + placedb.xh * 1.0) / 2, scale=(placedb.xh - placedb.xl) * 0.001, size=placedb.num_movable_nodes) #self.init_pos[0:placedb.num_movable_nodes] = init_x[0:placedb.num_movable_nodes]*0.01 + (placedb.xl+placedb.xh)/2 # y position self.init_pos[placedb.num_nodes:placedb.num_nodes + placedb.num_physical_nodes] = placedb.node_y if params.global_place_flag and params.random_center_init_flag: # move to center of layout self.init_pos[placedb.num_nodes:placedb.num_nodes + placedb.num_movable_nodes] = np.random.normal( loc=(placedb.yl * 1.0 + placedb.yh * 1.0) / 2, scale=(placedb.yh - placedb.yl) * 0.001, size=placedb.num_movable_nodes) #init_y[0:placedb.num_movable_nodes] = init_y[0:placedb.num_movable_nodes]*0.01 + (placedb.yl+placedb.yh)/2 if placedb.num_filler_nodes: # uniformly distribute filler cells in the layout self.init_pos[placedb.num_physical_nodes:placedb. num_nodes] = np.random.uniform( low=placedb.xl, high=placedb.xh - placedb.node_size_x[-placedb.num_filler_nodes], size=placedb.num_filler_nodes) self.init_pos[placedb.num_nodes + placedb.num_physical_nodes:placedb.num_nodes * 2] = np.random.uniform( low=placedb.yl, high=placedb.yh - placedb.node_size_y[-placedb.num_filler_nodes], size=placedb.num_filler_nodes) logging.debug("prepare init_pos takes %.2f seconds" % (time.time() - tt)) self.device = torch.device("cuda" if params.gpu else "cpu") # position should be parameter # must be defined in BasicPlace tt = time.time() self.pos = nn.ParameterList( [nn.Parameter(torch.from_numpy(self.init_pos).to(self.device))]) logging.debug("build pos takes %.2f seconds" % (time.time() - tt)) # shared data on device for building ops # I do not want to construct the data from placedb again and again for each op tt = time.time() self.data_collections = PlaceDataCollection(self.pos, params, placedb, self.device) logging.debug("build data_collections takes %.2f seconds" % (time.time() - tt)) # similarly I wrap all ops tt = time.time() self.op_collections = PlaceOpCollection() logging.debug("build op_collections takes %.2f seconds" % (time.time() - tt)) tt = time.time() # position to pin position self.op_collections.pin_pos_op = self.build_pin_pos( params, placedb, self.data_collections, self.device) # bound nodes to layout region self.op_collections.move_boundary_op = self.build_move_boundary( params, placedb, self.data_collections, self.device) # hpwl and density overflow ops for evaluation self.op_collections.hpwl_op = self.build_hpwl( params, placedb, self.data_collections, self.op_collections.pin_pos_op, self.device) # rectilinear minimum steiner tree wirelength from flute # can only be called once #self.op_collections.rmst_wl_op = self.build_rmst_wl(params, placedb, self.op_collections.pin_pos_op, torch.device("cpu")) #self.op_collections.density_overflow_op = self.build_density_overflow(params, placedb, self.data_collections, self.device) self.op_collections.density_overflow_op = 
self.build_electric_overflow( params, placedb, self.data_collections, self.device) # legality check self.op_collections.legality_check_op = self.build_legality_check( params, placedb, self.data_collections, self.device) # legalization self.op_collections.legalize_op = self.build_legalization( params, placedb, self.data_collections, self.device) # detailed placement self.op_collections.detailed_place_op = self.build_detailed_placement( params, placedb, self.data_collections, self.device) # draw placement self.op_collections.draw_place_op = self.build_draw_placement( params, placedb) # flag for rmst_wl_op # can only read once self.read_lut_flag = True logging.debug("build BasicPlace ops takes %.2f seconds" % (time.time() - tt))
def __init__(self, in_channels: List, latent_dim: int, n_dataset: List, hidden_dims: List = None, alpha: float = None, gamma: float = 1000., max_capacity: int = 25, capacity_max_iter: int = 1e5, loss_type: str = 'B', intercept_adj: bool = True, slope_adj: bool = True, log=False): super(VAE, self).__init__() self.latent_dim = latent_dim if alpha is None: self.alpha = 50.0 / latent_dim else: self.alpha = alpha self.gamma = gamma # TODO: what is gamma self.loss_type = loss_type self.C_max = torch.Tensor([max_capacity]) self.C_stop_iter = capacity_max_iter self.device = "cuda" if torch.cuda.is_available() else "cpu" self.experts = ProductOfExperts() self.n_dataset = n_dataset self.intercept_adj = intercept_adj self.slope_adj = slope_adj self.log = log self.beta = nn.ParameterList() for in_ch in in_channels: self.beta.append( torch.nn.Parameter(xavier_init(latent_dim, in_ch), requires_grad=True)) self.beta_dataset = nn.ParameterList() for in_ch, n_d in zip(in_channels, n_dataset): self.beta_dataset.append( torch.nn.Parameter(xavier_init(n_d, in_ch), requires_grad=True)) self.beta_dataset_mtp = nn.ParameterList() for in_ch, n_d in zip(in_channels, n_dataset): # torch,rand returns uniform[0, 1) self.beta_dataset_mtp.append( torch.nn.Parameter(torch.rand(n_d, in_ch), requires_grad=True)) if hidden_dims is None: hidden_dims = [128, 64] # Constructing Laplace Approximation to Dirichlet Prior # The greater the alpha, the higher the mode. That is, the probs will # be more centered around (1/latent_dim, ..., 1/latent_dim) self.a = self.alpha * torch.ones(1, self.latent_dim) self.mu2 = (torch.log(self.a) - torch.mean(torch.log(self.a), 1)).to(device=self.device) self.var2 = (((1 / self.a) * (1 - (2.0 / self.latent_dim))) + (1.0 / (self.latent_dim * self.latent_dim)) * torch.sum(1 / self.a, 1)).to(device=self.device) self.encoder = nn.ModuleList() self.fc_mu = nn.ModuleList() self.fc_var = nn.ModuleList() for in_ch in in_channels: # Build Encoder modules = [] current_in = in_ch for h_dim in hidden_dims: modules.append( nn.Sequential(nn.Linear(current_in, h_dim), nn.BatchNorm1d(h_dim), nn.LeakyReLU() ) # the original paper use tf.nn.softplus ) current_in = h_dim self.encoder.append(nn.Sequential(*modules)) self.fc_mu.append(nn.Linear(hidden_dims[-1], latent_dim)) self.fc_var.append(nn.Linear(hidden_dims[-1], latent_dim))
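
# Hedged sketch: mu2/var2 above follow the Laplace approximation of a
# Dirichlet(alpha) prior in the softmax basis (Srivastava & Sutton, 2017).
# A typical use, assumed here since this model's loss is not shown, is the
# closed-form KL between the encoder's diagonal Gaussian and that prior.
# All concrete numbers below are illustrative.
import torch


def prior_kl(mu, log_var, mu2, var2):
    # KL( N(mu, exp(log_var)) || N(mu2, var2) ), summed over latent dims
    var = log_var.exp()
    kl = 0.5 * (torch.log(var2) - log_var + (var + (mu - mu2) ** 2) / var2 - 1.0)
    return kl.sum(dim=1).mean()


latent_dim, alpha = 10, 5.0
a = alpha * torch.ones(1, latent_dim)
mu2 = torch.log(a) - torch.mean(torch.log(a), 1)
var2 = ((1 / a) * (1 - 2.0 / latent_dim)
        + torch.sum(1 / a, 1) / latent_dim ** 2)
print(prior_kl(torch.zeros(4, latent_dim), torch.zeros(4, latent_dim), mu2, var2))
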
def __init__(self, points=1024, class_num=40, embed_dim=64, heads=4, dim_head=32, pre_blocks=[2, 2, 2, 2], pos_blocks=[2, 2, 2, 2], k_neighbors=[32, 32, 32, 32], reducers=[2, 2, 2, 2], **kwargs): super(Model7, self).__init__() self.stages = len(pre_blocks) self.class_num = class_num self.heads = heads self.dim_head = dim_head self.points = points self.embedding = nn.Sequential(FCBNReLU1D(3, embed_dim), nn.Conv1d(embed_dim, embed_dim, 1)) assert len(pre_blocks)==len(k_neighbors)==len(reducers)==len(pos_blocks), \ "Please check stage number consistent for pre_blocks, pos_blocks k_neighbors, reducers." self.local_grouper_list = nn.ModuleList() self.pre_blocks_list = nn.ModuleList() self.pos_blocks_list = nn.ModuleList() self.local_token_list = nn.ParameterList() self.global_token_list = nn.ParameterList() last_channel = embed_dim anchor_points = self.points for i in range(len(pre_blocks)): out_channel = last_channel * 2 pre_block_num = pre_blocks[i] pos_block_num = pos_blocks[i] kneighbor = k_neighbors[i] reduce = reducers[i] anchor_points = anchor_points // reduce # dim_head = out_channel*2//self.heads # append local_grouper_list local_grouper = LocalGrouper(anchor_points, kneighbor) #[b,g,k,d] self.local_grouper_list.append(local_grouper) # append pre_block_list pre_block_module = PreExtraction(out_channel, pre_block_num, heads=self.heads, dim_head=self.dim_head) self.pre_blocks_list.append(pre_block_module) local_token = nn.Parameter(torch.rand([1, 1, 1, out_channel])) self.local_token_list.append(local_token) # append pos_block_list pos_block_module = PosExtraction(out_channel, pos_block_num, heads=self.heads, dim_head=self.dim_head) self.pos_blocks_list.append(pos_block_module) global_token = nn.Parameter(torch.rand([1, 1, out_channel])) self.global_token_list.append(global_token) last_channel = out_channel self.classifier = nn.Sequential( nn.Linear(last_channel, last_channel // 4), nn.BatchNorm1d(last_channel // 4), nn.ReLU(), nn.Dropout(0.2), nn.Linear(last_channel // 4, self.class_num))
def __init__( self, triples_factory: TriplesFactory, embedding_dim: int = 500, num_bases_or_blocks: int = 5, num_layers: int = 2, use_bias: bool = True, use_batch_norm: bool = False, activation_cls: Optional[Type[nn.Module]] = None, activation_kwargs: Optional[Mapping[str, Any]] = None, sparse_messages_slcwa: bool = True, edge_dropout: float = 0.4, self_loop_dropout: float = 0.2, edge_weighting: Callable[ [torch.LongTensor, torch.LongTensor], torch.FloatTensor, ] = inverse_indegree_edge_weights, decomposition: str = 'basis', buffer_messages: bool = True, base_representations: Optional[RepresentationModule] = None, ): super().__init__() self.triples_factory = triples_factory # normalize representations if base_representations is None: base_representations = Embedding( num_embeddings=triples_factory.num_entities, embedding_dim=embedding_dim, # https://github.com/MichSchli/RelationPrediction/blob/c77b094fe5c17685ed138dae9ae49b304e0d8d89/code/encoders/affine_transform.py#L24-L28 initializer=nn.init.xavier_uniform_, ) self.base_embeddings = base_representations self.embedding_dim = embedding_dim # check decomposition self.decomposition = decomposition if self.decomposition == 'basis': if num_bases_or_blocks is None: logging.info( 'Using a heuristic to determine the number of bases.') num_bases_or_blocks = triples_factory.num_relations // 2 + 1 if num_bases_or_blocks > triples_factory.num_relations: raise ValueError( 'The number of bases should not exceed the number of relations.' ) elif self.decomposition == 'block': if num_bases_or_blocks is None: logging.info( 'Using a heuristic to determine the number of blocks.') num_bases_or_blocks = 2 if embedding_dim % num_bases_or_blocks != 0: raise ValueError( 'With block decomposition, the embedding dimension has to be divisible by the number of' f' blocks, but {embedding_dim} % {num_bases_or_blocks} != 0.', ) else: raise ValueError( f'Unknown decomposition: "{decomposition}". Please use either "basis" or "block".' 
) self.num_bases = num_bases_or_blocks self.edge_weighting = edge_weighting self.edge_dropout = edge_dropout if self_loop_dropout is None: self_loop_dropout = edge_dropout self.self_loop_dropout = self_loop_dropout self.use_batch_norm = use_batch_norm if activation_cls is None: activation_cls = nn.ReLU self.activation_cls = activation_cls self.activation_kwargs = activation_kwargs if use_batch_norm: if use_bias: logger.warning( 'Disabling bias because batch normalization was used.') use_bias = False self.use_bias = use_bias self.num_layers = num_layers self.sparse_messages_slcwa = sparse_messages_slcwa # Save graph using buffers, such that the tensors are moved together with the model h, r, t = self.triples_factory.mapped_triples.t() self.register_buffer('sources', h) self.register_buffer('targets', t) self.register_buffer('edge_types', r) self.activations = nn.ModuleList([ self.activation_cls(**(self.activation_kwargs or {})) for _ in range(self.num_layers) ]) # Weights self.bases = nn.ParameterList() if self.decomposition == 'basis': self.att = nn.ParameterList() for _ in range(self.num_layers): self.bases.append( nn.Parameter( data=torch.empty( self.num_bases, self.embedding_dim, self.embedding_dim, ), requires_grad=True, )) self.att.append( nn.Parameter( data=torch.empty( self.triples_factory.num_relations + 1, self.num_bases, ), requires_grad=True, )) elif self.decomposition == 'block': block_size = self.embedding_dim // self.num_bases for _ in range(self.num_layers): self.bases.append( nn.Parameter( data=torch.empty( self.triples_factory.num_relations + 1, self.num_bases, block_size, block_size, ), requires_grad=True, )) self.att = None else: raise NotImplementedError if self.use_bias: self.biases = nn.ParameterList([ nn.Parameter(torch.empty(self.embedding_dim), requires_grad=True) for _ in range(self.num_layers) ]) else: self.biases = None if self.use_batch_norm: self.batch_norms = nn.ModuleList([ nn.BatchNorm1d(num_features=self.embedding_dim) for _ in range(self.num_layers) ]) else: self.batch_norms = None # buffering of messages self.buffer_messages = buffer_messages self.enriched_embeddings = None
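
# Hedged sketch of how a basis decomposition like the one above is typically
# assembled (Schlichtkrull et al., 2018): per-relation weights are linear
# combinations of the shared bases, W_r = sum_b att[r, b] * bases[b]. The
# layer's actual message-passing code is not shown, so this only illustrates
# the weight construction; the sizes are illustrative.
import torch

num_relations, num_bases, dim = 10, 5, 16
bases = torch.empty(num_bases, dim, dim)
att = torch.empty(num_relations + 1, num_bases)  # +1 for the self-loop "relation"
torch.nn.init.xavier_uniform_(bases)
torch.nn.init.xavier_uniform_(att)

# (R+1, B) @ (B, D*D) -> (R+1, D, D): one weight matrix per relation
w_per_relation = (att @ bases.view(num_bases, -1)).view(-1, dim, dim)
print(w_per_relation.shape)  # torch.Size([11, 16, 16])
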
def __init__(self, LayerNo):
    super(LPD_Net, self).__init__()
    self.name = "LPD_Net"
    self.LayerNo = LayerNo
    self.filter_size = 3
    self.conv_size = 32
    # per-layer scalar step sizes and soft-thresholding parameters
    self.eta_step = nn.ParameterList()
    self.sigma_step = nn.ParameterList()
    self.soft_thr = nn.ParameterList()
    self.soft_a = nn.ParameterList()
    self.delta = nn.ParameterList()
    self.A2 = nn.ModuleList()
    self.B = nn.ModuleList()
    self.AT2 = nn.ModuleList()
    self.BT = nn.ModuleList()
    for _ in range(self.LayerNo):
        self.eta_step.append(nn.Parameter(torch.Tensor([0.1])))
        self.sigma_step.append(nn.Parameter(torch.Tensor([1])))
        self.soft_thr.append(nn.Parameter(torch.Tensor([0.1])))
        self.soft_a.append(nn.Parameter(torch.Tensor([50])))
        self.delta.append(nn.Parameter(torch.Tensor([0.1])))
        self.A2.append(
            nn.Conv2d(1, self.conv_size, kernel_size=3, stride=1, padding=1,
                      bias=False))
        self.B.append(
            nn.Conv2d(self.conv_size, self.conv_size, kernel_size=3, stride=1,
                      padding=1, bias=False))
        self.AT2.append(
            nn.Conv2d(self.conv_size, 1, kernel_size=3, stride=1, padding=1,
                      bias=False))
        self.BT.append(
            nn.Conv2d(self.conv_size, self.conv_size, kernel_size=3, stride=1,
                      padding=1, bias=False))
    # Xavier initialization is applied to the first unrolled layer only;
    # the remaining layers keep PyTorch's default Conv2d initialization.
    nn.init.xavier_normal_(self.A2[0].weight)
    nn.init.xavier_normal_(self.B[0].weight)
    nn.init.xavier_normal_(self.AT2[0].weight)
    nn.init.xavier_normal_(self.BT[0].weight)
def __init__( self, block: Type[Union[BasicBlock, Bottleneck]], layers: List[int], num_classes: int = 10, zero_init_residual: bool = False, groups: int = 1, width_per_group: int = 64, replace_stride_with_dilation: Optional[List[bool]] = None, norm_layer: Optional[Callable[..., nn.Module]] = None) -> None: super(ResNet, self).__init__() if norm_layer is None: norm_layer = nn.BatchNorm2d self._norm_layer = norm_layer self.inplanes = 64 self.dilation = 1 if replace_stride_with_dilation is None: # each element in the tuple indicates if we should replace # the 2x2 stride with a dilated convolution instead replace_stride_with_dilation = [False, False, False] if len(replace_stride_with_dilation) != 3: raise ValueError("replace_stride_with_dilation should be None " "or a 3-element tuple, got {}".format( replace_stride_with_dilation)) self.groups = groups self.base_width = width_per_group self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False) self.bn1 = norm_layer(self.inplanes) self.relu = nn.ReLU(inplace=True) self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1) self.layer1 = self._make_layer(block, 64, layers[0]) self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0]) self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1]) self.layer4 = self._make_layer(block, 512, layers[3], stride=2, dilate=replace_stride_with_dilation[2]) self.avgpool = nn.AdaptiveAvgPool2d((1, 1)) self.fc = nn.Linear(512 * block.expansion, num_classes) self.ks = nn.ParameterList([ nn.Parameter(torch.Tensor(1).uniform_(0.75, 0.8)) for i in range(layers[0] + layers[1] + layers[2]) ]) for m in self.modules(): if isinstance(m, nn.Conv2d): nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu') elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)): nn.init.constant_(m.weight, 1) nn.init.constant_(m.bias, 0) # Zero-initialize the last BN in each residual branch, # so that the residual branch starts with zeros, and each residual block behaves like an identity. # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677 if zero_init_residual: for m in self.modules(): if isinstance(m, Bottleneck): nn.init.constant_(m.bn3.weight, 0) # type: ignore[arg-type] elif isinstance(m, BasicBlock): nn.init.constant_(m.bn2.weight, 0) # type: ignore[arg-type]
def create_param_from_shapes(self, list_param_shapes):
    self.list_params = []
    for s in list_param_shapes:
        self.list_params.append(Parameter(torch.Tensor(s)))
    self.list_params = nn.ParameterList(self.list_params)
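
# Hedged usage sketch (the host module below is hypothetical, not from the
# original source). Note that torch.Tensor(s) only allocates a tensor of
# shape `s` when `s` is a torch.Size; a plain list would be interpreted as
# data, so torch.Size objects are assumed here.
import torch
import torch.nn as nn
from torch.nn import Parameter


class ShapedParams(nn.Module):
    def __init__(self, list_param_shapes):
        super().__init__()
        self.create_param_from_shapes(list_param_shapes)
        for p in self.list_params:
            # matrices get Xavier init, vectors are zeroed (illustrative choice)
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)
            else:
                nn.init.zeros_(p)

    def create_param_from_shapes(self, list_param_shapes):
        self.list_params = []
        for s in list_param_shapes:
            self.list_params.append(Parameter(torch.Tensor(s)))
        self.list_params = nn.ParameterList(self.list_params)


model = ShapedParams([torch.Size([16, 8]), torch.Size([16])])
print([tuple(p.shape) for p in model.list_params])  # [(16, 8), (16,)]
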
def __init__( self, input_size, C_in, C, n_classes, n_layers, auxiliary, genotype, stem_multiplier=3, feature_scale_rate=2, PRIMITIVES=gt.PRIMITIVES, reduction_layers=[], ): """ Args: input_size: size of height and width (assuming height = width) C_in: # of input channels C: # of starting model channels """ super().__init__() self.C_in = C_in self.C = C self.n_classes = n_classes self.n_layers = n_layers self.aux_pos = 2 * n_layers // 3 if auxiliary else -1 C_cur = stem_multiplier * C self.stem = nn.Sequential( nn.Conv2d(C_in, C_cur, 3, 1, 1, bias=False), nn.BatchNorm2d(C_cur) ) C_pp, C_p, C_cur = C_cur, C_cur, C self.cells = nn.ModuleList() reduction_p = False if not reduction_layers: reduction_layers = [n_layers // 3, (2 * n_layers) // 3] for i in range(n_layers): if i in reduction_layers: C_cur *= feature_scale_rate reduction = True else: reduction = False cell = AugmentCell(genotype, C_pp, C_p, C_cur, reduction_p, reduction) reduction_p = reduction self.cells.append(cell) C_cur_out = C_cur * len(cell.concat) C_pp, C_p = C_p, C_cur_out if i == self.aux_pos: # [!] this auxiliary head is ignored in computing parameter size # by the name 'aux_head' self.aux_head = AuxiliaryHead(input_size // 4, C_p, n_classes) self.gap = nn.AdaptiveAvgPool2d(1) self.linear = nn.Linear(C_p, n_classes) self.criterion = nn.CrossEntropyLoss() ####### dummy alphas self.alpha_normal = nn.ParameterList() self.alpha_reduce = nn.ParameterList() for i in range(2): self.alpha_normal.append(nn.Parameter(1e-3 * torch.randn(1, 5))) self.alpha_reduce.append(nn.Parameter(1e-3 * torch.randn(1, 5))) # setup alphas list self._alphas = [] for n, p in self.named_parameters(): if "alpha" in n: self._alphas.append((n, p)) self.alpha_prune_threshold = 0.0
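
# Hedged sketch: collecting parameters whose name contains "alpha", as done
# above, is the usual way DARTS-style code separates architecture parameters
# from network weights so they can be given to different optimizers. The tiny
# module, optimizers and learning rates below are illustrative assumptions.
import torch
import torch.nn as nn


class Tiny(nn.Module):
    def __init__(self):
        super().__init__()
        self.alpha_normal = nn.ParameterList(
            [nn.Parameter(1e-3 * torch.randn(1, 5))])
        self.linear = nn.Linear(4, 2)


model = Tiny()
arch_params = [p for n, p in model.named_parameters() if "alpha" in n]
weight_params = [p for n, p in model.named_parameters() if "alpha" not in n]
arch_opt = torch.optim.Adam(arch_params, lr=3e-4)
weight_opt = torch.optim.SGD(weight_params, lr=0.025, momentum=0.9)
print(len(arch_params), len(weight_params))  # 1 architecture tensor, 2 weight tensors
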
def __init__(self, hnet, hnet_uncond_in_size=None, sigma_noise=0.02, input_handler=None, output_handler=None, verbose=True): # FIXME find a way using super to handle multiple inheritance. nn.Module.__init__(self) HyperNetInterface.__init__(self) assert isinstance(hnet, HyperNetInterface) self._hnet = hnet self._hnet_uncond_in_size = hnet_uncond_in_size self._sigma_noise = sigma_noise self._input_handler = input_handler self._output_handler = output_handler if input_handler is None and hnet_uncond_in_size is None: raise ValueError( 'Either "input_handler" or "hnet_uncond_in_size"' + ' has to be specified.') ### Setup attributes required by interface ### # Most of these attributes are taken over from `self._hnet` self._target_shapes = hnet.target_shapes self._num_known_conds = self._hnet.num_known_conds self._unconditional_param_shapes_ref = \ list(self._hnet.unconditional_param_shapes_ref) if self._hnet.internal_params is not None: self._internal_params = \ nn.ParameterList(self._hnet.internal_params) self._param_shapes = list(self._hnet.param_shapes) self._param_shapes_meta = list(self._hnet.param_shapes_meta) if self._hnet.hyper_shapes_learned is not None: self._hyper_shapes_learned = list(self._hnet.hyper_shapes_learned) self._hyper_shapes_learned_ref = \ list(self._hnet.hyper_shapes_learned_ref) if self._hnet.hyper_shapes_distilled is not None: self._hyper_shapes_distilled = \ list(self._hnet.hyper_shapes_distilled) self._has_bias = self._hnet.has_bias # A noise perturbed output can't be considered an FC output anymore. self._has_fc_out = False self._mask_fc_out = self._hnet.mask_fc_out # Guess that's the safest answer. self._has_linear_out = False self._layer_weight_tensors = \ nn.ParameterList(self._hnet.layer_weight_tensors) self._layer_bias_vectors = \ nn.ParameterList(self._hnet.layer_bias_vectors) if self._hnet.batchnorm_layers is not None: self._batchnorm_layers = nn.ModuleList(self._hnet.batchnorm_layers) if self._hnet.context_mod_layers is not None: self._context_mod_layers = \ nn.ModuleList(self._hnet.context_mod_layers) ### Finalize construction ### self._is_properly_setup() if verbose: print('Wrapped a perturbation interface around a hypernetwork.')
def __init__(self, n_in_enc, graph_args_j, graph_args_p, graph_args_b, edge_weighting, fusion_layer, cross_w, **kwargs): super().__init__() self.graph_j = Graph_J(**graph_args_j) self.graph_p = Graph_P(**graph_args_p) self.graph_b = Graph_B(**graph_args_b) A_j = torch.tensor(self.graph_j.A_j, dtype=torch.float32, requires_grad=False) self.register_buffer('A_j', A_j) A_p = torch.tensor(self.graph_p.A_p, dtype=torch.float32, requires_grad=False) self.register_buffer('A_p', A_p) A_b = torch.tensor(self.graph_b.A_b, dtype=torch.float32, requires_grad=False) self.register_buffer('A_b', A_b) t_ksize, s_ksize_1, s_ksize_2, s_ksize_3 = 5, self.A_j.size( 0), self.A_p.size(0), self.A_b.size(0) ksize_1 = (t_ksize, s_ksize_1) ksize_2 = (t_ksize, s_ksize_2) ksize_3 = (t_ksize, s_ksize_3) self.s2_init = AveargeJoint() self.s3_init = AveargePart() self.s1_l1 = St_gcn(n_in_enc, 32, ksize_1, stride=1, residual=False, **kwargs) self.s1_l2 = St_gcn(32, 64, ksize_1, stride=2, **kwargs) self.s1_l3 = St_gcn(64, 128, ksize_1, stride=2, **kwargs) self.s1_l4 = St_gcn(128, 256, ksize_1, stride=2, **kwargs) self.s1_l5 = St_gcn(256, 256, ksize_1, stride=1, **kwargs) self.s2_l1 = St_gcn(n_in_enc, 32, ksize_2, stride=1, residual=False, **kwargs) self.s2_l2 = St_gcn(32, 64, ksize_2, stride=2, **kwargs) self.s2_l3 = St_gcn(64, 128, ksize_2, stride=2, **kwargs) self.s2_l4 = St_gcn(128, 256, ksize_2, stride=2, **kwargs) self.s3_l1 = St_gcn(n_in_enc, 32, ksize_3, stride=1, residual=False, **kwargs) self.s3_l2 = St_gcn(32, 64, ksize_3, stride=2, **kwargs) self.s3_l3 = St_gcn(64, 128, ksize_3, stride=2, **kwargs) self.s3_l4 = St_gcn(128, 256, ksize_3, stride=2, **kwargs) self.s2_back = PartLocalInform() self.s3_back = BodyLocalInform() self.fusion_layer = fusion_layer self.cross_w = cross_w if self.fusion_layer == 0: pass elif self.fusion_layer == 1: self.j2p_1 = S1_to_S2(n_j1=32, n_j2=(800, 256), n_p1=32, n_p2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2j_1 = S2_to_S1(n_p1=32, n_p2=(800, 256), n_j1=32, n_j2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2b_1 = S2_to_S3(n_p1=32, n_p2=(800, 256), n_b1=32, n_b2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.b2p_1 = S3_to_S2(n_b1=32, n_b2=(800, 256), n_p1=32, n_p2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) elif self.fusion_layer == 2: self.j2p_1 = S1_to_S2(n_j1=32, n_j2=(800, 256), n_p1=32, n_p2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2j_1 = S2_to_S1(n_p1=32, n_p2=(800, 256), n_j1=32, n_j2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2b_1 = S2_to_S3(n_p1=32, n_p2=(800, 256), n_b1=32, n_b2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.b2p_1 = S3_to_S2(n_b1=32, n_b2=(800, 256), n_p1=32, n_p2=(800, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.j2p_2 = S1_to_S2(n_j1=64, n_j2=(832, 256), n_p1=64, n_p2=(832, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2j_2 = S2_to_S1(n_p1=64, n_p2=(832, 256), n_j1=64, n_j2=(832, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.p2b_2 = S2_to_S3(n_p1=64, n_p2=(832, 256), n_b1=64, n_b2=(832, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) self.b2p_2 = S3_to_S2(n_b1=64, n_b2=(832, 256), n_p1=64, n_p2=(832, 256), t_kernel=5, t_stride=(1, 2), t_padding=2) else: raise ValueError('No Such Fusion Architecture') if edge_weighting: self.emul_s1 = nn.ParameterList( [nn.Parameter(torch.ones(self.A_j.size())) for i in range(5)]) self.eadd_s1 = nn.ParameterList( [nn.Parameter(torch.zeros(self.A_j.size())) for i in range(5)]) self.emul_s2 = 
nn.ParameterList(
    [nn.Parameter(torch.ones(self.A_p.size())) for i in range(4)])
self.eadd_s2 = nn.ParameterList(
    [nn.Parameter(torch.zeros(self.A_p.size())) for i in range(4)])
self.emul_s3 = nn.ParameterList(
    [nn.Parameter(torch.ones(self.A_b.size())) for i in range(4)])
self.eadd_s3 = nn.ParameterList(
    [nn.Parameter(torch.zeros(self.A_b.size())) for i in range(4)])
else:
    # neutral (non-learned) multiplicative edge weights: five s1 stages,
    # four s2 and s3 stages each
    self.emul_s1 = [1] * 5
    self.eadd_s1 = nn.ParameterList(
        [nn.Parameter(torch.zeros(self.A_j.size())) for i in range(5)])
    self.emul_s2 = [1] * 4
    self.eadd_s2 = nn.ParameterList(
        [nn.Parameter(torch.zeros(self.A_p.size())) for i in range(4)])
    self.emul_s3 = [1] * 4
    self.eadd_s3 = nn.ParameterList(
        [nn.Parameter(torch.zeros(self.A_b.size())) for i in range(4)])
def __init__(self, in_shape=[32, 32, 3], num_classes=10, verbose=True, arch='cifar', no_weights=False, init_weights=None, dropout_rate=0.25): super(ZenkeNet, self).__init__(num_classes, verbose) assert (in_shape[0] == 32 and in_shape[1] == 32) self._in_shape = in_shape assert (arch in ZenkeNet._architectures.keys()) self._param_shapes = ZenkeNet._architectures[arch] self._param_shapes[-2][0] = num_classes self._param_shapes[-1][0] = num_classes assert (init_weights is None or no_weights is False) self._no_weights = no_weights self._use_dropout = dropout_rate != -1 self._has_bias = True self._has_fc_out = True # We need to make sure that the last 2 entries of `weights` correspond # to the weight matrix and bias vector of the last layer. self._mask_fc_out = True # We don't use any output non-linearity. self._has_linear_out = True self._num_weights = MainNetInterface.shapes_to_num_weights( \ self._param_shapes) if verbose: print('Creating a ZenkeNet with %d weights' \ % (self._num_weights) + (', that uses dropout.' if self._use_dropout else '.')) if self._use_dropout: if dropout_rate > 0.5: # FIXME not a pretty solution, but we aim to follow the original # paper. raise ValueError('Dropout rate must be smaller equal 0.5.') self._drop_conv = nn.Dropout2d(p=dropout_rate) self._drop_fc1 = nn.Dropout(p=dropout_rate * 2.) self._layer_weight_tensors = nn.ParameterList() self._layer_bias_vectors = nn.ParameterList() if no_weights: self._weights = None self._hyper_shapes_learned = self._param_shapes self._is_properly_setup() return ### Define and initialize network weights. # Each odd entry of this list will contain a weight Tensor and each # even entry a bias vector. self._weights = nn.ParameterList() for i, dims in enumerate(self._param_shapes): self._weights.append( nn.Parameter(torch.Tensor(*dims), requires_grad=True)) if i % 2 == 0: self._layer_weight_tensors.append(self._weights[i]) else: assert (len(dims) == 1) self._layer_bias_vectors.append(self._weights[i]) if init_weights is not None: assert (len(init_weights) == len(self._param_shapes)) for i in range(len(init_weights)): assert (np.all( np.equal(list(init_weights[i].shape), list(self._weights[i].shape)))) self._weights[i].data = init_weights[i] else: for i in range(len(self._layer_weight_tensors)): init_params(self._layer_weight_tensors[i], self._layer_bias_vectors[i]) self._is_properly_setup()
def _initialize_alpha(self):
    k = sum(1 for i in range(self.nodes) for n in range(2 + i))
    num_ops = len(ATT_PRIMITIVES)
    self.alphas = nn.Parameter(1e-3 * torch.randn(k, num_ops).cuda(),
                               requires_grad=True)
    self._arch_param = nn.ParameterList([self.alphas])
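
# Hedged sketch of how such architecture parameters are commonly consumed in
# DARTS-style search (assumed usage, not taken from this repository): each
# row of `alphas` is softmaxed into mixing weights over the candidate ops.
# 14 edges corresponds to self.nodes == 4; 8 candidate ops is illustrative.
import torch
import torch.nn.functional as F

alphas = 1e-3 * torch.randn(14, 8)      # k edges x num_ops, as above
weights = F.softmax(alphas, dim=-1)     # one distribution per edge
# mixed_out = sum(w * op(x) for w, op in zip(weights[edge_idx], candidate_ops))
print(weights.sum(dim=-1))              # each row sums to 1
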
def __init__(self, in_feat, out_feat, num_rels, num_bases=-1, bias=None, activation=None, is_input_layer=False, ranks=None, input_dropout=0.2, rank_per=0.1, decomp='tucker'): super(RGCNTorchTuckerLayer, self).__init__(in_feat, out_feat, bias, activation) self.in_feat = in_feat self.out_feat = out_feat self.num_rels = num_rels self.num_bases = num_bases self.is_input_layer = is_input_layer self.num_bases = self.num_rels # calculate # if is_input_layer: # self.ranks = [self.num_bases, rank_per, self.out_feat] # else: # self.ranks = [self.num_bases, self.in_feat, self.out_feat] if ranks[0] == -1: ranks[0] = self.num_rels if ranks[1] == -1: ranks[1] = in_feat if self.is_input_layer: self.ranks = [ranks[0], ranks[1], self.out_feat] else: self.ranks = [ranks[0], self.in_feat, self.out_feat] print("Ranks - {}".format(self.ranks)) # add basis weights if decomp == 'tucker': self.weight = tn.randn(self.num_bases, self.in_feat, self.out_feat, ranks_tucker=self.ranks, device='cuda', requires_grad=True) elif decomp == 'tt': self.weight = tn.randn(self.num_bases, self.in_feat, self.out_feat, ranks_tt=self.ranks, device='cuda', requires_grad=True) else: raise NotImplementedError("decomposition not implemented") # self.core = nn.Parameter(torch.empty((self.ranks[0], self.ranks[1], self.ranks[2]))) # self.factor_1 = nn.Parameter(torch.empty((weight.shape[0], self.ranks[0]))) # self.factor_2 = nn.Parameter(torch.empty((weight.shape[1], self.ranks[1]))) # self.factor_3 = nn.Parameter(torch.empty((weight.shape[2], self.ranks[2]))) self.input_dropout = torch.nn.Dropout(input_dropout) self.bnw = torch.nn.BatchNorm1d(self.in_feat) # self.factors = nn.ParameterList([]) # for f_i,f in enumerate(factors): # fac = nn.Parameter(f) # # self.register_parameter('tucker_factor_{}'.format(f_i), fac) # self.factors.append(fac) # # self.weight_full = nn.Parameter(self.weight.torch()) cores = [] for c_i, core in enumerate(self.weight.cores): core = nn.Parameter(core) #nn.init.xavier_normal_(core, gain=nn.init.calculate_gain('sigmoid')) self.register_parameter('tucker_core_{}'.format(c_i), core) cores.append(core) self.weight.cores = cores Us = [] for u_i, u in enumerate(self.weight.Us): u = nn.Parameter(u) #nn.init.orthogonal(u, gain=nn.init.calculate_gain('sigmoid')) self.register_parameter('tucker_Us_{}'.format(u_i), u) Us.append(u) self.weight.Us = Us self.model_params = nn.ParameterList(cores + Us)
def __init__(self, num_tokens_per_channel, codebook_dim, upscale_factors, list_of_num_layers, n_head, d_model, dim_feedforward, num_tokens_bottleneck, dropout): super(AuxiliaryDecoderRelative, self).__init__() assert len(list_of_num_layers) == len(upscale_factors) self.num_tokens_per_channel = num_tokens_per_channel self.num_channels = len(self.num_tokens_per_channel) self.d_model = d_model self.codebook_dim = codebook_dim self.upscale_factors = upscale_factors self.linear = nn.Linear(self.codebook_dim, self.d_model) # TODO factorised positional embeddings positional_embedding_size = self.d_model self.upscale_embeddings = nn.ParameterList( [ nn.Parameter( torch.randn(upscale, self.d_model) ) for upscale in self.upscale_factors ] ) # build transformer list self.num_tokens_per_transformer_block = [ num_tokens_bottleneck * int(np.prod(self.upscale_factors[:i])) for i in range(len(self.upscale_factors)) ] # self.code_embedding_dim = self.d_model # - positional_embedding_size # TODO for now sum positional embedding self.code_embedding_dim = self.d_model - positional_embedding_size transformer_list = [] for i, (num_layers, num_tokens) in enumerate( zip(list_of_num_layers, self.num_tokens_per_transformer_block)): encoder_layer = TransformerEncoderLayerCustom( d_model=self.d_model, nhead=n_head, attention_bias_type='relative_attention', dim_feedforward=dim_feedforward, dropout=dropout, num_events=num_tokens // self.num_channels, num_channels=self.num_channels ) transformer = TransformerEncoderCustom( encoder_layer=encoder_layer, num_layers=num_layers, ) transformer_list.append(transformer) self.transformers = nn.ModuleList( transformer_list ) self.pre_softmaxes = nn.ModuleList([nn.Linear(self.d_model, num_notes) for num_notes in num_tokens_per_channel ] )
def __init__(self, list_length):
    super(MultiLossLayer, self).__init__()
    self._sigmas_sq = nn.ParameterList(
        [nn.Parameter(torch.empty(())) for i in range(list_length)])
    for p in self.parameters():
        nn.init.uniform_(p, 0.5, 0.8)
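
# Hedged sketch of a forward pass for this layer, following the
# homoscedastic-uncertainty weighting of Kendall et al. (2018). The actual
# forward of MultiLossLayer is not shown above, so this is illustrative only.
import torch
import torch.nn as nn


class MultiLossLayer(nn.Module):
    def __init__(self, list_length):
        super(MultiLossLayer, self).__init__()
        self._sigmas_sq = nn.ParameterList(
            [nn.Parameter(torch.empty(())) for i in range(list_length)])
        for p in self.parameters():
            nn.init.uniform_(p, 0.5, 0.8)

    def forward(self, loss_list):
        # total = sum_i loss_i / (2 * sigma_i^2) + 0.5 * log(sigma_i^2)
        total = 0.0
        for sigma_sq, loss in zip(self._sigmas_sq, loss_list):
            total = total + loss / (2.0 * sigma_sq) + 0.5 * torch.log(sigma_sq)
        return total


layer = MultiLossLayer(2)
print(layer([torch.tensor(1.3), torch.tensor(0.7)]))
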
def __init__(self, config, imgc, imgsz, device):
    super(Learner, self).__init__()
    self.config2vars = [None] * len(config)
    self.config2vars_bn = [None] * len(config)
    self.config = config
    self.device = device
    # this list contains all tensors that need to be optimized
    self.vars = nn.ParameterList()
    # running_mean and running_var
    self.vars_bn = nn.ParameterList()
    self.pruning_record = []

    for i, (name, param) in enumerate(self.config):
        if name == 'conv2d':
            # [ch_out, ch_in, kernelsz, kernelsz]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
            self.config2vars[i] = len(self.vars) // 2 - 1
        elif name == 'convt2d':
            # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_in, ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[1])))
            self.config2vars[i] = len(self.vars) // 2 - 1
        elif name == 'linear':
            # [ch_out, ch_in]
            w = nn.Parameter(torch.ones(*param))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
            self.config2vars[i] = len(self.vars) // 2 - 1
        elif name == 'bn':
            # [ch_out]
            w = nn.Parameter(torch.ones(param[0]))
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
            self.config2vars[i] = len(self.vars) // 2 - 1
            # must set requires_grad=False
            running_mean = nn.Parameter(torch.zeros(param[0]),
                                        requires_grad=False)
            running_var = nn.Parameter(torch.ones(param[0]),
                                       requires_grad=False)
            self.vars_bn.extend([running_mean, running_var])
            self.config2vars_bn[i] = len(self.vars_bn) // 2 - 1
        elif name in ['tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                      'flatten', 'reshape', 'leakyrelu', 'sigmoid']:
            continue
        else:
            raise NotImplementedError
def __init__(self, block, layers, pretrain=False, num_classes=10, stochastic_depth=False, PL=0.5, noise_level=0.001, noise=False): self.in_planes = 16 self.planes = [16, 32, 64] self.strides = [1, 2, 2] super(InResNet, self).__init__() self.noise = noise self.block = block self.conv1 = nn.Conv2d(3, 16, kernel_size=3, padding=1, bias=False) self.bn1 = nn.BatchNorm2d(16) self.relu = nn.ReLU(inplace=True) self.pretrain = pretrain self.ks = nn.ParameterList([ nn.Parameter(torch.Tensor(1).uniform_(0.2, 0.25)) for i in range(layers[0] + layers[1] + layers[2]) ]) # self.ks=nn.ParameterList([nn.Parameter(torch.Tensor(1).uniform_(0.2, 0.25))for i in range(layers[0]+layers[1]+layers[2])]) # Use this line for \lambda-In-ResNet; for 164-layer experiments, use [0.8, 0.9] for In-ResNet or [0.1, 0.2] for \lambda-In-ResNet self.stochastic_depth = stochastic_depth blocks = [] n = layers[0] + layers[1] + layers[2] if not self.stochastic_depth: for i in range(3): blocks.append( block(self.in_planes, self.planes[i], self.strides[i])) self.in_planes = self.planes[i] * block.expansion for j in range(1, layers[i]): blocks.append(block(self.in_planes, self.planes[i])) else: death_rates = [i / (n - 1) * (1 - PL) for i in range(n)] print(death_rates) for i in range(3): blocks.append( block(self.in_planes, self.planes[i], self.strides[i], death_rate=death_rates[i * layers[0]])) self.in_planes = self.planes[i] * block.expansion for j in range(1, layers[i]): blocks.append( block(self.in_planes, self.planes[i], death_rate=death_rates[i * layers[0] + j])) self.blocks = nn.ModuleList(blocks) self.downsample1 = Downsample(16, 64, stride=1) self.downsample21 = Downsample(16 * block.expansion, 32 * block.expansion) self.downsample31 = Downsample(32 * block.expansion, 64 * block.expansion) self.bn = nn.BatchNorm2d(64 * block.expansion) self.avgpool = nn.AvgPool2d(8) self.fc = nn.Linear(64 * block.expansion, num_classes) for m in self.modules(): if isinstance(m, nn.Conv2d): n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels m.weight.data.normal_(0, math.sqrt(2. / n)) elif isinstance(m, nn.BatchNorm2d): m.weight.data.fill_(1) m.bias.data.zero_()
def __init__(self, depth=1, sizes=[128]):
    super(MLP, self).__init__()
    assert len(sizes) == depth, 'len(sizes) must match depth!'
    self.depth = depth
    self.sizes = sizes
    self.linear_weights = nn.ParameterList()
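
# Hedged completion sketch: the constructor above leaves `linear_weights`
# empty, so the population of the ParameterList and the forward pass below
# are assumptions (the input width `in_dim`, the Kaiming init and the
# bias-free F.linear layers are illustrative, not from the original source).
import torch
import torch.nn as nn
import torch.nn.functional as F


class MLP(nn.Module):
    def __init__(self, in_dim, depth=1, sizes=[128]):
        super(MLP, self).__init__()
        assert len(sizes) == depth, 'len(sizes) must match depth!'
        self.depth = depth
        self.sizes = sizes
        self.linear_weights = nn.ParameterList()
        prev = in_dim
        for width in sizes:
            w = nn.Parameter(torch.empty(width, prev))
            nn.init.kaiming_normal_(w)
            self.linear_weights.append(w)
            prev = width

    def forward(self, x):
        for i, w in enumerate(self.linear_weights):
            x = F.linear(x, w)
            if i < self.depth - 1:
                x = F.relu(x)
        return x


print(MLP(in_dim=32, depth=2, sizes=[64, 128])(torch.randn(4, 32)).shape)
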
def __init__(self, nWay):
    super(Learner, self).__init__()
    self.config = [
        ('conv2d', [32, 1, 3, 3, 2, 0]),
        ('relu', [True]),
        ('bn', [32]),
        ('conv2d', [64, 32, 3, 3, 2, 0]),
        ('relu', [True]),
        ('bn', [64]),
        ('conv2d', [128, 64, 3, 3, 2, 0]),
        ('relu', [True]),
        ('bn', [128]),
        ('conv2d', [128, 128, 2, 2, 1, 0]),
        ('relu', [True]),
        ('bn', [128]),
        ('flatten', []),
        ('linear', [nWay, 128])
    ]

    # this list contains all tensors that need to be optimized
    self.vars = nn.ParameterList()
    self.vars_bn = nn.ParameterList()

    for i, (name, param) in enumerate(self.config):
        if name == 'conv2d':
            # [ch_out, ch_in, kernelsz, kernelsz]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
        elif name == 'convt2d':
            # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_in, ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[1])))
        elif name == 'linear':
            # [ch_out, ch_in]
            w = nn.Parameter(torch.ones(*param))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
        elif name == 'bn':
            # [ch_out]
            w = nn.Parameter(torch.ones(param[0]))
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
            running_mean = nn.Parameter(torch.zeros(param[0]),
                                        requires_grad=False)
            running_var = nn.Parameter(torch.ones(param[0]),
                                       requires_grad=False)
            self.vars_bn.extend([running_mean, running_var])
        elif name in ['tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                      'flatten', 'reshape', 'leakyrelu', 'sigmoid']:
            continue
        else:
            raise NotImplementedError
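
# Hedged sketch of the functional-forward pattern this parameter layout
# supports: MAML-style learners keep their weights in a flat `self.vars` list
# so the forward can be re-run with externally supplied fast weights. The
# Learner's actual forward is not shown, so the snippet is illustrative only.
import torch
import torch.nn.functional as F

vars = [torch.nn.Parameter(torch.randn(32, 1, 3, 3)),
        torch.nn.Parameter(torch.zeros(32))]
x = torch.randn(4, 1, 28, 28)
# consumes one (weight, bias) pair, matching ('conv2d', [32, 1, 3, 3, 2, 0])
out = F.conv2d(x, vars[0], vars[1], stride=2, padding=0)
print(out.shape)  # torch.Size([4, 32, 13, 13])

# typical MAML inner loop over such a flat parameter list (illustrative):
# grads = torch.autograd.grad(loss, learner.vars)
# fast_weights = [p - inner_lr * g for p, g in zip(learner.vars, grads)]
# logits = learner(x_query, vars=fast_weights)
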
def __init__(self, adjmat_list, input_data_dim, num_agg_steps, vertex_embed_dim, mlp_num_hidden, mlp_hidden_dim, vertices_are_onehot, target_dim, epsilon_tunable=False, dense_layer_dropout=0.0, other_mlp_parameters={}): """ Most parameters defined in the parent class :param adjmat_list: List of all adjmats to be considered Purpose: force input validation, but not saved to any variable. The user will enter the graphs in the dataset. In principle, the graphs passed to initialize could be different than those used in the forward method; it is up to the user to properly do input validation on all desired graphs This is NOT stored as a self object; rest easy we're not wasting memory :param target_dim: Dimension of the response variable (the target) :param epsilon_tunable: Do we make epsilon in equation 4.1 tunable :param dense_layer_dropout: Dropout to apply to the dense layer. In accordance with the GIN paper's experimental section """ # Make sure all entered matrices are coo def is_coo(mat): return isinstance(mat, sps.coo.coo_matrix) # Make sure there are ones on the diagonal. def diags_all_one(mat): return np.array_equal(mat.diagonal(), np.ones(mat.shape[0])) assert all(list(map( is_coo, adjmat_list))), "All adjacency matrices must be scipy sparse coo" assert all(list(map( diags_all_one, adjmat_list))), "All adjacency matrices must have ones on the diag" assert isinstance( dense_layer_dropout, float), "Dense layer dropout must be a float in 0 <= p < 1" assert 0 <= dense_layer_dropout < 1, "Dense layer dropout must be a float in 0 <= p < 1" super(GinMultiGraph, self).__init__(input_data_dim=input_data_dim, num_agg_steps=num_agg_steps, vertex_embed_dim=vertex_embed_dim, mlp_num_hidden=mlp_num_hidden, mlp_hidden_dim=mlp_hidden_dim, vertices_are_onehot=vertices_are_onehot, other_mlp_parameters=other_mlp_parameters) self.target_dim = target_dim self.add_module("last_linear", nn.Linear(self.graph_embed_dim, target_dim)) self.epsilon_tunable = epsilon_tunable logging.info("Dense layer dropout: {}".format(dense_layer_dropout)) self.dense_layer_dropout = nn.Dropout(p=dense_layer_dropout) if epsilon_tunable: logging.info("User indicated: epsilon_tunable = True") logging.info("Epsilon_k WILL be LEARNED via backprop") logging.info("It is initialized to zero") self.epsilons = nn.ParameterList() for ll in range(num_agg_steps): epsilon_k = nn.Parameter(torch.zeros(1), requires_grad=True) self.epsilons.append(epsilon_k) else: logging.info("User indicated: epsilon_tunable = False") logging.info( "Epsilon_k WILL NOT be learned via backprop (and set to zero implicitly)" )
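
# Hedged sketch of the aggregation that epsilon_k parameterizes (equation 4.1
# of the GIN paper, Xu et al., 2019):
#   h_v^(k) = MLP_k( (1 + eps_k) * h_v^(k-1) + sum_{u in N(v)} h_u^(k-1) ).
# Because the adjacency matrices here must carry ones on the diagonal, A @ H
# already contains h_v once, so the update reduces to A @ H + eps_k * H.
# The MLP itself and the dense sizes below are illustrative assumptions.
import torch

num_nodes, dim = 4, 8
A = torch.eye(num_nodes)
A[0, 1] = A[1, 0] = 1.0            # one undirected edge, plus self-loops
H = torch.randn(num_nodes, dim)
eps_k = torch.zeros(())            # initialized to zero, as above

aggregated = A @ H + eps_k * H     # input to MLP_k
print(aggregated.shape)            # torch.Size([4, 8])
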
def __init__(self, in_channels, num_class, graph_cfg, T=300, RAM_encoder_output_channels=128, RAM_decoder_output_channels=64, edge_importance_weighting=True, relative_attention_component=True, geometric_component=True, temporal_kernel_size=9, **kwargs): super().__init__() self.relative_attention_component = relative_attention_component self.geometric_component = geometric_component # load graph self.graph = Graph(**graph_cfg) A = torch.tensor(self.graph.A, dtype=torch.float32, requires_grad=False) self.register_buffer('A', A) # build networks spatial_kernel_size = A.size(0) self.temporal_kernel_size = temporal_kernel_size kernel_size = (self.temporal_kernel_size, spatial_kernel_size, A.size(1)) self.data_bn = nn.BatchNorm2d(in_channels) kwargs0 = {k: v for k, v in kwargs.items() if k != 'dropout'} self.st_gcn_networks = nn.ModuleList(( DRGCBlock(in_channels, 64, kernel_size, 1, residual=False, **kwargs0), DRGCBlock(64, 64, kernel_size, 1, **kwargs), DRGCBlock(64, 64, kernel_size, 1, **kwargs), DRGCBlock(64, 64, kernel_size, 1, **kwargs), DRGCBlock(64, 128, kernel_size, 2, **kwargs), DRGCBlock(128, 128, kernel_size, 1, **kwargs), DRGCBlock(128, 128, kernel_size, 1, **kwargs), DRGCBlock(128, 256, kernel_size, 2, **kwargs), DRGCBlock(256, 256, kernel_size, 1, **kwargs), DRGCBlock(256, 256, kernel_size, 1, **kwargs), )) self.RAMGen = RAMGen(3, RAM_encoder_output_channels, RAM_decoder_output_channels, kernel_size, T, self.relative_attention_component, self.geometric_component) # initialize parameters for edge importance weighting if edge_importance_weighting: self.edge_importance = nn.ParameterList([ nn.Parameter(torch.ones(self.A.size())) for i in self.st_gcn_networks ]) # edge importance for RAM_r's encoder and decoder in RAMGen self.RAMGen_edge_importance = nn.Parameter( torch.ones(self.A.size())) else: self.edge_importance = [1] * len(self.st_gcn_networks) # fcn for prediction self.fcn = nn.Conv2d(256, num_class, kernel_size=(1, 1))
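
# Hedged sketch of how per-block edge-importance masks are conventionally
# consumed, following the original ST-GCN forward; this model's own forward
# is not shown above, so the block call is only indicated in a comment and
# the adjacency shape (3 partitions, 25 joints) is illustrative.
import torch
import torch.nn as nn

A = torch.rand(3, 25, 25)                        # (partitions, V, V) adjacency
edge_importance = nn.Parameter(torch.ones(A.size()))
weighted_A = A * edge_importance                 # element-wise, per partition
# typical block loop in ST-GCN-style models:
#   for gcn, importance in zip(self.st_gcn_networks, self.edge_importance):
#       x, _ = gcn(x, self.A * importance)
print(weighted_A.shape)
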
def __init__(self, max_length, n_way, type="cnnLinear"): ''' :param max_length: :param n_way: :param type: "cnnLinear" "concatLinear" "clsLinear" ''' nn.Module.__init__(self) self.max_length = max_length pretrain_path = './pretrain/bert-base-uncased/' self.sentence_embedding = network.embedding.BERTSentenceEmbedding( pretrain_path=pretrain_path, max_length=self.max_length) self.vars = nn.ParameterList() self.n_way = n_way self.feature_dim = 768 self.filter_num = 128 self.type = type self.attention = False # kernel size = 2 # [ch_out, ch_in, kernelsz, kernelsz] if type == "pcnnLinear": # CNN self.filter_sizes = [2, 3, 4, 5] for filter_size in self.filter_sizes: w = nn.Parameter( torch.ones(self.filter_num, 1, filter_size, self.feature_dim)) # [64,1,3,3]] torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append(nn.Parameter(torch.zeros(self.filter_num))) filter_dim = self.filter_num * len([2, 3, 4, 5]) labels_num = self.n_way # dropout self.dropout = nn.Dropout(0.5) # linear w = nn.Parameter(torch.ones(128, filter_dim * 3)) self.linear = nn.Linear(filter_dim * 3, 128) torch.nn.init.kaiming_normal_(w) self.vars.append(w) # [ch_out] self.vars.append(nn.Parameter(torch.zeros(128))) w = nn.Parameter(torch.ones(labels_num, 128)) self.linear = nn.Linear(128, labels_num) torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append(nn.Parameter(torch.zeros(labels_num))) elif self.type == 'cnnLinear': # *************attention***************** if self.attention: w = nn.Parameter(torch.ones(self.feature_dim, self.feature_dim), requires_grad=True) # self.linear = nn.Linear(filter_dim, labels_num) torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append( nn.Parameter(torch.zeros(self.feature_dim), requires_grad=True)) w = nn.Parameter(torch.ones(1, self.feature_dim), requires_grad=True) torch.nn.init.kaiming_normal_(w) self.vars.append(w) # *************conv********************* # kernel size = 2 # [ch_out, ch_in, kernelsz, kernelsz] for filter_size in [2, 3, 4, 5]: w = nn.Parameter(torch.ones(self.filter_num, 1, filter_size, self.feature_dim), requires_grad=True) # [64,1,3,3]] torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append( nn.Parameter(torch.zeros(self.filter_num), requires_grad=True)) filter_dim = self.filter_num * len([2, 3, 4, 5]) labels_num = self.n_way # dropout self.dropout = nn.Dropout(0.5) # linear w = nn.Parameter(torch.ones(labels_num, filter_dim), requires_grad=True) # self.linear = nn.Linear(filter_dim, labels_num) torch.nn.init.kaiming_normal_(w) self.vars.append(w) # [ch_out] self.vars.append( nn.Parameter(torch.zeros(labels_num), requires_grad=True)) # linear elif self.type == "concatLinear": w = nn.Parameter(torch.ones(self.n_way, 1536)) self.linear = nn.Linear(1536, self.n_way) torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append(nn.Parameter(torch.zeros(self.n_way))) elif self.type == "clsLinear": w = nn.Parameter(torch.ones(self.n_way, 768)) self.linear = nn.Linear(768, self.n_way) torch.nn.init.kaiming_normal_(w) self.vars.append(w) self.vars.append(nn.Parameter(torch.zeros(self.n_way))) else: raise Exception( "Learner type only can be cnnLinear、concatLinear、clsLinear")
def __init__(self,
             field_size,
             feature_sizes,
             embedding_size=4,
             h_depth=3,
             deep_layers=[32, 32, 32],
             is_deep_dropout=True,
             dropout_deep=[0.5, 0.5, 0.5],
             use_inner_product=True,
             use_outer_product=False,
             deep_layers_activation='relu',
             n_epochs=64,
             batch_size=256,
             learning_rate=0.003,
             optimizer_type='adam',
             is_batch_norm=False,
             verbose=False,
             random_seed=950104,
             weight_decay=0.0,
             loss_type='logloss',
             eval_metric=roc_auc_score,
             use_cuda=True,
             n_class=1,
             greater_is_better=True):
    super(PNN, self).__init__()
    self.field_size = field_size
    self.feature_sizes = feature_sizes
    self.embedding_size = embedding_size
    self.h_depth = h_depth
    self.deep_layers = deep_layers
    self.is_deep_dropout = is_deep_dropout
    self.dropout_deep = dropout_deep
    self.use_inner_product = use_inner_product
    self.use_outer_product = use_outer_product
    self.deep_layers_activation = deep_layers_activation
    self.n_epochs = n_epochs
    self.batch_size = batch_size
    self.learning_rate = learning_rate
    self.optimizer_type = optimizer_type
    self.is_batch_norm = is_batch_norm
    self.verbose = verbose
    self.weight_decay = weight_decay
    self.random_seed = random_seed
    self.loss_type = loss_type
    self.eval_metric = eval_metric
    self.use_cuda = use_cuda
    self.n_class = n_class
    self.greater_is_better = greater_is_better
    torch.manual_seed(self.random_seed)

    """ check cuda """
    if self.use_cuda and not torch.cuda.is_available():
        self.use_cuda = False
        print("CUDA is not available, automatically switching to CPU mode")

    """ check use inner_product or outer_product """
    if self.use_inner_product and self.use_outer_product:
        print("The model uses both inner product and outer product")
    elif self.use_inner_product:
        print("The model uses inner product (IPNN)")
    elif self.use_outer_product:
        print("The model uses outer product (OPNN)")
    else:
        print("The model is a plain deep model only! "
              "Neither inner product nor outer product is used")

    """ embedding part """
    print("Init embeddings")
    self.embeddings = nn.ModuleList([
        nn.Embedding(feature_size, self.embedding_size)
        for feature_size in self.feature_sizes
    ])
    print("Init embeddings finished")

    """ first order part (linear part) """
    print("Init first order part")
    self.first_order_weight = nn.ModuleList([
        nn.ParameterList([
            torch.nn.Parameter(torch.randn(self.embedding_size),
                               requires_grad=True)
            for j in range(self.field_size)
        ]) for i in range(self.deep_layers[0])
    ])
    self.bias = torch.nn.Parameter(torch.randn(self.deep_layers[0]),
                                   requires_grad=True)
    print("Init first order part finished")

    """ second order part (quadratic part) """
    print("Init second order part")
    if self.use_inner_product:
        self.inner_second_weight_emb = nn.ModuleList([
            nn.ParameterList([
                torch.nn.Parameter(torch.randn(self.embedding_size),
                                   requires_grad=True)
                for j in range(self.field_size)
            ]) for i in range(self.deep_layers[0])
        ])
    if self.use_outer_product:
        arr = []
        for i in range(self.deep_layers[0]):
            tmp = torch.randn(self.embedding_size, self.embedding_size)
            arr.append(torch.nn.Parameter(torch.mm(tmp, tmp.t())))
        self.outer_second_weight_emb = nn.ParameterList(arr)
    print("Init second order part finished")

    print("Init nn part")
    for i, h in enumerate(self.deep_layers[1:], 1):
        setattr(self, 'linear_' + str(i),
                nn.Linear(self.deep_layers[i - 1], self.deep_layers[i]))
        if self.is_batch_norm:
            setattr(self, 'batch_norm_' + str(i),
                    nn.BatchNorm1d(deep_layers[i]))
        if self.is_deep_dropout:
            setattr(self, 'linear_' + str(i) + '_dropout',
                    nn.Dropout(self.dropout_deep[i]))
    self.deep_last_layer = nn.Linear(self.deep_layers[-1], self.n_class)
    print("Init nn part finished")

    print("Init succeeded")
def train(self): self.train_writer = SummaryWriter(logdir=self.save_path) dictionary_size = self.dictionary_size top1_acc_list_cumul = np.zeros( (int(self.args.num_classes / self.args.nb_cl), 4, self.args.nb_runs)) top1_acc_list_ori = np.zeros( (int(self.args.num_classes / self.args.nb_cl), 4, self.args.nb_runs)) X_train_total = np.array(self.trainset.train_data) Y_train_total = np.array(self.trainset.train_labels) X_valid_total = np.array(self.testset.test_data) Y_valid_total = np.array(self.testset.test_labels) np.random.seed(1993) for iteration_total in range(self.args.nb_runs): order_name = osp.join( self.save_path, "seed_{}_{}_order_run_{}.pkl".format(1993, self.args.dataset, iteration_total)) print("Order name:{}".format(order_name)) if osp.exists(order_name): print("Loading orders") order = utils.misc.unpickle(order_name) else: print("Generating orders") order = np.arange(self.args.num_classes) np.random.shuffle(order) utils.misc.savepickle(order, order_name) order_list = list(order) print(order_list) np.random.seed(self.args.random_seed) X_valid_cumuls = [] X_protoset_cumuls = [] X_train_cumuls = [] Y_valid_cumuls = [] Y_protoset_cumuls = [] Y_train_cumuls = [] alpha_dr_herding = np.zeros( (int(self.args.num_classes / self.args.nb_cl), dictionary_size, self.args.nb_cl), np.float32) prototypes = np.zeros( (self.args.num_classes, dictionary_size, X_train_total.shape[1], X_train_total.shape[2], X_train_total.shape[3])) for orde in range(self.args.num_classes): prototypes[orde, :, :, :, :] = X_train_total[np.where( Y_train_total == order[orde])] start_iter = int(self.args.nb_cl_fg / self.args.nb_cl) - 1 for iteration in range(start_iter, int(self.args.num_classes / self.args.nb_cl)): if iteration == start_iter: last_iter = 0 tg_model = self.network(num_classes=self.args.nb_cl_fg) in_features = tg_model.fc.in_features out_features = tg_model.fc.out_features print("Out_features:", out_features) ref_model = None free_model = None ref_free_model = None elif iteration == start_iter + 1: last_iter = iteration ref_model = copy.deepcopy(tg_model) print("Fusion Mode: " + self.args.fusion_mode) tg_model = self.network_mtl(num_classes=self.args.nb_cl_fg) ref_dict = ref_model.state_dict() tg_dict = tg_model.state_dict() tg_dict.update(ref_dict) tg_model.load_state_dict(tg_dict) tg_model.to(self.device) in_features = tg_model.fc.in_features out_features = tg_model.fc.out_features print("Out_features:", out_features) new_fc = modified_linear.SplitCosineLinear( in_features, out_features, self.args.nb_cl) new_fc.fc1.weight.data = tg_model.fc.weight.data new_fc.sigma.data = tg_model.fc.sigma.data tg_model.fc = new_fc lamda_mult = out_features * 1.0 / self.args.nb_cl else: last_iter = iteration ref_model = copy.deepcopy(tg_model) in_features = tg_model.fc.in_features out_features1 = tg_model.fc.fc1.out_features out_features2 = tg_model.fc.fc2.out_features print("Out_features:", out_features1 + out_features2) new_fc = modified_linear.SplitCosineLinear( in_features, out_features1 + out_features2, self.args.nb_cl) new_fc.fc1.weight.data[: out_features1] = tg_model.fc.fc1.weight.data new_fc.fc1.weight.data[ out_features1:] = tg_model.fc.fc2.weight.data new_fc.sigma.data = tg_model.fc.sigma.data tg_model.fc = new_fc lamda_mult = (out_features1 + out_features2) * 1.0 / (self.args.nb_cl) if iteration > start_iter: cur_lamda = self.args.lamda * math.sqrt(lamda_mult) else: cur_lamda = self.args.lamda actual_cl = order[range(last_iter * self.args.nb_cl, (iteration + 1) * self.args.nb_cl)] indices_train_10 = 
np.array([ i in order[range(last_iter * self.args.nb_cl, (iteration + 1) * self.args.nb_cl)] for i in Y_train_total ]) indices_test_10 = np.array([ i in order[range(last_iter * self.args.nb_cl, (iteration + 1) * self.args.nb_cl)] for i in Y_valid_total ]) X_train = X_train_total[indices_train_10] X_valid = X_valid_total[indices_test_10] X_valid_cumuls.append(X_valid) X_train_cumuls.append(X_train) X_valid_cumul = np.concatenate(X_valid_cumuls) X_train_cumul = np.concatenate(X_train_cumuls) Y_train = Y_train_total[indices_train_10] Y_valid = Y_valid_total[indices_test_10] Y_valid_cumuls.append(Y_valid) Y_train_cumuls.append(Y_train) Y_valid_cumul = np.concatenate(Y_valid_cumuls) Y_train_cumul = np.concatenate(Y_train_cumuls) if iteration == start_iter: X_valid_ori = X_valid Y_valid_ori = Y_valid else: X_protoset = np.concatenate(X_protoset_cumuls) Y_protoset = np.concatenate(Y_protoset_cumuls) if self.args.rs_ratio > 0: scale_factor = (len(X_train) * self.args.rs_ratio) / ( len(X_protoset) * (1 - self.args.rs_ratio)) rs_sample_weights = np.concatenate( (np.ones(len(X_train)), np.ones(len(X_protoset)) * scale_factor)) rs_num_samples = int( len(X_train) / (1 - self.args.rs_ratio)) print( "X_train:{}, X_protoset:{}, rs_num_samples:{}".format( len(X_train), len(X_protoset), rs_num_samples)) X_train = np.concatenate((X_train, X_protoset), axis=0) Y_train = np.concatenate((Y_train, Y_protoset)) print('Batch of classes number {0} arrives'.format(iteration + 1)) map_Y_train = np.array([order_list.index(i) for i in Y_train]) map_Y_valid_cumul = np.array( [order_list.index(i) for i in Y_valid_cumul]) is_start_iteration = (iteration == start_iter) if iteration > start_iter: old_embedding_norm = tg_model.fc.fc1.weight.data.norm( dim=1, keepdim=True) average_old_embedding_norm = torch.mean(old_embedding_norm, dim=0).to('cpu').type( torch.DoubleTensor) tg_feature_model = nn.Sequential( *list(tg_model.children())[:-1]) num_features = tg_model.fc.in_features novel_embedding = torch.zeros((self.args.nb_cl, num_features)) for cls_idx in range(iteration * self.args.nb_cl, (iteration + 1) * self.args.nb_cl): cls_indices = np.array([i == cls_idx for i in map_Y_train]) assert (len( np.where(cls_indices == 1)[0]) == dictionary_size) self.evalset.test_data = X_train[cls_indices].astype( 'uint8') self.evalset.test_labels = np.zeros( self.evalset.test_data.shape[0]) evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) num_samples = self.evalset.test_data.shape[0] cls_features = compute_features(tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) norm_features = F.normalize(torch.from_numpy(cls_features), p=2, dim=1) cls_embedding = torch.mean(norm_features, dim=0) novel_embedding[cls_idx - iteration * self.args.nb_cl] = F.normalize( cls_embedding, p=2, dim=0) * average_old_embedding_norm tg_model.to(self.device) tg_model.fc.fc2.weight.data = novel_embedding.to(self.device) self.trainset.train_data = X_train.astype('uint8') self.trainset.train_labels = map_Y_train if iteration > start_iter and self.args.rs_ratio > 0 and scale_factor > 1: print("Weights from sampling:", rs_sample_weights) index1 = np.where(rs_sample_weights > 1)[0] index2 = np.where(map_Y_train < iteration * self.args.nb_cl)[0] assert ((index1 == index2).all()) train_sampler = torch.utils.data.sampler.WeightedRandomSampler( rs_sample_weights, rs_num_samples) trainloader = torch.utils.data.DataLoader( self.trainset, 
batch_size=self.args.train_batch_size, shuffle=False, sampler=train_sampler, num_workers=self.args.num_workers) else: trainloader = torch.utils.data.DataLoader( self.trainset, batch_size=self.args.train_batch_size, shuffle=True, num_workers=self.args.num_workers) self.testset.test_data = X_valid_cumul.astype('uint8') self.testset.test_labels = map_Y_valid_cumul testloader = torch.utils.data.DataLoader( self.testset, batch_size=self.args.test_batch_size, shuffle=False, num_workers=self.args.num_workers) print('Max and min of train labels: {}, {}'.format( min(map_Y_train), max(map_Y_train))) print('Max and min of valid labels: {}, {}'.format( min(map_Y_valid_cumul), max(map_Y_valid_cumul))) ckp_name = osp.join( self.save_path, 'run_{}_iteration_{}_model.pth'.format(iteration_total, iteration)) ckp_name_free = osp.join( self.save_path, 'run_{}_iteration_{}_free_model.pth'.format( iteration_total, iteration)) print('Checkpoint name:', ckp_name) if iteration == start_iter and self.args.resume_fg: print("Loading first group models from checkpoint") tg_model = torch.load(self.args.ckpt_dir_fg) elif self.args.resume and os.path.exists(ckp_name): print("Loading models from checkpoint") tg_model = torch.load(ckp_name) else: if iteration > start_iter: ref_model = ref_model.to(self.device) ignored_params = list(map(id, tg_model.fc.fc1.parameters())) base_params = filter(lambda p: id(p) not in ignored_params, tg_model.parameters()) base_params = filter(lambda p: p.requires_grad, base_params) base_params = filter(lambda p: p.requires_grad, base_params) tg_params_new = [{ 'params': base_params, 'lr': self.args.base_lr2, 'weight_decay': self.args.custom_weight_decay }, { 'params': tg_model.fc.fc1.parameters(), 'lr': 0, 'weight_decay': 0 }] tg_model = tg_model.to(self.device) tg_optimizer = optim.SGD( tg_params_new, lr=self.args.base_lr2, momentum=self.args.custom_momentum, weight_decay=self.args.custom_weight_decay) else: tg_params = tg_model.parameters() tg_model = tg_model.to(self.device) tg_optimizer = optim.SGD( tg_params, lr=self.args.base_lr1, momentum=self.args.custom_momentum, weight_decay=self.args.custom_weight_decay) if iteration > start_iter: tg_lr_scheduler = lr_scheduler.MultiStepLR( tg_optimizer, milestones=self.lr_strat, gamma=self.args.lr_factor) else: tg_lr_scheduler = lr_scheduler.MultiStepLR( tg_optimizer, milestones=self.lr_strat_first_phase, gamma=self.args.lr_factor) print("Incremental train") if iteration > start_iter: tg_model = incremental_train_and_eval( self.args.epochs, tg_model, ref_model, free_model, ref_free_model, tg_optimizer, tg_lr_scheduler, trainloader, testloader, iteration, start_iter, cur_lamda, self.args.dist, self.args.K, self.args.lw_mr) else: tg_model = incremental_train_and_eval( self.args.epochs, tg_model, ref_model, free_model, ref_free_model, tg_optimizer, tg_lr_scheduler, trainloader, testloader, iteration, start_iter, cur_lamda, self.args.dist, self.args.K, self.args.lw_mr) torch.save(tg_model, ckp_name) if self.args.fix_budget: nb_protos_cl = int( np.ceil(self.args.nb_protos * 100. 
/ self.args.nb_cl / (iteration + 1))) else: nb_protos_cl = self.args.nb_protos tg_feature_model = nn.Sequential(*list(tg_model.children())[:-1]) num_features = tg_model.fc.in_features for iter_dico in range(last_iter * self.args.nb_cl, (iteration + 1) * self.args.nb_cl): self.evalset.test_data = prototypes[iter_dico].astype('uint8') self.evalset.test_labels = np.zeros( self.evalset.test_data.shape[0]) evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) num_samples = self.evalset.test_data.shape[0] mapped_prototypes = compute_features(tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) D = mapped_prototypes.T D = D / np.linalg.norm(D, axis=0) mu = np.mean(D, axis=1) index1 = int(iter_dico / self.args.nb_cl) index2 = iter_dico % self.args.nb_cl alpha_dr_herding[index1, :, index2] = alpha_dr_herding[index1, :, index2] * 0 w_t = mu iter_herding = 0 iter_herding_eff = 0 while not (np.sum(alpha_dr_herding[index1, :, index2] != 0) == min(nb_protos_cl, 500)) and iter_herding_eff < 1000: tmp_t = np.dot(w_t, D) ind_max = np.argmax(tmp_t) iter_herding_eff += 1 if alpha_dr_herding[index1, ind_max, index2] == 0: alpha_dr_herding[index1, ind_max, index2] = 1 + iter_herding iter_herding += 1 w_t = w_t + mu - D[:, ind_max] X_protoset_cumuls = [] Y_protoset_cumuls = [] class_means = np.zeros((64, 100, 2)) for iteration2 in range(iteration + 1): for iter_dico in range(self.args.nb_cl): current_cl = order[range(iteration2 * self.args.nb_cl, (iteration2 + 1) * self.args.nb_cl)] self.evalset.test_data = prototypes[ iteration2 * self.args.nb_cl + iter_dico].astype('uint8') self.evalset.test_labels = np.zeros( self.evalset.test_data.shape[0]) #zero labels evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) num_samples = self.evalset.test_data.shape[0] mapped_prototypes = compute_features( tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) D = mapped_prototypes.T D = D / np.linalg.norm(D, axis=0) self.evalset.test_data = prototypes[ iteration2 * self.args.nb_cl + iter_dico][:, :, :, ::-1].astype('uint8') evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) mapped_prototypes2 = compute_features( tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) D2 = mapped_prototypes2.T D2 = D2 / np.linalg.norm(D2, axis=0) alph = alpha_dr_herding[iteration2, :, iter_dico] alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1. 
X_protoset_cumuls.append( prototypes[iteration2 * self.args.nb_cl + iter_dico, np.where(alph == 1)[0]]) Y_protoset_cumuls.append( order[iteration2 * self.args.nb_cl + iter_dico] * np.ones(len(np.where(alph == 1)[0]))) alph = alph / np.sum(alph) class_means[:, current_cl[iter_dico], 0] = (np.dot(D, alph) + np.dot(D2, alph)) / 2 class_means[:, current_cl[iter_dico], 0] /= np.linalg.norm( class_means[:, current_cl[iter_dico], 0]) alph = np.ones(dictionary_size) / dictionary_size class_means[:, current_cl[iter_dico], 1] = (np.dot(D, alph) + np.dot(D2, alph)) / 2 class_means[:, current_cl[iter_dico], 1] /= np.linalg.norm( class_means[:, current_cl[iter_dico], 1]) current_means = class_means[:, order[range(0, (iteration + 1) * self.args.nb_cl)]] X_protoset_array_old = np.array(X_protoset_cumuls) self.T = self.args.mnemonics_steps * self.args.mnemonics_epochs self.img_size = 32 self.mnemonics_lrs = self.args.mnemonics_lr num_classes_incremental = self.args.nb_cl num_classes = self.args.nb_cl_fg nb_cl = self.args.nb_cl transform_proto = transforms.Compose([ transforms.ToTensor(), transforms.Normalize((0.5071, 0.4866, 0.4409), (0.2009, 0.1984, 0.2023)), ]) self.mnemonics_label = [] if iteration == start_iter: the_X_protoset_array = np.array(X_protoset_cumuls).astype( 'uint8') the_Y_protoset_cumuls = np.array(Y_protoset_cumuls) else: the_X_protoset_array = np.array( X_protoset_cumuls[-num_classes_incremental:]).astype( 'uint8') the_Y_protoset_cumuls = np.array( Y_protoset_cumuls[-num_classes_incremental:]) self.mnemonics_data = torch.zeros(the_X_protoset_array.shape[0], the_X_protoset_array.shape[1], 3, self.img_size, self.img_size) for idx1 in range(the_X_protoset_array.shape[0]): for idx2 in range(the_X_protoset_array.shape[1]): the_img = the_X_protoset_array[idx1][idx2] the_PIL_image = Image.fromarray(the_img) the_PIL_image = transform_proto(the_PIL_image) self.mnemonics_data[idx1][idx2] = the_PIL_image map_Y_label = self.map_labels(order_list, the_Y_protoset_cumuls[idx1]) self.mnemonics_label.append(map_Y_label) self.mnemonics = nn.ParameterList() self.mnemonics.append(nn.Parameter(self.mnemonics_data)) start_iteration = start_iter device = self.device self.mnemonics.to(device) tg_feature_model = nn.Sequential(*list(tg_model.children())[:-1]) tg_feature_model.eval() tg_model.eval() if free_model is not None: free_model.eval() self.mnemonics_optimizer = optim.SGD( self.mnemonics, lr=self.args.mnemonics_outer_lr, momentum=0.9, weight_decay=5e-4) self.mnemonics_lr_scheduler = optim.lr_scheduler.StepLR( self.mnemonics_optimizer, step_size=self.args.mnemonics_decay_epochs, gamma=self.args.mnemonics_decay_factor) current_means_new = current_means[:, :, 0].T for epoch in range(self.args.mnemonics_total_epochs): train_loss = 0 self.mnemonics_lr_scheduler.step() for batch_idx, (q_inputs, q_targets) in enumerate(trainloader): q_inputs, q_targets = q_inputs.to(device), q_targets.to( device) if iteration == start_iteration: q_feature = tg_feature_model(q_inputs) else: q_feature = process_inputs_fp(tg_model, free_model, q_inputs, feature_mode=True) self.mnemonics_optimizer.zero_grad() total_tr_loss = 0 if iteration == start_iteration: mnemonics_outputs = tg_feature_model( self.mnemonics[0][0]) else: mnemonics_outputs = process_inputs_fp( tg_model, free_model, self.mnemonics[0][0], feature_mode=True) this_class_mean_mnemonics = torch.mean(mnemonics_outputs, dim=0) this_class_mean_mnemonics = torch.squeeze( this_class_mean_mnemonics) total_class_mean_mnemonics = this_class_mean_mnemonics.unsqueeze( dim=0) for 
mnemonics_idx in range(len(self.mnemonics[0]) - 1): if iteration == start_iteration: mnemonics_outputs = tg_feature_model( self.mnemonics[0][mnemonics_idx + 1]) else: mnemonics_outputs = process_inputs_fp( tg_model, free_model, self.mnemonics[0][mnemonics_idx + 1], feature_mode=True) this_class_mean_mnemonics = torch.mean( mnemonics_outputs, dim=0) this_class_mean_mnemonics = torch.squeeze( this_class_mean_mnemonics) total_class_mean_mnemonics = torch.cat( (total_class_mean_mnemonics, this_class_mean_mnemonics.unsqueeze(dim=0)), dim=0) if iteration == start_iteration: all_cls_means = total_class_mean_mnemonics else: all_cls_means = torch.tensor( current_means_new).float().to(device) all_cls_means[-nb_cl:] = total_class_mean_mnemonics the_logits = F.linear( F.normalize(torch.squeeze(q_feature), p=2, dim=1), F.normalize(all_cls_means, p=2, dim=1)) loss = F.cross_entropy(the_logits, q_targets) loss.backward() self.mnemonics_optimizer.step() train_loss += loss.item() X_protoset_cumuls = process_mnemonics( X_protoset_cumuls, Y_protoset_cumuls, self.mnemonics, self.mnemonics_label, order_list, self.args.nb_cl_fg, self.args.nb_cl, iteration, start_iter) X_protoset_array = np.array(X_protoset_cumuls) X_protoset_cumuls_idx = 0 for iteration2 in range(iteration + 1): for iter_dico in range(self.args.nb_cl): alph = alpha_dr_herding[iteration2, :, iter_dico] alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1. this_X_protoset_array = X_protoset_array[ X_protoset_cumuls_idx] X_protoset_cumuls_idx += 1 this_X_protoset_array = this_X_protoset_array.astype( np.float64) prototypes[iteration2 * self.args.nb_cl + iter_dico, np.where(alph == 1)[0]] = this_X_protoset_array class_means = np.zeros((64, 100, 2)) for iteration2 in range(iteration + 1): for iter_dico in range(self.args.nb_cl): current_cl = order[range(iteration2 * self.args.nb_cl, (iteration2 + 1) * self.args.nb_cl)] self.evalset.test_data = prototypes[ iteration2 * self.args.nb_cl + iter_dico].astype('uint8') self.evalset.test_labels = np.zeros( self.evalset.test_data.shape[0]) #zero labels evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) num_samples = self.evalset.test_data.shape[0] mapped_prototypes = compute_features( tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) D = mapped_prototypes.T D = D / np.linalg.norm(D, axis=0) self.evalset.test_data = prototypes[ iteration2 * self.args.nb_cl + iter_dico][:, :, :, ::-1].astype('uint8') evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) mapped_prototypes2 = compute_features( tg_model, free_model, tg_feature_model, is_start_iteration, evalloader, num_samples, num_features) D2 = mapped_prototypes2.T D2 = D2 / np.linalg.norm(D2, axis=0) alph = alpha_dr_herding[iteration2, :, iter_dico] alph = (alph > 0) * (alph < nb_protos_cl + 1) * 1. 
alph = alph / np.sum(alph) class_means[:, current_cl[iter_dico], 0] = (np.dot(D, alph) + np.dot(D2, alph)) / 2 class_means[:, current_cl[iter_dico], 0] /= np.linalg.norm( class_means[:, current_cl[iter_dico], 0]) alph = np.ones(dictionary_size) / dictionary_size class_means[:, current_cl[iter_dico], 1] = (np.dot(D, alph) + np.dot(D2, alph)) / 2 class_means[:, current_cl[iter_dico], 1] /= np.linalg.norm( class_means[:, current_cl[iter_dico], 1]) torch.save( class_means, osp.join( self.save_path, 'run_{}_iteration_{}_class_means.pth'.format( iteration_total, iteration))) current_means = class_means[:, order[range(0, (iteration + 1) * self.args.nb_cl)]] is_start_iteration = (iteration == start_iter) map_Y_valid_ori = np.array( [order_list.index(i) for i in Y_valid_ori]) print('Computing accuracy for first-phase classes') self.evalset.test_data = X_valid_ori.astype('uint8') self.evalset.test_labels = map_Y_valid_ori evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) ori_acc, fast_fc = compute_accuracy( tg_model, free_model, tg_feature_model, current_means, X_protoset_cumuls, Y_protoset_cumuls, evalloader, order_list, is_start_iteration=is_start_iteration, maml_lr=self.args.maml_lr, maml_epoch=self.args.maml_epoch) top1_acc_list_ori[iteration, :, iteration_total] = np.array(ori_acc).T self.train_writer.add_scalar('ori_acc/LwF', float(ori_acc[0]), iteration) self.train_writer.add_scalar('ori_acc/iCaRL', float(ori_acc[1]), iteration) map_Y_valid_cumul = np.array( [order_list.index(i) for i in Y_valid_cumul]) print('Computing accuracy for all seen classes') self.evalset.test_data = X_valid_cumul.astype('uint8') self.evalset.test_labels = map_Y_valid_cumul evalloader = torch.utils.data.DataLoader( self.evalset, batch_size=self.args.eval_batch_size, shuffle=False, num_workers=self.args.num_workers) cumul_acc, _ = compute_accuracy( tg_model, free_model, tg_feature_model, current_means, X_protoset_cumuls, Y_protoset_cumuls, evalloader, order_list, is_start_iteration=is_start_iteration, fast_fc=fast_fc, maml_lr=self.args.maml_lr, maml_epoch=self.args.maml_epoch) top1_acc_list_cumul[iteration, :, iteration_total] = np.array(cumul_acc).T self.train_writer.add_scalar('cumul_acc/LwF', float(cumul_acc[0]), iteration) self.train_writer.add_scalar('cumul_acc/iCaRL', float(cumul_acc[1]), iteration) torch.save( top1_acc_list_ori, osp.join(self.save_path, 'run_{}_top1_acc_list_ori.pth'.format(iteration_total))) torch.save( top1_acc_list_cumul, osp.join(self.save_path, 'run_{}_top1_acc_list_cumul.pth'.format(iteration_total))) self.train_writer.close
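# For reference: the exemplar selection in the loop above is iCaRL-style herding.
# Each step greedily picks the sample whose inclusion keeps the mean of the
# selected features closest to the true class mean. A self-contained restatement
# of that inner loop (illustrative only; D, alpha and nb_protos_cl mirror the
# names used above):
import numpy as np


def herding_selection(D, nb_protos_cl, max_iter=1000):
    # D: [num_features, num_samples], columns are L2-normalised class features
    mu = np.mean(D, axis=1)
    alpha = np.zeros(D.shape[1])
    w_t = mu.copy()
    iter_herding, iter_eff = 0, 0
    while np.sum(alpha != 0) < min(nb_protos_cl, D.shape[1]) and iter_eff < max_iter:
        ind_max = np.argmax(np.dot(w_t, D))       # most aligned remaining sample
        iter_eff += 1
        if alpha[ind_max] == 0:
            alpha[ind_max] = 1 + iter_herding     # store the selection rank
            iter_herding += 1
        w_t = w_t + mu - D[:, ind_max]
    return alpha                                  # non-zero entries mark chosen exemplars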
def __init__(self, target_shapes, chunk_size, chunk_emb_size=8, cond_chunk_embs=False, uncond_in_size=0, cond_in_size=8, layers=(100, 100), verbose=True, activation_fn=torch.nn.ReLU(), use_bias=True, no_uncond_weights=False, no_cond_weights=False, num_cond_embs=1, dropout_rate=-1, use_spectral_norm=False, use_batch_norm=False): # FIXME find a way using super to handle multiple inheritance. nn.Module.__init__(self) HyperNetInterface.__init__(self) assert isinstance(chunk_size, int) and chunk_size > 0 assert isinstance(chunk_emb_size, int) and chunk_emb_size > 0 ### Make constructor arguments internally available ### self._chunk_size = chunk_size self._chunk_emb_size = chunk_emb_size self._cond_chunk_embs = cond_chunk_embs self._uncond_in_size = uncond_in_size self._cond_in_size = cond_in_size self._no_uncond_weights = no_uncond_weights self._no_cond_weights = no_cond_weights self._num_cond_embs = num_cond_embs ### Create underlying full hypernet ### # Note, even if chunk embeddings are considered conditional, they # are maintained in this object and just fed as an external input to the # underlying hnet. hnet_uncond_in_size = uncond_in_size + chunk_emb_size hnet_num_cond_embs = num_cond_embs if cond_chunk_embs and cond_in_size == 0: # If there are no other conditional embeddings except the chunk # embeddings, we tell the underlying hnet explicitly that it doesn't # need to maintain any conditional weights to avoid that it will # throw a warning. hnet_num_cond_embs = 0 self._hnet = HMLP([[chunk_size]], uncond_in_size=hnet_uncond_in_size, cond_in_size=cond_in_size, layers=layers, verbose=False, activation_fn=activation_fn, use_bias=use_bias, no_uncond_weights=no_uncond_weights, no_cond_weights=no_cond_weights, num_cond_embs=hnet_num_cond_embs, dropout_rate=dropout_rate, use_spectral_norm=use_spectral_norm, use_batch_norm=use_batch_norm) ### Setup attributes required by interface ### # Most of these attributes are taken over from `self._hnet` self._target_shapes = target_shapes self._num_known_conds = self._num_cond_embs self._unconditional_param_shapes_ref = \ list(self._hnet._unconditional_param_shapes_ref) if self._hnet._internal_params is not None: self._internal_params = \ nn.ParameterList(self._hnet._internal_params) self._param_shapes = list(self._hnet._param_shapes) self._param_shapes_meta = list(self._hnet._param_shapes_meta) if self._hnet._hyper_shapes_learned is not None: self._hyper_shapes_learned = list(self._hnet._hyper_shapes_learned) self._hyper_shapes_learned_ref = \ list(self._hnet._hyper_shapes_learned_ref) if self._hnet._hyper_shapes_distilled is not None: self._hyper_shapes_distilled = \ list(self._hnet._hyper_shapes_distilled) self._has_bias = self._hnet._has_bias self._has_fc_out = self._hnet._has_fc_out # Just to make that clear explicitly. We will additionally append # the chunk embeddings at the end of `param_shapes`. # We don't prepend it to the beginning, to keep conditional input # embeddings at the beginning. 
self._mask_fc_out = False self._has_linear_out = self._hnet._has_linear_out self._layer_weight_tensors = \ nn.ParameterList(self._hnet._layer_weight_tensors) self._layer_bias_vectors = \ nn.ParameterList(self._hnet._layer_bias_vectors) if self._hnet._batchnorm_layers is not None: self._batchnorm_layers = nn.ModuleList(self._hnet._batchnorm_layers) if self._hnet._context_mod_layers is not None: self._context_mod_layers = \ nn.ModuleList(self._hnet._context_mod_layers) ### Create chunk embeddings ### if cond_in_size == 0 and uncond_in_size == 0 and not cond_chunk_embs: # Note, we could also allow this case. It would be analoguous to # creating a full hypernet with no unconditional input and one # conditional embedding. But the user can explicitly achieve that # as noted below. raise ValueError('If no external (conditional or unconditional) ' + 'input is provided to the hypernetwork, then ' + 'it can only learn a fixed output. If this ' + 'behavior is desired, please enable ' + '"cond_chunk_embs" and set "num_cond_embs=1".') num_cemb_mats = 1 no_cemb_weights = no_uncond_weights if cond_chunk_embs: num_cemb_mats = num_cond_embs no_cemb_weights = no_cond_weights self._cemb_shape = [self.num_chunks, chunk_emb_size] for _ in range(num_cemb_mats): if not no_cemb_weights: self._internal_params.append(nn.Parameter( \ data=torch.Tensor(*self._cemb_shape), requires_grad=True)) torch.nn.init.normal_(self._internal_params[-1], mean=0., std=1.) else: self._hyper_shapes_learned.append(self._cemb_shape) self._hyper_shapes_learned_ref.append(len(self.param_shapes)) if not cond_chunk_embs: self._unconditional_param_shapes_ref.append( \ len(self.param_shapes)) self._param_shapes.append(self._cemb_shape) # In principle, these embeddings also belong to the input, so we # just assign them as "layer" 0 (note, the underlying hnet uses the # same layer ID for its embeddings. self._param_shapes_meta.append({ 'name': 'embedding', 'index': -1 if no_cemb_weights else \ len(self._internal_params)-1, 'layer': 0, 'info': 'chunk embeddings' }) ### Finalize construction ### self._is_properly_setup() if verbose: print('Created Chunked MLP Hypernet with %d chunk(s) of size %d.' \ % (self.num_chunks, chunk_size)) print(self)
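# The class above builds a *chunked* hypernetwork: one small shared MLP emits the
# target network's weights in fixed-size chunks, each conditioned on a learned
# per-chunk embedding, instead of emitting all weights at once. A toy,
# self-contained illustration of that principle (not this class's actual API):
import torch
import torch.nn as nn


class TinyChunkedHnet(nn.Module):
    def __init__(self, target_numel, chunk_size=16, chunk_emb_size=8, in_size=4):
        super().__init__()
        self.target_numel = target_numel
        self.num_chunks = -(-target_numel // chunk_size)     # ceil division
        self.chunk_embs = nn.Parameter(
            torch.randn(self.num_chunks, chunk_emb_size))    # one embedding per chunk
        self.mlp = nn.Sequential(
            nn.Linear(in_size + chunk_emb_size, 64), nn.ReLU(),
            nn.Linear(64, chunk_size))

    def forward(self, x):
        # x: [1, in_size] external (e.g. task) input, shared across all chunks
        x_rep = x.expand(self.num_chunks, -1)
        chunks = self.mlp(torch.cat([x_rep, self.chunk_embs], dim=1))
        flat = chunks.reshape(-1)[:self.target_numel]        # drop padding of last chunk
        return flat                                          # reshape to target shapes as needed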
def __init__(self, f, params=None):
    if params is None:
        params = ()
    super(FuncModule, self).__init__()
    self.f = f
    self.params = nn.ParameterList(list(params))
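# FuncModule's only job is to register extra tensors as learnable parameters next
# to an arbitrary callable, so an optimizer can see them. A hypothetical usage
# sketch (the lambda and `scale` are illustrative):
import torch
import torch.nn as nn

scale = nn.Parameter(torch.ones(1))
mod = FuncModule(lambda x: x * scale, params=[scale])
print([p.shape for p in mod.parameters()])   # [torch.Size([1])] -- scale is now registered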
def __init__(self):
    super(MyListDense, self).__init__()
    self.params = nn.ParameterList(
        [nn.Parameter(torch.randn(4, 4)) for i in range(3)])
    self.params.append(nn.Parameter(torch.randn(4, 1)))
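# Keeping the weights in an nn.ParameterList (instead of a plain Python list) is
# what makes them visible to .parameters() and .state_dict(). A quick check
# (illustrative only):
net = MyListDense()
print(len(list(net.parameters())))                  # 4: three 4x4 matrices + the appended 4x1
print('params.0' in dict(net.named_parameters()))   # True -- list entries get indexed names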
def __init__(self, config, imgc, imgsz):
    """
    :param config: network config file, type: list of (string, list)
    :param imgc: 1 or 3
    :param imgsz: 28 or 84
    """
    super(Learner, self).__init__()

    self.config = config
    self.vars = nn.ParameterList()
    self.vars_bn = nn.ParameterList()

    for i, (name, param) in enumerate(self.config):
        # Note: string comparison must use `==`, not `is` (identity check).
        if name == 'conv2d':
            # [ch_out, ch_in, kernelsz, kernelsz]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
        elif name == 'convt2d':
            # [ch_in, ch_out, kernelsz, kernelsz, stride, padding]
            w = nn.Parameter(torch.ones(*param[:4]))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_in, ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[1])))
        elif name == 'linear':
            # [ch_out, ch_in]
            w = nn.Parameter(torch.ones(*param))
            # gain=1 according to cbfinn's implementation
            torch.nn.init.kaiming_normal_(w)
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))
        elif name == 'bn':
            # [ch_out]
            w = nn.Parameter(torch.ones(param[0]))
            self.vars.append(w)
            # [ch_out]
            self.vars.append(nn.Parameter(torch.zeros(param[0])))

            # must set requires_grad=False
            running_mean = nn.Parameter(torch.zeros(param[0]),
                                        requires_grad=False)
            running_var = nn.Parameter(torch.ones(param[0]),
                                       requires_grad=False)
            self.vars_bn.extend([running_mean, running_var])
        elif name in [
                'tanh', 'relu', 'upsample', 'avg_pool2d', 'max_pool2d',
                'flatten', 'reshape', 'leakyrelu', 'sigmoid'
        ]:
            continue
        else:
            raise NotImplementedError

    for p in self.vars_bn:
        p.requires_grad = False
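# The Learner above materialises its weights from a declarative `config` list so
# that MAML-style fast weights can be swapped in later. A hypothetical config for
# a tiny conv net on 28x28 single-channel images (layer sizes are illustrative and
# depend on the forward pass, which is not shown here):
config = [
    ('conv2d', [32, 1, 3, 3, 1, 0]),         # [ch_out, ch_in, k, k, stride, padding]
    ('relu', [True]),
    ('bn', [32]),
    ('max_pool2d', [2, 2, 0]),
    ('flatten', []),
    ('linear', [10, 32 * 13 * 13]),          # [ch_out, ch_in]
]
learner = Learner(config, imgc=1, imgsz=28)
print(sum(p.numel() for p in learner.vars))  # total number of fast-weight elements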