def __init__(self, input_size, hidden_size, output_size, num_layers=1,
             optimizer_type='Adagrad', lr=.01, weight_decay=0,
             momentum=0, eps=1e-6, loss_type='TOP1',
             clip_grad=-1, dropout_input=.0, dropout_hidden=.5,
             batch_size=50, use_cuda=True, time_sort=False, pretrained=None):
    """ The GRU4REC model

    Args:
        input_size (int): dimension of the gru input variables
        hidden_size (int): dimension of the gru hidden units
        output_size (int): dimension of the gru output variables
        num_layers (int): the number of layers in the GRU
        optimizer_type (str): optimizer type for GRU weights
        lr (float): learning rate for the optimizer
        weight_decay (float): weight decay for the optimizer
        momentum (float): momentum for the optimizer
        eps (float): eps for the optimizer
        loss_type (str): type of the loss function to use
        clip_grad (float): clip the gradient norm at clip_grad. No clipping if clip_grad = -1
        dropout_input (float): dropout probability for the input layer
        dropout_hidden (float): dropout probability for the hidden layer
        batch_size (int): mini-batch size
        use_cuda (bool): whether you want to use cuda or not
        time_sort (bool): whether to ensure the order of sessions is chronological (default: False)
        pretrained (modules.layer.GRU): pretrained GRU layer, if it exists (default: None)
    """
    # Initialize the GRU layer
    self.input_size = input_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.batch_size = batch_size
    self.use_cuda = use_cuda
    if pretrained is None:
        self.gru = GRU(input_size, hidden_size, output_size, num_layers,
                       dropout_input=dropout_input,
                       dropout_hidden=dropout_hidden,
                       use_cuda=use_cuda,
                       batch_size=batch_size)
    else:
        self.gru = pretrained

    # Initialize the optimizer
    self.optimizer_type = optimizer_type
    self.weight_decay = weight_decay
    self.momentum = momentum
    self.lr = lr
    self.eps = eps
    self.optimizer = Optimizer(self.gru.parameters(),
                               optimizer_type=optimizer_type,
                               lr=lr,
                               weight_decay=weight_decay,
                               momentum=momentum,
                               eps=eps)

    # Initialize the loss function
    self.loss_type = loss_type
    self.loss_fn = LossFunction(loss_type, use_cuda)

    # Gradient clipping (optional)
    self.clip_grad = clip_grad

    # etc
    self.time_sort = time_sort
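# A minimal usage sketch for the constructor above. It assumes the enclosing class is
# named GRU4REC (as the docstring suggests) and uses a hypothetical item count; the
# actual value comes from your dataset.
#
#   n_items = 37483  # hypothetical: number of distinct items in the training set
#   model = GRU4REC(input_size=n_items, hidden_size=100, output_size=n_items,
#                   num_layers=1, optimizer_type='Adagrad', lr=.01,
#                   loss_type='TOP1', batch_size=50, use_cuda=True)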
def __init__(self, input_size, if_embedding, embedding_size, hidden_size, output_size,
             num_layers=1, optimizer_type='Adagrad', lr=.01, weight_decay=0,
             momentum=0, eps=1e-6, loss_type='TOP1',
             clip_grad=-1, dropout_input=.0, dropout_hidden=.5,
             batch_size=50, use_cuda=True, cuda_id=1, compress=False,
             time_sort=False, pretrained=None):
    """ The GRU4REC model

    Args:
        input_size (int): dimension of the gru input variables
        if_embedding (bool): whether to use an embedding layer for the input
        embedding_size (int): dimension of the embedding layer
        hidden_size (int): dimension of the gru hidden units
        output_size (int): dimension of the gru output variables
        num_layers (int): the number of layers in the GRU
        optimizer_type (str): optimizer type for GRU weights
        lr (float): learning rate for the optimizer
        weight_decay (float): weight decay for the optimizer
        momentum (float): momentum for the optimizer
        eps (float): eps for the optimizer
        loss_type (str): type of the loss function to use
        clip_grad (float): clip the gradient norm at clip_grad. No clipping if clip_grad = -1
        dropout_input (float): dropout probability for the input layer
        dropout_hidden (float): dropout probability for the hidden layer
        batch_size (int): mini-batch size
        use_cuda (bool): whether you want to use cuda or not
        cuda_id (int): id of the GPU to run on when use_cuda is True
        compress: path to a distiller YAML compression schedule file, or False to disable compression (default: False)
        time_sort (bool): whether to ensure the order of sessions is chronological (default: False)
        pretrained (modules.layer.GRU): pretrained GRU layer, if it exists (default: None)
    """
    # Initialize the GRU layer
    self.input_size = input_size
    self.if_embedding = if_embedding
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.output_size = output_size
    self.batch_size = batch_size
    self.use_cuda = use_cuda
    self.cuda_id = cuda_id
    # Must specify cuda_id, otherwise torch.cuda.current_device() is used
    self.device = torch.device('cuda:%d' % cuda_id if use_cuda else 'cpu')
    print(self.device)
    if pretrained is None:
        self.gru = GRU(input_size, if_embedding, embedding_size, hidden_size, output_size, num_layers,
                       dropout_input=dropout_input,
                       dropout_hidden=dropout_hidden,
                       batch_size=batch_size,
                       use_cuda=use_cuda,
                       cuda_id=cuda_id)
    else:
        self.gru = pretrained

    # Initialize the optimizer
    self.optimizer_type = optimizer_type
    self.weight_decay = weight_decay
    self.momentum = momentum
    self.lr = lr
    self.eps = eps

    self.compress = compress
    self.compression_scheduler = None
    if self.compress:
        # Create a CompressionScheduler and configure it from a YAML schedule file
        self.compression_scheduler = distiller.config.file_config(self.gru, None, self.compress)

    self.optimizer = Optimizer(self.gru.parameters(),
                               optimizer_type=optimizer_type,
                               lr=lr,
                               weight_decay=weight_decay,
                               momentum=momentum,
                               eps=eps)

    # Initialize the loss function
    self.loss_type = loss_type
    self.loss_fn = LossFunction(loss_type, use_cuda, cuda_id)

    # Gradient clipping (optional)
    self.clip_grad = clip_grad

    # etc
    self.time_sort = time_sort
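# A minimal usage sketch for the extended constructor. The class name GRU4REC, the item
# count, and the schedule file path are assumptions for illustration; `compress` expects
# a distiller YAML schedule and may be left as False to skip compression entirely.
#
#   n_items = 37483  # hypothetical: number of distinct items in the training set
#   model = GRU4REC(input_size=n_items, if_embedding=True, embedding_size=128,
#                   hidden_size=100, output_size=n_items,
#                   use_cuda=True, cuda_id=0,
#                   compress='schedules/prune_gru.yaml')  # hypothetical path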