def __init__(self, method, push_rewards, future_reward_discount,
             is_testing, load_snapshot, snapshot_file, force_cpu):

    self.method = method

    # Check if CUDA can be used
    if torch.cuda.is_available() and not force_cpu:
        print("CUDA detected. Running with GPU acceleration.")
        self.use_cuda = True
    elif force_cpu:
        print("CUDA detected, but overriding with option '--cpu'. Running with only CPU.")
        self.use_cuda = False
    else:
        print("CUDA is *NOT* detected. Running with only CPU.")
        self.use_cuda = False

    # Fully convolutional classification network for supervised learning
    if self.method == 'reactive':
        self.model = reactive_net(self.use_cuda)

        # Initialize classification loss
        push_num_classes = 3  # 0 - push, 1 - no change push, 2 - no loss
        push_class_weights = torch.ones(push_num_classes)
        push_class_weights[push_num_classes - 1] = 0
        if self.use_cuda:
            self.push_criterion = CrossEntropyLoss2d(push_class_weights.cuda()).cuda()
        else:
            self.push_criterion = CrossEntropyLoss2d(push_class_weights)
        grasp_num_classes = 3  # 0 - grasp, 1 - failed grasp, 2 - no loss
        grasp_class_weights = torch.ones(grasp_num_classes)
        grasp_class_weights[grasp_num_classes - 1] = 0
        if self.use_cuda:
            self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights.cuda()).cuda()
        else:
            self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights)

    # Fully convolutional Q network for deep reinforcement learning
    elif self.method == 'reinforcement':
        self.model = reinforcement_net(self.use_cuda)
        self.push_rewards = push_rewards
        self.future_reward_discount = future_reward_discount

        # Initialize Huber loss
        self.criterion = torch.nn.SmoothL1Loss(reduce=False)  # Huber loss
        if self.use_cuda:
            self.criterion = self.criterion.cuda()

    # Load pre-trained model
    if load_snapshot:
        print('snapshot_file', snapshot_file)
        self.model.load_state_dict(torch.load(snapshot_file))
        print('Pre-trained model snapshot loaded from: %s' % (snapshot_file))

    # Convert model from CPU to GPU
    if self.use_cuda:
        self.model = self.model.cuda()

    # Set model to training mode
    self.model.train()

    # Initialize optimizer
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5)
    self.iteration = 0

    # Initialize lists to save execution info and RL variables
    self.executed_action_log = []
    self.label_value_log = []
    self.reward_value_log = []
    self.predicted_value_log = []
    self.use_heuristic_log = []
    self.is_exploit_log = []
    self.clearance_log = []
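
# Note: CrossEntropyLoss2d used above is not a built-in PyTorch loss; it is defined
# elsewhere in this codebase. Below is a minimal sketch of a compatible pixel-wise,
# class-weighted cross-entropy (an assumed implementation, shown only for reference;
# the zero weight on the last class makes "no loss" pixels contribute nothing).
import torch
import torch.nn as nn
import torch.nn.functional as F

class CrossEntropyLoss2d(nn.Module):
    """Per-pixel cross-entropy: log-softmax over the channel dim + weighted NLL."""
    def __init__(self, weight=None):
        super(CrossEntropyLoss2d, self).__init__()
        # nn.NLLLoss accepts (N, C, H, W) inputs with (N, H, W) integer targets
        self.nll_loss = nn.NLLLoss(weight)

    def forward(self, inputs, targets):
        return self.nll_loss(F.log_softmax(inputs, dim=1), targets)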
def __init__(self, method, push_rewards, future_reward_discount,
             is_testing, load_snapshot, snapshot_file):

    self.method = method

    # Check if CUDA can be used
    if torch.cuda.is_available():
        print("CUDA detected. Running with GPU acceleration.")
        self.use_cuda = True
    else:
        print("CUDA is *NOT* detected. Running with only CPU.")
        self.use_cuda = False

    # Fully convolutional classification network for supervised learning
    if self.method == 'reactive':
        self.model = reactive_net(self.use_cuda)

        # Initialize classification loss
        push_num_classes = 3  # 0 - push, 1 - no change push, 2 - no loss
        push_class_weights = torch.ones(push_num_classes)
        push_class_weights[push_num_classes - 1] = 0
        if self.use_cuda:
            self.push_criterion = CrossEntropyLoss2d(push_class_weights.cuda()).cuda()
        else:
            self.push_criterion = CrossEntropyLoss2d(push_class_weights)
        grasp_num_classes = 3  # 0 - grasp, 1 - failed grasp, 2 - no loss
        grasp_class_weights = torch.ones(grasp_num_classes)
        grasp_class_weights[grasp_num_classes - 1] = 0
        if self.use_cuda:
            self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights.cuda()).cuda()
        else:
            self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights)

    # Fully convolutional Q network for deep reinforcement learning
    elif self.method == 'reinforcement':
        self.model = reinforcement_net(self.use_cuda)
        self.push_rewards = push_rewards
        self.future_reward_discount = future_reward_discount

        # Initialize Huber loss
        self.criterion = torch.nn.SmoothL1Loss(reduce=False)  # Huber loss
        if self.use_cuda:
            self.criterion = self.criterion.cuda()

    # Load pre-trained model
    if load_snapshot:
        # PyTorch v0.4 removes periods in state dict keys, but no backwards compatibility :(
        # Rename legacy 'conv.N'/'norm.N' keys to 'convN'/'normN' so the snapshot loads.
        loaded_snapshot_state_dict = torch.load(snapshot_file)
        loaded_snapshot_state_dict = OrderedDict(
            [(k.replace('conv.1', 'conv1'), v) if 'conv.1' in k else (k, v)
             for k, v in loaded_snapshot_state_dict.items()])
        loaded_snapshot_state_dict = OrderedDict(
            [(k.replace('norm.1', 'norm1'), v) if 'norm.1' in k else (k, v)
             for k, v in loaded_snapshot_state_dict.items()])
        loaded_snapshot_state_dict = OrderedDict(
            [(k.replace('conv.2', 'conv2'), v) if 'conv.2' in k else (k, v)
             for k, v in loaded_snapshot_state_dict.items()])
        loaded_snapshot_state_dict = OrderedDict(
            [(k.replace('norm.2', 'norm2'), v) if 'norm.2' in k else (k, v)
             for k, v in loaded_snapshot_state_dict.items()])
        self.model.load_state_dict(loaded_snapshot_state_dict)
        # self.model.load_state_dict(torch.load(snapshot_file))  # Old loading command pre v0.4
        print('Pre-trained model snapshot loaded from: %s' % (snapshot_file))

    # Convert model from CPU to GPU
    if self.use_cuda:
        self.model = self.model.cuda()

    # Set model to training mode
    self.model.train()

    # Initialize optimizer
    self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5)
    self.iteration = 0

    # Initialize lists to save execution info and RL variables
    self.executed_action_log = []
    self.label_value_log = []
    self.reward_value_log = []
    self.predicted_value_log = []
    self.use_heuristic_log = []
    self.is_exploit_log = []
    self.clearance_log = []
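
# The four key-renaming passes above can also be collapsed into a single regex
# substitution. A minimal standalone sketch follows; the helper name and the
# example key are illustrative assumptions, not part of the original code.
import re
from collections import OrderedDict

import torch

def rename_legacy_densenet_keys(state_dict):
    # e.g. '...denselayer1.conv.1.weight' -> '...denselayer1.conv1.weight'
    pattern = re.compile(r'(conv|norm)\.([12])')
    return OrderedDict((pattern.sub(r'\1\2', k), v) for k, v in state_dict.items())

# Usage sketch:
#   snapshot_state_dict = torch.load(snapshot_file)
#   model.load_state_dict(rename_legacy_densenet_keys(snapshot_state_dict))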