# the loss functions below have been set to reduction='sum'
# changing between alignment, l1 and l2 may require re-tuning of the hyperparameters
if hidden_cost == 'alignment':
    net.add_loss(torch.nn.CosineSimilarity())
    net.add_metric(torch.nn.CosineSimilarity())  # metric for validation
elif hidden_cost == 'l2':
    net.add_loss(torch.nn.MSELoss(reduction='sum'))
    net.add_metric(torch.nn.MSELoss(reduction='sum'))
elif hidden_cost == 'l1':
    net.add_loss(torch.nn.L1Loss(reduction='sum'))
    net.add_metric(torch.nn.L1Loss(reduction='sum'))

# add loss function for the output layer
net.add_loss(torch.nn.CrossEntropyLoss(reduction='sum'))
net.add_metric(K.L0Loss(reduction='sum'))

# this specifies how the G_i are computed (see the paper for the definition of G_i)
net.add_critic(layer2.phi)  # calculate G_1 using kernel k^(2)

"""
if torch.cuda.device_count() > 1:
    layer1 = torch.nn.DataParallel(layer1)
    # FIXME parallelizing kernelized layers currently has issues with layer.X
    # layer2 = torch.nn.DataParallel(layer2)
"""

net.add_layer(layer1)
net.add_layer(layer2)

#########
# begin training
#########
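# Illustrative sketch (an assumption, not necessarily how the library above
# computes it) of what the 'alignment' hidden cost measures: the cosine
# similarity between the flattened Gram matrix G_i and an "ideal" kernel matrix
# built from the labels, with entry (m, n) equal to 1 if examples m and n share
# a label and 0 otherwise. The helpers `ideal_kernel` and `alignment` below are
# hypothetical names used only for this sketch.
import torch


def ideal_kernel(y):
    # y: 1-D tensor of class indices; returns an (N, N) matrix with 1 where
    # labels agree and 0 where they differ
    y = y.view(-1, 1)
    return (y == y.t()).float()


def alignment(G, y):
    # cosine similarity between the two N x N matrices viewed as vectors
    cos = torch.nn.CosineSimilarity(dim=0)
    return cos(G.reshape(-1), ideal_kernel(y).reshape(-1))


# A trainer maximizing alignment(G_1, y_train), or minimizing its negative,
# uses it in the same role as the hidden-layer loss added above.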
# add loss function for the hidden layers
# changing between alignment, l1 and l2 may require re-tuning of the hyperparameters
if hidden_cost == 'alignment':
    net.add_loss(torch.nn.CosineSimilarity())
    net.add_metric(torch.nn.CosineSimilarity())  # metric for validation
elif hidden_cost == 'l2':
    net.add_loss(torch.nn.MSELoss(reduction='mean'))
    net.add_metric(torch.nn.MSELoss(reduction='mean'))
elif hidden_cost == 'l1':
    net.add_loss(torch.nn.L1Loss(reduction='mean'))
    net.add_metric(torch.nn.L1Loss(reduction='mean'))

# add loss function for the output layer
net.add_loss(torch.nn.CrossEntropyLoss())
net.add_metric(K.L0Loss())

# this specifies how the G_i are computed (see the paper for the definition of G_i)
net.add_critic(layer2.phi)  # calculate G_1 using kernel k^(2)

#########
# begin training
#########
net.to(device)
net.fit(
    n_epoch=(epo1, epo2),
    batch_size=batch_size,
    shuffle=shuffle,
    X=x_train,
    Y=y_train,