def get_current_consistency_weight(final_consistency_weight, epoch, step_in_epoch,
                                   total_steps_in_epoch):
    # Consistency ramp-up from https://arxiv.org/abs/1610.02242
    epoch = epoch - args.consistency_rampup_starts
    epoch = epoch + step_in_epoch / total_steps_in_epoch
    return final_consistency_weight * ramps.sigmoid_rampup(
        epoch, args.consistency_rampup_ends - args.consistency_rampup_starts)


def get_current_consistency_weight(self, epoch):
    # Consistency ramp-up from https://arxiv.org/abs/1610.02242
    # Unsupervised weight ramp-up function.
    """From the paper: "We noticed that optimization tended to explode during the
    ramp-up period, and we eventually found that using a lower value for the Adam
    β2 parameter (e.g., 0.99 instead of 0.999) seems to help in this regard.

    In our implementation, the unsupervised loss weighting function w(t) ramps up,
    starting from zero, along a Gaussian curve during the first 80 training epochs.
    See Appendix A for further details about this and other training parameters.
    In the beginning the total loss and the learning gradients are thus dominated
    by the supervised loss component, i.e., the labeled data only. We have found it
    to be very important that the ramp-up of the unsupervised loss component is
    slow enough -- otherwise, the network gets easily stuck in a degenerate
    solution where no meaningful classification of the data is obtained."
    """
    return self.args.consistency * ramps.sigmoid_rampup(
        epoch, self.args.consistency_rampup)


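# The `ramps.sigmoid_rampup` helper used above is not shown in these snippets
# (`ramps` is assumed to be a small utility module, and `args`/`self.args` the
# parsed hyperparameters). Below is a minimal sketch of what such a helper
# typically looks like, assuming the Gaussian ramp exp(-5 * (1 - t)^2) described
# in the docstring above; the constant -5 and the clipping follow that
# convention and may differ in a given codebase.
import numpy as np


def sigmoid_rampup(current, rampup_length):
    """Gaussian ramp-up exp(-5 * (1 - t)^2) with t = current / rampup_length.

    Starts near zero (exp(-5) ~ 0.0067) at current == 0 and reaches 1.0 once
    `current` hits `rampup_length`; a length of 0 disables the ramp entirely.
    """
    if rampup_length == 0:
        return 1.0
    current = np.clip(current, 0.0, rampup_length)
    phase = 1.0 - current / rampup_length
    return float(np.exp(-5.0 * phase * phase))

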
def get_current_consistency_weight(epoch):
    # Consistency ramp-up from https://arxiv.org/abs/1610.02242
    return args.consistency * ramps.sigmoid_rampup(epoch, args.consistency_rampup)


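# Hypothetical usage sketch: the ramped weight simply scales the unsupervised
# consistency term before it is added to the supervised loss. The names
# (`class_criterion`, `consistency_criterion`, `student_logits`,
# `teacher_logits`) are illustrative assumptions, not taken from the snippets
# above.
def training_step_loss(epoch, labels, student_logits, teacher_logits,
                       class_criterion, consistency_criterion):
    consistency_weight = get_current_consistency_weight(epoch)
    supervised_loss = class_criterion(student_logits, labels)
    consistency_loss = consistency_weight * consistency_criterion(
        student_logits, teacher_logits)
    return supervised_loss + consistency_loss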