def __init__(self, opt, device):
    """Build the STR recognition pipeline: extractor -> BiLSTM sequence model -> attention head.

    Args:
        opt: hyperparameter namespace; reads `extract`, `input_channel`,
            `output_channel`, `hidden_size`, `num_classes` (and, for the
            commented-out TPS stage, `num_fiducial`, `imgH`, `imgW`).
        device: torch device forwarded to the attention predictor.

    Raises:
        ValueError: if `opt.extract` names no known backbone.
    """
    super(STR, self).__init__()
    self.opt = opt

    # NOTE(review): the TPS spatial-transformer stage was disabled upstream;
    # left out here rather than carried as commented-out code.

    # Feature extraction backbone, selected by name.
    if self.opt.extract == 'RCNN':
        # BUGFIX: original had a duplicated `self.Extract = self.Extract = ...`.
        self.Extract = Extract.RCNN_extractor(opt.input_channel, opt.output_channel)
    elif 'efficientnet' in self.opt.extract:
        self.Extract = Extract.EfficientNet(opt)
    elif 'resnet' in self.opt.extract:
        self.Extract = Extract.ResNet_FeatureExtractor(opt.input_channel, opt.output_channel)
    else:
        # BUGFIX: original `raise print(...)` raises None, which is a TypeError,
        # not a usable error message. Raise a proper exception instead.
        raise ValueError('invalid extract model name!')

    self.FeatureExtraction_output = opt.output_channel  # (imgH/16 - 1) * 512
    # Collapse the height axis of the feature map: imgH/16-1 -> 1.
    self.AdaptiveAvgPool = nn.AdaptiveAvgPool2d((None, 1))

    # Sequence modeling: two stacked bidirectional LSTMs.
    self.Seq = nn.Sequential(
        BidirectionalLSTM(self.FeatureExtraction_output, opt.hidden_size, opt.hidden_size),
        BidirectionalLSTM(opt.hidden_size, opt.hidden_size, opt.hidden_size))
    self.Seq_output = opt.hidden_size

    # Prediction head: attention decoder over the sequence features.
    self.Pred = Pred.Attention(self.Seq_output, opt.hidden_size, opt.num_classes,
                               device=device)
def __init__(self, opt, device):
    """Build the full model: TPS rectification -> extractor -> position-aware
    and hybrid branches (top/mid/bot) -> dynamic fusing of the two branches.

    Args:
        opt: hyperparameter namespace; reads `num_fiducial`, `imgH`, `imgW`,
            `input_channel`, `output_channel`, `hidden_size`, `extract`,
            `batch_max_length`, and the per-level class counts
            `top_n_cls` / `mid_n_cls` / `bot_n_cls`.
        device: torch device forwarded to the attention and hybrid modules.

    Raises:
        ValueError: if `opt.extract` names no known backbone.
    """
    super(model, self).__init__()
    self.opt = opt

    # Rectification: TPS spatial transformer normalizes the input image.
    self.Trans = Trans.TPS_SpatialTransformerNetwork(
        F=opt.num_fiducial,
        i_size=(opt.imgH, opt.imgW),
        i_r_size=(opt.imgH, opt.imgW),
        i_channel_num=opt.input_channel,
        device=device)

    # Feature extraction backbone, selected by name.
    if self.opt.extract == 'RCNN':
        # BUGFIX: original had a duplicated `self.Extract = self.Extract = ...`.
        self.Extract = Extract.RCNN_extractor(opt.input_channel, opt.output_channel)
    elif 'efficientnet' in self.opt.extract:
        self.Extract = Extract.EfficientNet(opt)
    elif 'resnet' in self.opt.extract:
        self.Extract = Extract.ResNet_FeatureExtractor(opt.input_channel, opt.output_channel)
    else:
        # BUGFIX: original `raise print(...)` raises None, which is a TypeError,
        # not a usable error message. Raise a proper exception instead.
        raise ValueError('invalid extract model name!')

    # Position-aware module plus one attention module per character level.
    self.PAM = PositionEnhancement.PositionAwareModule(
        opt.output_channel, opt.hidden_size, opt.output_channel, 2)
    self.PAttnM_bot = PositionEnhancement.AttnModule(
        opt, opt.hidden_size, opt.bot_n_cls, device)
    self.PAttnM_mid = PositionEnhancement.AttnModule(
        opt, opt.hidden_size, opt.mid_n_cls, device)
    self.PAttnM_top = PositionEnhancement.AttnModule(
        opt, opt.hidden_size, opt.top_n_cls, device)

    # Hybrid branch per level; +1 accounts for the end-of-sequence token.
    self.Hybrid_bot = Hybrid.HybridBranch(
        opt.output_channel, opt.batch_max_length + 1, opt.bot_n_cls, device)
    self.Hybrid_mid = Hybrid.HybridBranch(
        opt.output_channel, opt.batch_max_length + 1, opt.mid_n_cls, device)
    self.Hybrid_top = Hybrid.HybridBranch(
        opt.output_channel, opt.batch_max_length + 1, opt.top_n_cls, device)

    # Dynamically fuse the position-attention and hybrid predictions per level.
    self.Dynamic_fuser_top = PositionEnhancement.DynamicallyFusingModule(opt.top_n_cls)
    self.Dynamic_fuser_mid = PositionEnhancement.DynamicallyFusingModule(opt.mid_n_cls)
    self.Dynamic_fuser_bot = PositionEnhancement.DynamicallyFusingModule(opt.bot_n_cls)