def __init__(self, n_grapheme=168, n_vowel=11, n_consonant=7, dropout=.0):
    """Assemble the backbone, the pooled-feature stage and three heads.

    Args:
        n_grapheme: Output width of the ``cls_g`` head.
        n_vowel: Output width of the ``cls_v`` head.
        n_consonant: Output width of the ``cls_c`` head.
        dropout: Probability handed to the ``nn.Dropout`` layer of each head.
    """
    super().__init__()
    self.n_grapheme = n_grapheme
    self.n_vowel = n_vowel
    self.n_consonant = n_consonant
    self.dropout = dropout

    def make_head(n_classes):
        # Every head shares one layout: 2048 -> 256 -> n_classes.
        return nn.Sequential(
            nn.Linear(2048, 256),
            nn.BatchNorm1d(256),
            Mish(),
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(256, n_classes),
        )

    # Backbone (feature extraction).
    # Author's note on spatial output size:
    # input 128x128 -> 4x4; 160x160 -> 5x5; 168x168 -> 6x6
    self.feature_extractor = se_resnext50_32x4d()

    # Dense feature: GeM followed by flattening.
    self.gfeats = nn.Sequential(GeM(), nn.Flatten())

    # Classifiers, created in the order grapheme, vowel, consonant so that
    # module registration and parameter initialisation order are unchanged.
    self.cls_g = make_head(self.n_grapheme)
    self.cls_v = make_head(self.n_vowel)
    self.cls_c = make_head(self.n_consonant)
def __init__(self, n_grapheme=168, n_vowel=11, n_consonant=7, dropout=.0):
    """Assemble the backbone, two feature stages and three linear heads.

    Args:
        n_grapheme: Output width of the ``cls_g`` linear layer.
        n_vowel: Output width of the ``cls_v`` linear layer.
        n_consonant: Output width of the ``cls_c`` linear layer.
        dropout: Stored on the instance; no dropout layer is built in this
            constructor.
    """
    super().__init__()
    self.n_grapheme = n_grapheme
    self.n_vowel = n_vowel
    self.n_consonant = n_consonant
    self.dropout = dropout

    n_channels = 2048   # channel count given to BatchNorm2d below
    n_flat = 8192       # author's note: 2048x2x2 once flattened
    n_all_classes = n_grapheme + n_vowel + n_consonant

    # Backbone (feature extraction).
    # Author's note on spatial output size:
    # input 128x128 -> 4x4; 160x160 -> 5x5; 168x168 -> 6x6
    self.feature_extractor = se_resnext50_32x4d()

    # Dense feature for the classifiers: max-pool, normalise, activate,
    # then flatten (author's note: 2048x2x2 after pooling, 8192 after flatten).
    self.cls_pool = nn.Sequential(
        nn.MaxPool2d(2, stride=2, padding=0),
        nn.BatchNorm2d(n_channels),
        Mish(),
        nn.Flatten(),
    )

    # NOTE(review): the BatchNorm1d width here is the summed class count
    # (186 with the defaults). Confirm that this matches what GeM + Flatten
    # emit for whatever tensor forward() feeds into gfeats.
    self.gfeats = nn.Sequential(
        GeM(),
        nn.Flatten(),
        nn.BatchNorm1d(n_all_classes),
        Mish(),
    )

    # Classifiers: one plain linear layer per target.
    self.cls_g = nn.Linear(n_flat, n_grapheme)
    self.cls_v = nn.Linear(n_flat, n_vowel)
    self.cls_c = nn.Linear(n_flat, n_consonant)
def options2model_kwargs(parameters):
    """Return the dataset kwargs for *parameters* plus an ``'activation'`` entry.

    The mapping comes from ``options2dataset_kwargs(parameters)``. The
    ``'activation'`` value is a ``Mish()`` instance when ``parameters.mish``
    is truthy and an ``nn.ReLU()`` instance otherwise.
    """
    model_kwargs = options2dataset_kwargs(parameters)
    model_kwargs['activation'] = Mish() if parameters.mish else nn.ReLU()
    return model_kwargs
def options2model_kwargs(parameters):
    """Return a new dict holding only the ``'activation'`` module.

    The value is a ``Mish()`` instance when ``parameters.mish`` is truthy
    and an ``nn.ReLU()`` instance otherwise.
    """
    activation = Mish() if parameters.mish else nn.ReLU()
    return {'activation': activation}
def __init__(self, n_grapheme=168, n_vowel=11, n_consonant=7, dropout=.1,
             output_features=False):
    """Assemble the backbone, two global pooling layers and three heads.

    Args:
        n_grapheme: Output width of the ``cls_g`` head.
        n_vowel: Output width of the ``cls_v`` head.
        n_consonant: Output width of the ``cls_c`` head.
        dropout: Probability handed to the ``nn.Dropout`` layer of each head.
        output_features: Stored as ``self.feat_out``; not otherwise used in
            this constructor.
    """
    super().__init__()
    self.n_grapheme = n_grapheme
    self.n_vowel = n_vowel
    self.n_consonant = n_consonant
    self.dropout = dropout
    self.feat_out = output_features

    def make_head(n_classes):
        # Every head shares one layout: 2048 -> 512 -> n_classes.
        return nn.Sequential(
            nn.Linear(2048, 512),
            nn.BatchNorm1d(512),
            Mish(),
            nn.Dropout(p=dropout, inplace=True),
            nn.Linear(512, n_classes),
        )

    # Backbone (feature extraction).
    # Author's note on spatial output size:
    # input 128x128 -> 4x4; 160x160 -> 5x5; 168x168 -> 6x6
    self.feature_extractor = se_resnext50_32x4d()

    # Global pooling down to a 1x1 spatial map, max and average variants.
    self.gpoolmax = nn.AdaptiveMaxPool2d((1, 1))
    self.gpoolavg = nn.AdaptiveAvgPool2d((1, 1))

    # Classifiers, created in the order grapheme, vowel, consonant so that
    # module registration and parameter initialisation order are unchanged.
    self.cls_g = make_head(self.n_grapheme)
    self.cls_v = make_head(self.n_vowel)
    self.cls_c = make_head(self.n_consonant)
def __init__(self, n_grapheme=168, n_vowel=11, n_consonant=7, dropout=.0):
    """Assemble the backbone, two feature stages and three linear heads.

    Args:
        n_grapheme: Output width of the ``cls_g`` linear layer.
        n_vowel: Output width of the ``cls_v`` linear layer.
        n_consonant: Output width of the ``cls_c`` linear layer.
        dropout: Stored on the instance; no dropout layer is built in this
            constructor.
    """
    super().__init__()
    self.n_grapheme = n_grapheme
    self.n_vowel = n_vowel
    self.n_consonant = n_consonant
    self.dropout = dropout

    n_dense = 512                                   # width of the gfeats output
    n_vowel_consonant = self.n_vowel + self.n_consonant

    # Backbone (feature extraction).
    # Author's note on spatial output size:
    # input 128x128 -> 4x4; 160x160 -> 5x5; 168x168 -> 6x6
    self.feature_extractor = se_resnext50_32x4d()

    # Dense feature: spatial GeM, flatten, then project 8192 -> 512.
    self.gfeats = nn.Sequential(
        GeM_Spatial(),
        nn.Flatten(),
        nn.Linear(8192, n_dense),
        nn.BatchNorm1d(n_dense),
        Mish(),
    )
    # Normalisation + activation over a vector of n_vowel + n_consonant values.
    self.vcfeats = nn.Sequential(
        nn.BatchNorm1d(n_vowel_consonant),
        Mish(),
    )

    # Classifiers. The grapheme head takes 512 + n_vowel + n_consonant inputs
    # (presumably the dense features joined with the vcfeats output; confirm
    # in forward()), while the vowel and consonant heads take the 512 alone.
    self.cls_g = nn.Linear(n_dense + n_vowel_consonant, self.n_grapheme)
    self.cls_v = nn.Linear(n_dense, self.n_vowel)
    self.cls_c = nn.Linear(n_dense, self.n_consonant)