Example #1
0
    def _load_model(self):
        """Load a CLSTM protobuf model from ``self.fname`` into ``self.rnn``.

        Parses the serialized ``NetworkProto``, extracts the codec and the
        weights of the bidirectional LSTM and softmax layers, and attaches
        them to a freshly constructed ``TBIDILSTM``.  Side effects: sets
        ``self.kind``, ``self.mode``, ``self.trial``, ``self.codec``,
        ``self.weights``, ``self.rnn`` and ``self.criterion``, and moves the
        network to the GPU when ``self.cuda_available`` is true.
        """
        network = clstm_pb2.NetworkProto()
        with open(self.fname, 'rb') as f:
            network.ParseFromString(f.read())

        ninput = network.ninput
        noutput = network.noutput
        # Flatten the repeated KeyValue attribute messages into a plain dict.
        attributes = {a.key: a.value for a in network.attribute[:]}
        self.kind = attributes['kind']
        # Mainline CLSTM models store training state (learning rate,
        # momentum, trial counter) as extra attributes; older models carry
        # only 'kind', so fall back to defaults in compatibility mode.
        if len(attributes) > 1:
            lrate = float(attributes['learning_rate'])
            momentum = float(attributes['momentum'])
            self.trial = int(attributes['trial'])
            self.mode = "clstm"
        else:
            lrate = 1e-4
            momentum = 0.9
            self.trial = 0
            self.mode = 'clstm_compatibility'

        # Codec
        # Build the bidirectional code<->char maps.  The first codec entry
        # is replaced by 126 ('~'), presumably the null/epsilon label of the
        # ocropy convention -- TODO confirm against the CLSTM codec format.
        self.codec = kraken.lib.lstm.Codec()
        code2char, char2code = {}, {}
        for code, char in enumerate([126] + network.codec[1:]):
            code2char[code] = chr(char)
            char2code[chr(char)] = code
        self.codec.code2char = code2char
        self.codec.char2code = char2code

        # Networks
        # Pick the sub-networks out of the container proto: a SoftmaxLayer
        # plus a 'Parallel' node holding the forward NPLSTM and a
        # 'Reversed' wrapper around the backward NPLSTM.
        networks = {}
        networks['softm'] = [
            n for n in network.sub[:] if n.kind == 'SoftmaxLayer'
        ][0]
        parallel = [n for n in network.sub[:] if n.kind == 'Parallel'][0]
        networks['lstm1'] = [
            n for n in parallel.sub[:] if n.kind.startswith('NPLSTM')
        ][0]
        rev = [n for n in parallel.sub[:] if n.kind == 'Reversed'][0]
        networks['lstm2'] = rev.sub[0]

        # Hidden size is stored as the first attribute of the LSTM layer.
        nhidden = int(networks['lstm1'].attribute[0].value)

        # Deserialize every weight array into numpy, keyed by layer then by
        # the weight's own name.
        weights = {}
        for n in networks:
            weights[n] = {}
            for w in networks[n].weights[:]:
                weights[n][w.name] = np.array(w.value).reshape(w.dim[:])
        self.weights = weights

        # CLSTM gate-weight names; compatibility-mode models prefix them
        # with '.' and call the softmax weight '.W' instead of 'W1'.
        weightnames = ('WGI', 'WGF', 'WCI', 'WGO')
        weightname_softm = 'W1'
        if self.mode == 'clstm_compatibility':
            weightnames = ('.WGI', '.WGF', '.WCI', '.WGO')
            weightname_softm = '.W'
        # lstm
        # CLSTM stores input-hidden and hidden-hidden weights in one matrix
        # per gate, with the input side 1-augmented (hence the ninput+1
        # column chunks): the first chunk is taken as the input-hidden
        # weights, the remaining chunks are re-joined as hidden-hidden.
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm1'][wn] \
                                        for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0 = ih_hh_splits[0]
        weight_hh_l0 = torch.cat(ih_hh_splits[1:], 1)

        # lstm_reversed
        # Same layout for the backward direction.
        ih_hh_splits = torch.cat([torch.from_numpy(w.astype('float32')) \
                                  for w in [weights['lstm2'][wn] \
                                        for wn in weightnames]],0).split(ninput+1,1)
        weight_ih_l0_rev = ih_hh_splits[0]
        weight_hh_l0_rev = torch.cat(ih_hh_splits[1:], 1)

        # softmax
        weight_softm = torch.from_numpy(
            weights['softm'][weightname_softm].astype('float32'))
        if self.mode == "clstm_compatibility":
            # Compatibility models lack the augmentation column; prepend a
            # zero column so the decoder weight matches the expected width.
            weight_softm = torch.cat(
                [torch.zeros(len(weight_softm), 1), weight_softm], 1)

        # attach weights
        self.rnn = TBIDILSTM(ninput, nhidden, noutput)
        self.rnn.rnn.weight_ih_l0 = nn.Parameter(weight_ih_l0)
        self.rnn.rnn.weight_hh_l0 = nn.Parameter(weight_hh_l0)
        self.rnn.rnn.weight_ih_l0_reverse = nn.Parameter(weight_ih_l0_rev)
        self.rnn.rnn.weight_hh_l0_reverse = nn.Parameter(weight_hh_l0_rev)
        self.rnn.decoder.weight = nn.Parameter(weight_softm)

        self.setLearningRate(lrate, momentum)
        self.rnn.zero_grad()

        self.criterion = CTCLoss()

        if self.cuda_available:
            self.cuda()
Example #2
0
    def load_clstm_model(cls, path: str):
        """
        Loads a CLSTM model to VGSL.

        Parses the serialized ``NetworkProto`` at ``path``, extracts the
        codec and the weights of the bidirectional LSTM and softmax layers,
        and attaches them to a freshly built VGSL network with spec
        ``[1,1,0,{ninput} Lbxc{hidden} O1ca{noutput}]``.

        Args:
            path (str): path to a CLSTM protobuf model file.

        Returns:
            A VGSL network (instance of ``cls``) with the loaded weights
            and codec attached.

        Raises:
            KrakenInvalidModelException: if the file does not parse as a
                ``NetworkProto`` or the message is incomplete.
        """
        net = clstm_pb2.NetworkProto()
        with open(path, 'rb') as fp:
            try:
                net.ParseFromString(fp.read())
            except Exception:
                raise KrakenInvalidModelException('File does not contain valid proto msg')
            if not net.IsInitialized():
                raise KrakenInvalidModelException('Model incomplete')

        input = net.ninput
        # Flatten the repeated KeyValue attribute messages into a dict.
        attrib = {a.key: a.value for a in list(net.attribute)}
        # mainline clstm model
        # Mainline models carry extra training attributes beyond 'kind';
        # older models need compatibility handling below.
        if len(attrib) > 1:
            mode = 'clstm'
        else:
            mode = 'clstm_compat'

        # extract codec
        # First codec entry becomes the empty string (epsilon/blank label).
        codec = PytorchCodec([''] + [chr(x) for x in net.codec[1:]])

        # separate layers
        # The container proto holds a SoftmaxLayer plus a 'Parallel' node
        # with the forward NPLSTM and a 'Reversed'-wrapped backward NPLSTM.
        nets = {}
        nets['softm'] = [n for n in list(net.sub) if n.kind == 'SoftmaxLayer'][0]
        parallel = [n for n in list(net.sub) if n.kind == 'Parallel'][0]
        nets['lstm1'] = [n for n in list(parallel.sub) if n.kind.startswith('NPLSTM')][0]
        rev = [n for n in list(parallel.sub) if n.kind == 'Reversed'][0]
        nets['lstm2'] = rev.sub[0]

        # Hidden size is the first attribute of the LSTM layer.
        hidden = int(nets['lstm1'].attribute[0].value)

        weights = {}  # type: Dict[str, Dict[str, torch.Tensor]]
        for n in nets:
            weights[n] = {}
            for w in list(nets[n].weights):
                weights[n][w.name] = torch.Tensor(w.value).view(list(w.dim))

        # Compatibility models prefix the gate-weight names with '.' and
        # call the softmax weight '.W' instead of 'W1'.
        if mode == 'clstm_compat':
            weightnames = ('.WGI', '.WGF', '.WCI', '.WGO')
            weightname_softm = '.W'
        else:
            weightnames = ('WGI', 'WGF', 'WCI', 'WGO')
            weightname_softm = 'W1'

        # input hidden and hidden-hidden weights are in one matrix. also
        # CLSTM/ocropy likes 1-augmenting every other tensor so the ih weights
        # are input+1 in one dimension.
        t = torch.cat(list(w for w in [weights['lstm1'][wn] for wn in weightnames]))
        weight_ih_l0 = t[:, :input+1]
        weight_hh_l0 = t[:, input+1:]

        # Same layout for the backward direction.
        t = torch.cat(list(w for w in [weights['lstm2'][wn] for wn in weightnames]))
        weight_ih_l0_rev = t[:, :input+1]
        weight_hh_l0_rev = t[:, input+1:]

        weight_lin = weights['softm'][weightname_softm]
        if mode == 'clstm_compat':
            # Compatibility models lack the augmentation column; prepend a
            # zero column so the linear weight matches the expected width.
            weight_lin = torch.cat([torch.zeros(len(weight_lin), 1), weight_lin], 1)

        # build vgsl spec and set weights
        nn = cls('[1,1,0,{} Lbxc{} O1ca{}]'.format(input, hidden, len(net.codec)))
        nn.nn.L_0.layer.weight_ih_l0 = torch.nn.Parameter(weight_ih_l0)
        nn.nn.L_0.layer.weight_hh_l0 = torch.nn.Parameter(weight_hh_l0)
        nn.nn.L_0.layer.weight_ih_l0_reverse = torch.nn.Parameter(weight_ih_l0_rev)
        nn.nn.L_0.layer.weight_hh_l0_reverse = torch.nn.Parameter(weight_hh_l0_rev)
        nn.nn.O_1.lin.weight = torch.nn.Parameter(weight_lin)

        nn.add_codec(codec)

        return nn
Example #3
0
    def save_model(self, path):
        """Serialize ``self.rnn`` to ``path`` in CLSTM's protobuf format.

        Writes a 'Stacked' ``NetworkProto`` containing a 'Parallel' node
        (forward NPLSTM plus a 'Reversed'-wrapped backward NPLSTM) and a
        SoftmaxLayer, with the model's codec and training attributes
        (kind, learning rate, momentum, trial counter) attached.
        """
        network = clstm_pb2.NetworkProto(kind='Stacked',
                                         ninput=self.rnn.ninput,
                                         noutput=self.rnn.noutput)

        # Codec: slot 0 is written as 0 (the epsilon/blank entry); the
        # remaining codes are the ordinals of the codec's characters.
        network.codec.extend([0] +
                             [ord(c)
                              for c in self.codec.code2char.values()][1:])

        # NOTE(review): '{:4f}' is a minimum *width* of 4, not precision
        # '.4f'; values serialize with the default 6-digit precision.
        # Harmless since loading re-parses them via float(), but confirm
        # the intended format.
        network.attribute.extend([
            clstm_pb2.KeyValue(key='kind', value='bidi'),
            clstm_pb2.KeyValue(key='learning_rate',
                               value='{:4f}'.format(self.rnn.learning_rate)),
            clstm_pb2.KeyValue(key='momentum',
                               value='{:4f}'.format(self.rnn.momentum)),
            clstm_pb2.KeyValue(key='trial', value=repr(self.trial))
        ])

        hiddenattr = clstm_pb2.KeyValue(key='nhidden',
                                        value=repr(self.rnn.nhidden))
        # Build the individual layer protos; 'paral' wraps both LSTM
        # directions, 'rev' wraps the backward LSTM.
        networks = {}
        networks['paral'] = clstm_pb2.NetworkProto(kind='Parallel',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden *
                                                   2)

        networks['lstm1'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm1'].attribute.extend([hiddenattr])

        networks['rev'] = clstm_pb2.NetworkProto(kind='Reversed',
                                                 ninput=self.rnn.ninput,
                                                 noutput=self.rnn.nhidden)
        networks['lstm2'] = clstm_pb2.NetworkProto(kind='NPLSTM',
                                                   ninput=self.rnn.ninput,
                                                   noutput=self.rnn.nhidden)
        networks['lstm2'].attribute.extend([hiddenattr])

        networks['softm'] = clstm_pb2.NetworkProto(kind='SoftmaxLayer',
                                                   ninput=self.rnn.nhidden * 2,
                                                   noutput=self.rnn.noutput)
        networks['softm'].attribute.extend([hiddenattr])

        # weights
        # Per direction: concatenate input-hidden and hidden-hidden weights
        # column-wise into one matrix, then split it row-wise into
        # nhidden-row chunks -- one combined matrix per gate, assigned in
        # the order WGI, WGF, WCI, WGO.
        weights = {}
        weights['lstm1'] = {}
        weights['lstm2'] = {}
        weights['softm'] = {}
        weights['lstm1']['WGI'], weights['lstm1']['WGF'], weights['lstm1']['WCI'], weights['lstm1']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0, self.rnn.rnn.weight_hh_l0], 1).split(self.rnn.nhidden, 0)
        weights['lstm2']['WGI'], weights['lstm2']['WGF'], weights['lstm2']['WCI'], weights['lstm2']['WGO'] = \
            torch.cat([self.rnn.rnn.weight_ih_l0_reverse, self.rnn.rnn.weight_hh_l0_reverse], 1).split(self.rnn.nhidden, 0)
        weights['softm']['W1'] = self.rnn.decoder.weight

        # Serialize each weight tensor row by row.  sorted() only makes the
        # on-disk ordering deterministic; loaders look weights up by name.
        for n in weights.keys():
            for w in sorted(weights[n].keys()):
                warray = clstm_pb2.Array(name=w,
                                         dim=list(weights[n][w].size()))
                for v in weights[n][w].data.cpu().numpy().tolist():
                    warray.value.extend(v)
                networks[n].weights.extend([warray])

        # Assemble the layer hierarchy and write the message out.
        networks['rev'].sub.extend([networks['lstm2']])
        networks['paral'].sub.extend([networks['lstm1'], networks['rev']])
        network.sub.extend([networks['paral'], networks['softm']])

        with open(path, 'wb') as fp:
            fp.write(network.SerializeToString())