Ejemplo n.º 1
0
    def __data_generation(self, indexes):
        """Build the encoded input and target arrays for one batch.

        Args:
            indexes: Positions of the samples to pick from
                ``self.input_exprs`` and ``self.output_exprs``.

        Returns:
            A tuple ``(x, y)`` of float32 arrays shaped
            ``(batch_size, max_input_len, num_input_tokens)`` and
            ``(batch_size, max_output_len, num_input_tokens)``. Rows beyond
            the number of selected samples are left as zeros.
        """
        table = CharacterTable(self.input_tokens)

        x_shape = (self.batch_size, self.max_input_len, self.num_input_tokens)
        # The target array also uses num_input_tokens for its last axis.
        y_shape = (self.batch_size, self.max_output_len, self.num_input_tokens)
        x = np.zeros(x_shape, dtype="float32")
        y = np.zeros(y_shape, dtype="float32")

        # Materialise both collections so they can be indexed by position.
        all_inputs = list(self.input_exprs)
        all_outputs = list(self.output_exprs)
        chosen_inputs = [all_inputs[pos] for pos in indexes]
        chosen_outputs = [all_outputs[pos] for pos in indexes]

        for row, source in enumerate(chosen_inputs):
            if not self.reverse:
                x[row] = table.encode(source, self.max_input_len)
                continue
            # Reversed mode: encode with reverse=True, then flip the rows.
            encoded = table.encode(source, self.max_input_len, reverse=True)
            x[row] = np.flipud(encoded)

        for row, target in enumerate(chosen_outputs):
            y[row] = table.encode(target, self.max_output_len)

        return (x, y)
Ejemplo n.º 2
0
 def __init__(self, model_name, width=200, height=50, code_len=6, charsets=None):
     """Configure the character set and image geometry, then load the model.

     :param model_name: base name of the ``.h5`` file inside the ``model``
         directory located next to this source file
     :param width: stored as ``self.width``
     :param height: stored as ``self.height``
     :param code_len: stored as ``self.code_len`` and passed to CharacterTable
     :param charsets: characters to recognise; when empty or None the ASCII
         digits and lowercase letters are used
     """
     if not charsets:
         # ascii_lowercase exists on Python 2 and 3 and is locale independent.
         charsets = string.digits + string.ascii_lowercase
     # Previously a caller-supplied charsets was never stored, so the append
     # below raised AttributeError. '-' is always added as one extra class.
     self.charsets = charsets + '-'
     self.n_class = len(self.charsets)
     self.width = width
     self.height = height
     self.code_len = code_len
     self.ctable = CharacterTable(code_len=code_len, charsets=self.charsets)

     base_path = os.path.dirname(__file__)
     self.model = load_model(os.path.join(base_path, 'model/%s.h5' % model_name))
Ejemplo n.º 3
0
    def __init__(self, model_name, width=200, height=50, code_len=6, charsets=None):
        """Store the character set, image geometry and normalised model name.

        :param model_name: model name; stripped and lower-cased before storing
        :param width: stored as ``self.width``
        :param height: stored as ``self.height``
        :param code_len: stored as ``self.code_len`` and passed to CharacterTable
        :param charsets: characters to recognise; when empty or None the ASCII
            digits and lowercase letters are used
        """
        if not charsets:
            # string.lowercase is locale dependent on Python 2 and was removed
            # in Python 3; ascii_lowercase is available on both.
            charsets = string.digits + string.ascii_lowercase
        # '-' is always appended as one extra class.
        self.charsets = charsets + '-'
        self.n_class = len(self.charsets)
        self.width = width
        self.height = height
        self.code_len = code_len

        self.ctable = CharacterTable(code_len=code_len, charsets=self.charsets)
        self.model_name = model_name.strip().lower()
Ejemplo n.º 4
0
class TGcodesp(object):
    """Recognise the text in an image with a model loaded from ``model/<name>.h5``."""

    def __init__(self, model_name, width=200, height=50, code_len=6, charsets=None):
        """Configure the character set and image geometry, then load the model.

        :param model_name: base name of the ``.h5`` file inside the ``model``
            directory located next to this source file
        :param width: target image width used by ``img_preprocess``
        :param height: target image height used by ``img_preprocess``
        :param code_len: stored as ``self.code_len`` and passed to CharacterTable
        :param charsets: characters to recognise; when empty or None the ASCII
            digits and lowercase letters are used
        """
        if not charsets:
            # ascii_lowercase exists on Python 2 and 3 and is locale independent.
            charsets = string.digits + string.ascii_lowercase
        # Previously a caller-supplied charsets was never stored, so the append
        # below raised AttributeError. '-' is always added as one extra class.
        self.charsets = charsets + '-'
        self.n_class = len(self.charsets)
        self.width = width
        self.height = height
        self.code_len = code_len
        self.ctable = CharacterTable(code_len=code_len, charsets=self.charsets)

        base_path = os.path.dirname(__file__)
        self.model = load_model(os.path.join(base_path, 'model/%s.h5' % model_name))

    def img_preprocess(self, fname):
        """Load an image, scale it to ``self.height`` and fit it to ``self.width``.

        The image is converted to RGB, resized so its height equals
        ``self.height`` (width scaled by the same factor), then cropped with a
        horizontally centred box of ``self.width`` x ``self.height``. The box
        may extend past the image edges when the resized image is narrower.

        :param fname: path (or file object) accepted by ``Image.open``
        :return: the processed PIL image
        """
        img = Image.open(fname).convert('RGB')
        w, h = img.size
        # Floor division keeps the sizes integral on Python 3 as well; it
        # matches what "/" did for ints on Python 2.
        new_w = w * self.height // h
        img = img.resize((new_w, self.height), Image.ANTIALIAS)
        extra_blank = (self.width - img.width) // 2
        img = img.crop((-extra_blank, 0, self.width - extra_blank, self.height))
        return img

    def predict(self, fname, code_len=None, detail=False):
        """Run the model on one image and return the decoded result as a string.

        :param fname: image path passed to ``img_preprocess``
        :param code_len: forwarded to ``self.ctable.decode``
        :param detail: when true, try to evaluate the decoded text as an
            arithmetic expression via ``convert_res``; if that fails the
            decoded text is returned unchanged
        :return: ``str`` of the decoded (or evaluated) prediction
        """
        img = self.img_preprocess(fname)
        # Swap the first two axes of the image array, then add a batch axis.
        imgM = np.array(img).transpose(1, 0, 2)
        imgM = np.expand_dims(imgM, 0)
        y_pred = self.model.predict(imgM)
        y_pred = self.ctable.decode(y_pred, code_len)
        if detail:
            try:
                y_pred = self.convert_res(y_pred)
            except Exception:
                # Best effort: keep the raw decoded text when it is not a
                # valid expression (e.g. SyntaxError, ZeroDivisionError).
                pass
        return str(y_pred)

    def convert_res(self, label):
        """Evaluate a decoded arithmetic captcha.

        The letters ``a``, ``b``, ``c`` and ``d`` stand for ``+``, ``-``,
        ``*`` and ``/``; every other character is kept as is.

        :param label: decoded text, e.g. ``'1a2'``
        :return: the value of the resulting expression
        """
        op_map = {'a': '+', 'b': '-', 'c': '*', 'd': '/'}
        res = ''.join(op_map.get(s, s) for s in label)
        # NOTE(security): eval executes arbitrary expressions. The input is
        # decoded model output here; do not pass untrusted text to this method.
        return eval(res)
Ejemplo n.º 5
0
#!/usr/bin/env python
# coding: utf-8

from keras.models import Model, Input, load_model
from keras.layers import *
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras import backend as K
import numpy as np
from PIL import Image
import random
import os
from utils import CharacterTable

# Second dimension of the label array allocated in load_data.
max_len = 6
# Module-level CharacterTable created with its default arguments.
ctable = CharacterTable()


def load_data(path='img/train/origin_nacao',
              width=200,
              height=50,
              time_step=21):
    """Collect the ``jpg`` files under *path* and allocate the training arrays.

    The file names are shuffled in place with ``random.shuffle``. The part of
    the function visible here only allocates the arrays; it neither reads any
    image nor returns a value.

    :param path: directory that is listed for files ending in ``jpg``
    :param width: size of the second axis of ``data``
    :param height: size of the third axis of ``data``
    :param time_step: value every entry of ``input_len`` is set to
    """
    fnames = [
        os.path.join(path, fname) for fname in os.listdir(path)
        if fname.endswith('jpg')
    ]
    random.shuffle(fnames)
    data = np.zeros((len(fnames), width, height, 3))  # data type (left at the np.zeros default)
    input_len = np.ones(len(fnames),
                        dtype=np.uint8) * time_step  # 23-2: dimension at reshape time
    input_label = np.zeros((len(fnames), max_len), dtype=np.uint8)
    label_len = np.zeros(len(fnames), dtype=np.uint8)
Ejemplo n.º 6
0
# Alternatively, load the sequences together with their function labels.
# 'func.top1000' has one function per row; rows are numbered from 1.
df_func = pd.read_csv(
    'func.top1000', sep='\t', names=['count', 'function'], engine='c')
df_func['function_index'] = range(1, len(df_func) + 1)
func_dict = df_func.set_index('function_index')['function'].to_dict()

# Only the first 10 rows of the training table are read.
df = pd.read_csv(
    'coreseed.train.tsv',
    sep='\t',
    engine='c',
    nrows=10,
    usecols=['peg', 'function_index', 'dna'],
)

# Encode every DNA string, truncated to MAXLEN and lower-cased.
ctable = CharacterTable(CHARS, MAXLEN)
n = len(df)
x = np.zeros(shape=(n, MAXLEN, CHARLEN), dtype=np.byte)
for i, seq in enumerate(df['dna']):
    x[i] = ctable.encode(seq[:MAXLEN].lower())

# Class probabilities, their maximum, and the 1-based predicted index.
y_proba = base_model.predict(x, verbose=1)
y_conf = y_proba.max(axis=-1)
y_pred = 1 + y_proba.argmax(axis=-1)
y_true = df['function_index']

x_latent = model.predict(x, verbose=1)

# Show each encoded input next to the corresponding output of `model`.
for i in range(n):
    print('X[{}]: {}'.format(i, x[i]))
    print('Latent features:', x_latent[i])
Ejemplo n.º 7
0
    # Prepare validation data.
    text = read_text(data_path, val_books)
    val_tokens = tokenize(text)
    val_tokens = list(filter(None, val_tokens))

    val_maxlen = max([len(token) for token in val_tokens]) + 2
    val_encoder, val_decoder, val_target = transform(
        val_tokens, maxlen, error_rate=error_rate, shuffle=False)
    print(val_encoder[:10])
    print(val_decoder[:10])
    print(val_target[:10])
    print('Number of non-unique validation tokens =', len(val_tokens))
    print('Max sequence length in the validation set:', val_maxlen)

    # Define training and evaluation configuration.
    input_ctable  = CharacterTable(input_chars)
    target_ctable = CharacterTable(target_chars)

    train_steps = len(vocab) // train_batch_size
    val_steps = len(val_tokens) // val_batch_size

    # Compile the model.
    model, encoder_model, decoder_model = seq2seq(
        hidden_size, nb_input_chars, nb_target_chars)
    print(model.summary())

    # Train and evaluate.
    for epoch in range(nb_epochs):
        print('Main Epoch {:d}/{:d}'.format(epoch + 1, nb_epochs))
    
        train_encoder, train_decoder, train_target = transform(
Ejemplo n.º 8
0
class Tgcodesp(object):
    """Create, fine-tune and run recognition models stored under ``model/``."""

    def __init__(self, model_name, width=200, height=50, code_len=6, charsets=None):
        """Store the character set, image geometry and normalised model name.

        :param model_name: model name; stripped and lower-cased before storing
        :param width: image width passed to ``build_model`` / ``img_preprocess``
        :param height: image height passed to ``build_model`` / ``img_preprocess``
        :param code_len: stored as ``self.code_len`` and passed to CharacterTable
        :param charsets: characters to recognise; when empty or None the ASCII
            digits and lowercase letters are used
        """
        if not charsets:
            # string.lowercase is locale dependent on Python 2 and was removed
            # in Python 3; ascii_lowercase is available on both.
            charsets = string.digits + string.ascii_lowercase
        # '-' is always appended as one extra class.
        self.charsets = charsets + '-'
        self.n_class = len(self.charsets)
        self.width = width
        self.height = height
        self.code_len = code_len

        self.ctable = CharacterTable(code_len=code_len, charsets=self.charsets)
        self.model_name = model_name.strip().lower()

    @staticmethod
    def _saved_model_names():
        """Return the part before the first '.' of every entry in ``model``."""
        return [fname.split('/')[-1].split('.')[0] for fname in os.listdir('model')]

    def create_model(self, train_path, nb_epoch=50, test_path=None):
        """Build and train a new model and save it as ``model/<model_name>.h5``.

        :param train_path: forwarded to ``train_model``
        :param nb_epoch: forwarded to ``train_model``
        :param test_path: forwarded to ``train_model``
        :return: the model returned by ``train_model``
        :raises NameError: if a file for ``self.model_name`` already exists
        """
        if self.model_name in self._saved_model_names():
            # The message used the undefined name `model_name` before, which
            # hid the real error behind an unrelated NameError.
            raise NameError("model name %s has existed! please change another one." % self.model_name)

        model, ctc_model = build_model(self.width, self.height, self.code_len, self.n_class)
        model = train_model(train_path, self.ctable, model, ctc_model, self.model_name, code_len=self.code_len, nb_epoch=nb_epoch, test_path=test_path)
        model.save('model/%s.h5' % self.model_name)
        return model

    def update_model(self, train_path, from_gne=True, nb_epoch=50, test_path=None):
        """Fine-tune an existing model and save it as ``model/<model_name>.h5``.

        :param train_path: forwarded to ``train_model``
        :param from_gne: when true, start from ``model/gne_weights.h5``;
            otherwise from ``model/<model_name>_weights.h5``
        :param nb_epoch: forwarded to ``train_model``
        :param test_path: forwarded to ``train_model``
        :return: the model returned by ``train_model``
        :raises NameError: if no file for ``self.model_name`` exists yet
        """
        if self.model_name not in self._saved_model_names():
            raise NameError("model name %s has not existed! please create model first." % self.model_name)

        model, ctc_model = build_model(self.width, self.height, self.code_len, self.n_class)
        if from_gne:
            ctc_model.load_weights("model/gne_weights.h5")
        else:
            ctc_model.load_weights("model/%s_weights.h5" % self.model_name)

        model = train_model(train_path, self.ctable, model, ctc_model, self.model_name, code_len=self.code_len, nb_epoch=nb_epoch, test_path=test_path)
        model.save('model/%s.h5' % self.model_name)
        return model

    def predict(self, model, fname):
        """Predict the text of one image with the given model.

        :param model: model whose ``predict`` method is called
        :param fname: image path (or file object) accepted by ``Image.open``
        :return: the value returned by ``self.ctable.decode``
        """
        img = Image.open(fname).convert('RGB')
        img = img_preprocess(img, self.width, self.height)
        # Swap the first two axes of the image array, then add a batch axis.
        # The batch axis was previously added to `img`, which silently dropped
        # the transpose.
        imgM = np.array(img).transpose(1, 0, 2)
        imgM = np.expand_dims(imgM, 0)
        # Use the model passed in; `base_model` is not defined in this class.
        y_pred = model.predict(imgM)
        # The first two steps along axis 1 are dropped before decoding.
        y_pred = y_pred[:, 2:, :]
        y_pred = self.ctable.decode(y_pred)
        return y_pred
Ejemplo n.º 9
0
class Tgcodesp(object):
    """Create, fine-tune and run captcha models stored under ``model/``."""

    def __init__(self,
                 model_name,
                 width=200,
                 height=50,
                 code_len=6,
                 charsets=None):
        """Initialise the model name, character set and maximum captcha length.

        :param model_name: model name; stripped and lower-cased before storing
        :param width: image width passed to ``build_model`` / ``img_preprocess``
        :param height: image height passed to ``build_model`` / ``img_preprocess``
        :param code_len: stored as ``self.code_len`` and passed to CharacterTable
        :param charsets: characters to recognise; when empty or None the ASCII
            digits and lowercase letters are used
        """
        if not charsets:
            # string.lowercase is locale dependent on Python 2 and was removed
            # in Python 3; ascii_lowercase is available on both.
            charsets = string.digits + string.ascii_lowercase
        # '-' is always appended as one extra class.
        self.charsets = charsets + '-'
        self.n_class = len(self.charsets)
        self.width = width
        self.height = height
        self.code_len = code_len

        self.ctable = CharacterTable(code_len=code_len, charsets=self.charsets)
        self.model_name = model_name.strip().lower()

    @staticmethod
    def _saved_model_names():
        """Return the part before the first '.' of every entry in ``model``."""
        return [
            fname.split('/')[-1].split('.')[0] for fname in os.listdir('model')
        ]

    def create_model(self, train_path, acc=0.92, nb_epoch=50, test_path=None):
        """Build and train a new model and save it as ``model/<model_name>.h5``.

        :param train_path: training data path, forwarded to ``train_model``
        :param acc: forwarded to ``train_model``
        :param nb_epoch: forwarded to ``train_model``
        :param test_path: forwarded to ``train_model`` (it was accepted but
            dropped before, unlike in ``update_model``)
        :return: the model returned by ``train_model``
        :raises NameError: if a file for ``self.model_name`` already exists
        """
        if self.model_name in self._saved_model_names():
            # The message used the undefined name `model_name` before, which
            # hid the real error behind an unrelated NameError.
            raise NameError(
                "model name %s has existed! please change another one." %
                self.model_name)

        model, ctc_model = build_model(self.width, self.height, self.code_len,
                                       self.n_class)

        model = train_model(train_path,
                            self.ctable,
                            model,
                            ctc_model,
                            self.model_name,
                            self.code_len,
                            acc=acc,
                            nb_epoch=nb_epoch,
                            test_path=test_path)
        model.save('model/%s.h5' % self.model_name)
        return model

    def update_model(self,
                     train_path,
                     acc=0.92,
                     nb_epoch=50,
                     from_gne=True,
                     test_path=None):
        """Fine-tune an existing model and save it as ``model/<model_name>.h5``.

        :param train_path: path of the training data set
        :param acc: training-set accuracy used as the stop condition
        :param nb_epoch: forwarded to ``train_model``
        :param from_gne: when true, start from ``model/gne_weights.h5`` and
            mark the first five layers of ``model`` as not trainable;
            otherwise start from ``model/<model_name>_weights.h5``
        :param test_path: forwarded to ``train_model``
        :return: the trained prediction model
        :raises NameError: if no file for ``self.model_name`` exists yet
        """
        if self.model_name not in self._saved_model_names():
            raise NameError(
                "model name %s does not exist! please create it first." %
                self.model_name)

        model, ctc_model = build_model(self.width, self.height, self.code_len,
                                       self.n_class)
        if from_gne:
            ctc_model.load_weights("model/gne_weights.h5")
            for layer in model.layers[:5]:
                layer.trainable = False
        else:
            ctc_model.load_weights("model/%s_weights.h5" % self.model_name)
        # Parenthesised form is valid on Python 2 and Python 3.
        print(model.summary())
        model = train_model(train_path,
                            self.ctable,
                            model,
                            ctc_model,
                            self.model_name,
                            self.code_len,
                            acc=acc,
                            nb_epoch=nb_epoch,
                            test_path=test_path)
        model.save('model/%s.h5' % self.model_name)
        return model

    def predict(self, model, fname):
        """Predict the text of one image with the given model.

        :param model: model whose ``predict`` method is called
        :param fname: image path (or file object) accepted by ``Image.open``
        :return: the value returned by ``self.ctable.decode``
        """
        img = Image.open(fname).convert('RGB')
        img = img_preprocess(img, self.width, self.height)
        # Swap the first two axes of the image array, then add a batch axis.
        # The batch axis was previously added to `img`, which silently dropped
        # the transpose.
        imgM = np.array(img).transpose(1, 0, 2)
        imgM = np.expand_dims(imgM, 0)
        # Use the model passed in; `base_model` is not defined in this class.
        y_pred = model.predict(imgM)
        y_pred = self.ctable.decode(y_pred)
        return y_pred
Ejemplo n.º 10
0
def main():
    """Evaluate the saved model on the leading rows of the CSV dataset.

    Reads expression pairs from ``../predataset.csv`` (column 0: input,
    column 1: expected output), loads the token settings and the model from
    ``../rnn_save``, predicts up to 1000 samples one at a time and prints a
    per-sample verdict followed by accuracy and timing statistics.
    """
    REVERSE = True
    MAX_TEST_COUNT = 1000

    file_path = "../predataset.csv"
    # file_path = "../../data/linear/test/test_data.csv"
    ds = pd.read_csv(file_path, header=None)
    input_ds = ds[0]
    output_ds = ds[1]

    with open("../rnn_save/dataset_param.json", "r") as file:
        setting = json.load(file)
    input_tokens = setting["input_tokens"]
    max_input_len = setting["max_input_len"]

    ctable = CharacterTable(input_tokens)

    # Load model from h5 file
    model = load_model("../rnn_save/model.h5")

    model.summary()

    # Never index past the end of a dataset shorter than MAX_TEST_COUNT.
    total_test_count = min(MAX_TEST_COUNT, len(ds))
    if total_test_count == 0:
        # Nothing to evaluate; also avoids dividing by zero below.
        print("No test samples found in %s" % file_path)
        return

    correct_predict_count = 0
    time_sum = 0
    max_time = 0
    # Start from +inf so the minimum is right even if every call takes > 1 s.
    min_time = float("inf")
    for i in range(total_test_count):
        mba_expr = input_ds[i]
        simp_expr = output_ds[i]
        start_time = time.time()
        if REVERSE:
            x = np.flipud(ctable.encode(mba_expr, max_input_len, reverse=True))
        else:
            x = ctable.encode(mba_expr, max_input_len)

        # NOTE(review): predict_classes is not available on newer Keras
        # releases - confirm the Keras version this script targets.
        y_pred = model.predict_classes([[x]], verbose=0)
        predict = ctable.decode(y_pred[0], calc_argmax=False)
        end_time = time.time()
        consume_time = end_time - start_time
        time_sum += consume_time
        max_time = max(max_time, consume_time)
        min_time = min(min_time, consume_time)
        # print('=' * 50)
        # print('M', mba_expr)
        # print('T', simp_expr)
        # print('P', predict, end=' ')
        print("No.%d" % (i + 1), end=' ')
        if simp_expr == predict.strip():
            print("\033[1;32m O \033[0m", end=' ')
            correct_predict_count += 1
        else:
            print("\033[1;31m X \033[0m", end=' ')
        print("Time = %.4f" % consume_time)

    print("#Correct predict: %d/%d" %
          (correct_predict_count, total_test_count))
    print("Correct rate: %.4f" % (correct_predict_count / total_test_count))
    print("Average solve time: %.4f" % (time_sum / total_test_count))
    print("Maximum solve time: %.4f" % (max_time))
    print("Minimum solve time: %.4f" % (min_time))