    def __init__(self):
        checkpoint_file = os.path.join(MODEL_PATH, CHECKPOINT_FILE)
        classes_to_labels_file = os.path.join(MODEL_PATH, LABELS_FILE)
        embedding_matrix_file = os.path.join(MODEL_PATH, EMBEDDING_MATRIX_FILE)
        # model_file = os.path.join(MODEL_PATH, 'model.pkl')
        tokenizer_file = os.path.join(MODEL_PATH, TOKENIZER_FILE)

        self.predicate_label = pickle.load(open(classes_to_labels_file, 'rb'),
                                           encoding="iso-8859-1")
        embedding_matrix = pickle.load(open(embedding_matrix_file, 'rb'),
                                       encoding="iso-8859-1")
        # model = pickle.load(open(model_file, 'rb'), encoding="iso-8859-1")
        nb_words, EMBEDDING_DIM = embedding_matrix.shape

        label2id = {k: t.argmax() for k, t in self.predicate_label.items()}
        self.id2label = {_id: label for label, _id in label2id.items()}

        # nb_words, EMBEDDING_DIM = [8179, 200] #[100000, 200]
        self.model = make_model(nb_words, EMBEDDING_DIM, embedding_matrix,
                                len(self.predicate_label))
        self.model.load_weights(checkpoint_file)
        # model = load_model(checkpoint_file)

        self.tokenizer = pickle.load(open(tokenizer_file, 'rb'),
                                     encoding="iso-8859-1")
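A minimal usage sketch for the loader above, assuming the same preprocess_text and pad_sequences helpers used in the examples below; the method name and maxlen value are illustrative, not part of the original class.

    def predict_one(self, question, maxlen=150):
        # preprocess, tokenize and pad a single question, then map the argmax back to a label
        seq = self.tokenizer.texts_to_sequences([preprocess_text(question)])
        padded = pad_sequences(seq, maxlen=maxlen)
        probs = self.model.predict(padded, batch_size=1)[0]
        return self.id2label[probs.argmax()]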
Example #2
def predict():

    checkpoint_file = os.path.join(MODEL_PATH, CHECKPOINT_FILE)
    classes_to_labels_file = os.path.join(MODEL_PATH, LABELS_FILE)
    embedding_matrix_file = os.path.join(MODEL_PATH, EMBEDDING_MATRIX_FILE)
    # model_file = os.path.join(MODEL_PATH, 'model.pkl')
    tokenizer_file = os.path.join(MODEL_PATH, TOKENIZER_FILE)

    predicate_label = pickle.load(open(classes_to_labels_file, 'rb'), encoding="iso-8859-1")
    embedding_matrix = pickle.load(open(embedding_matrix_file, 'rb'), encoding="iso-8859-1")
    # model = pickle.load(open(model_file, 'rb'), encoding="iso-8859-1")
    nb_words, EMBEDDING_DIM = embedding_matrix.shape
    label2id = {k: t.argmax() for k, t in predicate_label.items()}
    id2label = {_id: label for label, _id in label2id.items()}

    model = make_model(nb_words, EMBEDDING_DIM, embedding_matrix, len(predicate_label))
    model.load_weights(checkpoint_file)
    # model = load_model(checkpoint_file)

    tokenizer = pickle.load(open(tokenizer_file, 'rb'), encoding="iso-8859-1")

    test_data = read_data(DEV_FILE)

    raw_test_comments = [t[0] for t in test_data]

    test_y = np.array([predicate_label[t[1]] for t in test_data])

    processed_test_comments = []
    for comment in raw_test_comments:
        processed_test_comments.append(preprocess_text(comment))

    test_sequences = tokenizer.texts_to_sequences(processed_test_comments)

    final_test_data = pad_sequences(test_sequences, maxlen=150)
    # print('test_data', test_data[:3])
    print('Model evaluation')
    ret = model.predict(x=final_test_data, batch_size=1)
    # print('Predictions:', ret)
    # print('gold', 'predicted', 'question')
    rets = []
    for label, pred, question in zip(test_y, ret, test_data):
        print(id2label[label.argmax()], id2label[pred.argmax()], question)
        rets.append([id2label[label.argmax()], id2label[pred.argmax()], question])

    print('Accuracy: {}'.format(len([t for t in rets if t[0] == t[1]]) / len(rets)))
def evaluate():

    checkpoint_file = os.path.join(MODEL_PATH, CHECKPOINT_FILE)
    classes_to_labels_file = os.path.join(MODEL_PATH, LABELS_FILE)
    embedding_matrix_file = os.path.join(MODEL_PATH, EMBEDDING_MATRIX_FILE)
    # model_file = os.path.join(MODEL_PATH, 'model.pkl')
    tokenizer_file = os.path.join(MODEL_PATH, TOKENIZER_FILE)

    predicate_label = pickle.load(open(classes_to_labels_file, 'rb'), encoding="iso-8859-1")
    embedding_matrix = pickle.load(open(embedding_matrix_file, 'rb'), encoding="iso-8859-1")
    # model = pickle.load(open(model_file, 'rb'), encoding="iso-8859-1")
    nb_words, EMBEDDING_DIM = embedding_matrix.shape
    model = make_model(nb_words, EMBEDDING_DIM, embedding_matrix, len(predicate_label))
    model.load_weights(checkpoint_file)
    # model = load_model(checkpoint_file)

    tokenizer = pickle.load(open(tokenizer_file, 'rb'), encoding="iso-8859-1")

    test_data = read_data(DEV_FILE)

    raw_test_comments = [t[0] for t in test_data]

    test_y = np.array([predicate_label[t[1]] for t in test_data])

    processed_test_comments = []
    for comment in raw_test_comments:
        processed_test_comments.append(preprocess_text(comment))

    test_sequences = tokenizer.texts_to_sequences(processed_test_comments)

    final_test_data = pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)

    print('Model evaluation')
    list_of_metrics = model.evaluate(x=final_test_data, y=test_y, batch_size=32)

    for index, metric in enumerate(model.metrics_names):
        print(metric + ':', str(list_of_metrics[index]))
Example #4
def load_checkpoint(path):

    #state = torch.load(path)
    checkpoint = torch.load(path, map_location=lambda storage, loc: storage)

    drop = checkpoint['dropout']
    hidden_units = checkpoint['hidden_units']
    arch = checkpoint['arch']
    lr = checkpoint['lr']
    epochs = checkpoint['epochs']
    state_dict = checkpoint['state_dict']
    class_to_idx = checkpoint['class_to_idx']

    model = make_model(arch, hidden_units, drop)

    model.class_to_idx = class_to_idx

    print("Loading ", model.name, " checkpoint\n")
    if model.name == 'vgg16' or model.name == 'densenet121':
        model.classifier.load_state_dict(state_dict)
        optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
    elif model.name == 'resnet50':
        model.fc.load_state_dict(state_dict)
        optimizer = optim.Adam(model.fc.parameters(), lr=lr)

    optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
    #############################################################
    #  the following code is needed to train the model further #
    #  after loading from checkpoint, since it requires a GPU  #
    #############################################################
    #   for state in optimizer.state.values():
    #         for k, v in state.items():
    #             if isinstance(v, torch.Tensor):
    #                 state[k] = v.cuda()
    print(model.name, " loaded successfully\n")
    return model, optimizer
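A hedged counterpart to the loader above: a sketch of how a checkpoint containing the keys that load_checkpoint reads might be written. The function name and output path are assumptions.

import torch

def save_checkpoint(model, optimizer, arch, hidden_units, drop, lr, epochs,
                    path='checkpoint.pth'):
    # save only the classifier/fc head, matching what load_checkpoint restores
    if arch in ('vgg16', 'densenet121'):
        head_state = model.classifier.state_dict()
    else:  # resnet50
        head_state = model.fc.state_dict()
    checkpoint = {'arch': arch,
                  'hidden_units': hidden_units,
                  'dropout': drop,
                  'lr': lr,
                  'epochs': epochs,
                  'state_dict': head_state,
                  'class_to_idx': model.class_to_idx,
                  'optimizer_state_dict': optimizer.state_dict()}
    torch.save(checkpoint, path)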
Example #5
    # SRC is assumed to be defined just above this excerpt, e.g.
    # SRC = data.Field(tokenize=tokenize_de, pad_token=BLANK_WORD)
    TGT = data.Field(tokenize=tokenize_en, init_token=BOS_WORD, eos_token=EOS_WORD, pad_token=BLANK_WORD)

    MAX_LEN = 100
    train, val, test = datasets.IWSLT.splits(
        exts=('.de', '.en'), fields=(SRC, TGT),
        filter_pred=lambda x: len(vars(x)['src']) <= MAX_LEN and len(vars(x)['trg']) <= MAX_LEN)
    MIN_FREQ = 2
    SRC.build_vocab(train.src, min_freq=MIN_FREQ)
    TGT.build_vocab(train.trg, min_freq=MIN_FREQ)


# devices = [0, 1, 2, 3]
devices = [0]
if True:
    pad_idx = TGT.vocab.stoi["<blank>"]
    model = make_model(len(SRC.vocab), len(TGT.vocab), N=2) # 6
    model.cuda()
    criterion = LabelSmoothing(size=len(TGT.vocab), padding_idx=pad_idx, smoothing=0.1)
    criterion.cuda()
    # BATCH_SIZE = 12000
    BATCH_SIZE = 100
    train_iter = MyIterator(train, batch_size=BATCH_SIZE, device=torch.device(0),
                            repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=True)
    valid_iter = MyIterator(val, batch_size=BATCH_SIZE, device=torch.device(0),
                            repeat=False, sort_key=lambda x: (len(x.src), len(x.trg)),
                            batch_size_fn=batch_size_fn, train=False)
    model_par = nn.DataParallel(model, device_ids=devices)
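A sketch of the training loop that typically follows this setup in the Annotated Transformer; NoamOpt, run_epoch, rebatch and MultiGPULossCompute are assumed to come from that codebase and are not defined in this excerpt.

model_opt = NoamOpt(model.src_embed[0].d_model, 1, 2000,
                    torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
for epoch in range(10):
    model_par.train()
    run_epoch((rebatch(pad_idx, b) for b in train_iter), model_par,
              MultiGPULossCompute(model.generator, criterion, devices=devices, opt=model_opt))
    model_par.eval()
    loss = run_epoch((rebatch(pad_idx, b) for b in valid_iter), model_par,
                     MultiGPULossCompute(model.generator, criterion, devices=devices, opt=None))
    print(loss)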


Example #6
# `gpu` is assumed to be a command-line flag parsed earlier in this script
if gpu:
    if torch.cuda.is_available():
        device = 'cuda'
        print("GPU mode enabled\n")
    else:
        print("Device doesn't support CUDA\n")
        exit(0)
else:
    device = 'cpu'
    print("Further training will be done on cpu, switch to GPU\n")

print("Selected Device: ", device, "\n")

# load the datasets
data, loader =  load_data(data_dir)

# make model
model = make_model(arch, hidden_units, drop)
model.to(device)

# set optimizer state according to arch 
if model.name == 'vgg16' or model.name == 'densenet121':
    optimizer = optim.Adam(model.classifier.parameters(), lr=lr)
elif model.name == 'resnet50':
    optimizer = optim.Adam(model.fc.parameters(), lr=lr)
    
# train model, get new state of optimizer in order to save it in checkpoint
trained_model, optimizer = train_model(model, optimizer, epochs, device, data, loader)

# check accuracy
compute_accuracy(trained_model, loader, device)

# save model
Example #7
@app.route('/upload', methods=['GET', 'POST'])
def upload():
    if request.method == 'POST' and 'photo' in request.files:
        filename = photos.save(request.files['photo'])
        return predict(filename)
    return render_template('upload.html')


@app.route('/predict', methods=['GET', 'POST'])
def predict(filename = False):
    """Recieve the article to be classified from an input form and use the
    model to classify.
    """
    if filename:
        filename = 'static/img/' + filename
        display_image, tot_time = make_image(filename, graph)
        tot_time = "Total load time:" +str(tot_time)
    else:
        display_image = 'static/detected_img/three_dogs_detected.png'
        tot_time = ''
    print(display_image)
    return render_template('predict.html', user_image=display_image,
                            tot_time=tot_time
                            )


if __name__ == '__main__':
    bootstrap = Bootstrap(app)
    graph = make_model()
    app.run(host='0.0.0.0', debug=True)
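A hedged client-side check for the upload route, assuming the app runs locally on Flask's default port; the image filename is illustrative.

import requests

# post an image to /upload; the route saves it and renders the prediction page
with open('three_dogs.jpg', 'rb') as f:
    resp = requests.post('http://localhost:5000/upload', files={'photo': f})
print(resp.status_code)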
Example #8
def _init_model(self):
    flags = self.hparams.__dict__
    # Define TF model graph
    model = make_model(input_shape=self.input_shape, **flags)
    model.set_device(None)
    self.model = model
Example #9
    cfg.log = ocaccel_workflow_log
    question_and_answer.cfg = cfg
    question_and_answer.ask(qa.ask_configure_str)
    if not options.no_configure:
        cfg.configure()

    # In unit sim mode, all configurations are handled automatically, no need to update the cfg
    if not options.unit_sim:
        cfg.update_cfg()

    if not options.no_env_check:
        env_check(options)

    question_and_answer.ask(qa.ask_make_model_str)
    if not options.no_make_model and options.simulator.lower() != "nosim":
        make_model(ocaccel_workflow_make_model_log, options.make_timeout)
        # TODO: need to remove the following line if
        # 'make model' stops touching ocaccel_env.sh
        cfg.setup_ocaccel_env()

    question_and_answer.ask(qa.ask_run_sim_str)
    if not options.no_run_sim and options.simulator.lower() != "nosim":
        testcase_cmdline = options.testcase.split(" ")
        testcase_cmd = None
        testcase_args = None
        if len(testcase_cmdline) > 1:
            testcase_cmd = testcase_cmdline[0]
            testcase_args = " ".join(testcase_cmdline[1:])
        elif len(testcase_cmdline) == 1:
            testcase_cmd = testcase_cmdline[0]
            testcase_args = " "
Example #10
 max_thickness = 350
 structs = []
 fitrs = []
 ln_posts = []
 fig_i = 0
 import numpy as np
 import data_in as di
 data = di.data_in("29553_54.dat")
 q, R, sim_dR = data[0], data[1], data[2]
 #print(q, R, sim_dR)
 from refnx.dataset import Data1D
 data = Data1D(data=(q, R, sim_dR))
 import make_model as mm
 for i in range(1, 2):
     thick = round(max_thickness / (i + 1.))
     names = []
     bs = []
     thicks = []
     roughs = []
     for j in range(i + 1):
         names.append('layers' + str(j))
         bs.append(5)
         thicks.append(thick)
         roughs.append(0)
     print(names, bs, thicks, roughs)
     structure, fitter, objective, fig_i = mm.make_model(
         names, bs, thicks, roughs, fig_i, data)
     ln_post = objective.logpost()
     print("log post out: ", ln_post, "\npost out: ", np.exp(ln_post))
     structs.append(structure)
     fitrs.append(fitter)
     ln_posts.append(ln_post)
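A hedged follow-up showing how the collected refnx fitters could be run; the optimizer choice and sample count are assumptions, not part of the original snippet.

for structure, fitter in zip(structs, fitrs):
    fitter.fit('differential_evolution')  # refine the objective with a global optimizer
    fitter.sample(400)                    # then draw MCMC samples from the posterior
    print(structure)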
Example #11
from LabelSmoothing import LabelSmoothing
from make_model import make_model
from NoamOpt import NoamOpt
import torch
from run_epoch import run_epoch
from data_gen import data_gen
from SimpleLossCompute import SimpleLossCompute



# Train the simple copy task.
V = 11
criterion = LabelSmoothing(size=V, padding_idx=0, smoothing=0.0)
model = make_model(V, V, N=2)
model_opt = NoamOpt(model.src_embed[0].d_model, 1, 400, torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

for epoch in range(10):
    model.train()
    run_epoch(data_gen(V, 30, 20), model, SimpleLossCompute(model.generator, criterion, model_opt))
    model.eval()
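A greedy-decoding sketch for checking the trained copy-task model, assuming the EncoderDecoder interface produced by make_model (encode, decode, generator) and a subsequent_mask helper from the same codebase.

from subsequent_mask import subsequent_mask  # assumed helper, as in the Annotated Transformer

def greedy_decode(model, src, src_mask, max_len, start_symbol):
    # encode once, then decode one token at a time, always taking the argmax
    memory = model.encode(src, src_mask)
    ys = torch.ones(1, 1).fill_(start_symbol).type_as(src.data)
    for _ in range(max_len - 1):
        out = model.decode(memory, src_mask, ys,
                           subsequent_mask(ys.size(1)).type_as(src.data))
        prob = model.generator(out[:, -1])
        _, next_word = torch.max(prob, dim=1)
        ys = torch.cat([ys, torch.ones(1, 1).type_as(src.data).fill_(next_word.item())], dim=1)
    return ys

src = torch.LongTensor([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]])
src_mask = torch.ones(1, 1, 10)
print(greedy_decode(model, src, src_mask, max_len=10, start_symbol=1))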
Example #12
def train():

    ##################################################
    ## forming sequences to feed into the network.
    ##################################################

    # word list of the embedding vocabulary and the corresponding word vectors
    words, vectors = load_word_vectors(file_path=FLAGS.emb_file)
    embedding_index = {w: vec for w, vec in zip(words, vectors)}

    EMBEDDING_DIM = len(vectors[0])
    print('Indexed the word vectors')
    print('Found %s word vectors.' % len(embedding_index))

    train_data = read_data(FLAGS.train_file)
    test_data = read_data(FLAGS.dev_file)  # e.g. [['怎么解释ALTHAUN', '定义']], i.e. (question, label) pairs

    # "id","comment_text","toxic","severe_toxic","obscene","threat","insult","identity_hate"
    raw_train_comments = [t[0] for t in train_data]
    raw_test_comments = [t[0] for t in test_data]
    classes_to_predict = list(
        set(t[1] for t in train_data) | set(t[1] for t in test_data))
    predicate_index = {
        predicate: _index
        for _index, predicate in enumerate(classes_to_predict)
    }

    predicate_categ = to_categorical(list(predicate_index.values()))

    predicate_label = {
        predicate: predicate_categ[_index]
        for predicate, _index in predicate_index.items()
    }

    pickle.dump(predicate_label,
                open(os.path.join(FLAGS.model_path, 'classes_to_labels.pkl'),
                     "wb"),
                protocol=2)

    print(predicate_label)

    y = np.array([predicate_label[t[1]] for t in train_data])
    test_y = np.array([predicate_label[t[1]] for t in test_data])

    #y_test_predicted = test_df[classes_to_predict].values

    processed_train_comments = []
    for comment in raw_train_comments:
        processed_train_comments.append(preprocess_text(comment))

    processed_test_comments = []
    for comment in raw_test_comments:
        processed_test_comments.append(preprocess_text(comment))

    tokenizer = Tokenizer(num_words=MAX_NB_WORDS)
    tokenizer.fit_on_texts(processed_train_comments + processed_test_comments)

    pickle.dump(tokenizer, open(tokenizer_name, "wb"), protocol=2)

    train_sequences = tokenizer.texts_to_sequences(processed_train_comments)
    test_sequences = tokenizer.texts_to_sequences(processed_test_comments)

    print('found {} tokens in text.'.format(len(tokenizer.word_index)))

    train_data = pad_sequences(train_sequences, maxlen=MAX_SEQUENCE_LENGTH)

    final_test_data = pad_sequences(test_sequences, maxlen=MAX_SEQUENCE_LENGTH)

    print('train shape: {}'.format(train_data.shape))
    print("final_test_data.shape: {}".format(final_test_data.shape))
    print('shape of label(y) is {}'.format(y.shape))

    ##################################################
    ## preparing word embeddings.
    ##################################################

    print('preparing embedding matrix')
    word_index = tokenizer.word_index
    nb_words = min(MAX_NB_WORDS, len(word_index)) + 1
    embedding_matrix = np.zeros((nb_words, EMBEDDING_DIM))
    for word, i in word_index.items():
        if (i > MAX_NB_WORDS):
            continue
        embedding_vector = embedding_index.get(word)
        if (embedding_vector is not None):
            embedding_matrix[i] = embedding_vector
    print('embedding matrix preparation complete')

    ##################################################
    ## train and validation split.
    ##################################################

    print(
        'creating train and validation data by dividing train_data in 80:20 ratio'
    )
    permutation = np.random.permutation(len(train_data))
    index_train = permutation[:int(len(train_data) * 0.8)]
    index_validation = permutation[int(len(train_data) * 0.8):]  # remaining 20% for validation

    final_train_data = train_data[index_train]
    labels_of_train_data = y[index_train]

    final_validation_data = train_data[index_validation]
    labels_of_validation_data = y[index_validation]

    print('train data shape:', final_train_data.shape)
    print('validation data shape:', final_validation_data.shape)
    print('train and validation data are ready!!')

    ############################
    ## Keras model structure.
    ############################

    print("nb_words, EMBEDDING_DIM: {}".format([nb_words, EMBEDDING_DIM
                                                ]))  # [100000, 200]

    pickle.dump(embedding_matrix,
                open(os.path.join(FLAGS.model_path, 'embedding_matrix.pkl'),
                     "wb"),
                protocol=2)

    model = make_model(nb_words, EMBEDDING_DIM, embedding_matrix,
                       len(classes_to_predict))
    print(model.summary())

    # stamp = 'sentiment_with_lstm_and_glove_%.2f_%.2f'%(lstm_dropout_rate,dense_dropout_rate)
    # print(stamp)
    # best_model_path = stamp + '.h5'

    # best_model_path = os.path.join(FLAGS.model_path, 'checkpoint-{epoch:02d}-{val_loss:.2f}-{val_acc:.3f}.hdf5')
    best_model_path = os.path.join(FLAGS.model_path, CHECKPOINT_FILE)

    early_stopping = EarlyStopping(patience=2)
    model_checkpoint = ModelCheckpoint(best_model_path,
                                       save_best_only=True,
                                       save_weights_only=False)

    tb = TensorBoard(
        log_dir=FLAGS.log_path,  # log directory
        histogram_freq=1,  # how often (in epochs) to compute histograms; 0 disables them
        batch_size=32,  # how much data to use when computing histograms
        write_graph=True,  # whether to store the network graph
        write_grads=True,  # whether to visualize gradient histograms
        write_images=True,  # whether to visualize model weights as images
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None)

    hist = model.fit(x = final_train_data, y = labels_of_train_data,\
                     validation_data = (final_validation_data, labels_of_validation_data), \
                     epochs = 10, batch_size = 32, shuffle = True, \
                     callbacks = [early_stopping, model_checkpoint, tb])
    best_score = min(hist.history['val_loss'])
    print('best_score', best_score)

    #######################################
    ## time to make prediction!!!
    ########################################
    # y_test_predicted = model.predict([final_test_data], batch_size = 32, verbose = 1)

    print('Model evaluation')
    list_of_metrics = model.evaluate(x=final_test_data,
                                     y=test_y,
                                     batch_size=32)

    for index, metric in enumerate(model.metrics_names):
        print(metric + ':', str(list_of_metrics[index]))
Example #13
# the opening of this call is assumed; train_data_path mirrors test_data_path below
train_gen = flow_from_csv(train_data_path,
                            batch_size=batch_size,
                            nb_classes=nb_classes,
                            target_size=[320, 240],
                            nb_frames=nb_frames,
                            shuffle=False)

test_gen = flow_from_csv(test_data_path,
                            batch_size=batch_size,
                            nb_classes=nb_classes,
                            target_size=[320, 240],
                            nb_frames=nb_frames,
                            shuffle=False)


# train only the FC layers
model = make_model(nb_classes, nb_frames, 320, 240, 'VGG19', train_bottom=False)
model = multi_gpu_model(model, gpus=gpu_count)  # use multiple GPUs
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['acc'])
callbacks = []
callbacks.append(ModelCheckpoint(filepath='model_weights.h5', save_best_only=True, save_weights_only=True))
history = model.fit_generator(train_gen,
                steps_per_epoch=train_steps_per_epoch,
                epochs=30,
                callbacks=callbacks,
                validation_data=test_gen,
                validation_steps=test_steps_per_epoch,
                shuffle=False)

# plot intermediate results
def plot_acc(history, save=False):
    acc = history.history['acc']