Example #1
def video():
    model = get_model()
    cap = cv2.VideoCapture(0)
    cv2.namedWindow('OCR')
    last_seen = "Number: NaN"

    while True:
        ret, frame = cap.read()
        if not ret:  # stop if the camera frame could not be read
            break
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        thresh = binarize(gray)
        key = cv2.waitKey(1)

        contours = find_digits(thresh)
        draw_contours(frame, contours)
        if key == ord('p'):
            digits = insert_into_center(resize_digits(contours))
            if digits:
                X = preprocess(digits)

                prediction = np.argmax(model.predict(X), axis=1)
                last_seen = "Number: " + "".join(map(str, prediction))

                # print(last_seen)
                # plt.imshow(np.hstack(tuple(digits)), cmap=plt.cm.binary)
                # plt.show()

        cv2.putText(frame, last_seen, (10, 100), cv2.FONT_HERSHEY_SIMPLEX, 2, 0)
        cv2.imshow('OCR', frame)
        if key == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
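
# Hypothetical sketch (not part of the example above): a preprocess() helper of
# roughly this shape is assumed, turning the cropped digit images into a
# normalized batch for an MNIST-style Keras classifier.
import numpy as np

def preprocess(digits):
    X = np.asarray(digits, dtype=np.float32) / 255.0  # scale pixel values to [0, 1]
    return X.reshape(len(digits), 28, 28, 1)          # batch of single-channel 28x28 images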
Example #2
def get_update(Ws_s, bs_s):
    x, fx = train.get_model(Ws_s, bs_s)

    # Ground truth (who won)
    y = T.vector('y')

    # Compute loss (just log likelihood of a sigmoid fit)
    y_pred = sigmoid(fx)
    loss = -( y * T.log(y_pred) + (1 - y) * T.log(1 - y_pred)).mean()

    # Metrics on the number of correctly predicted ones
    frac_correct = ((fx > 0) * y + (fx < 0) * (1 - y)).mean()

    # Updates
    learning_rate_s = T.scalar(dtype=theano.config.floatX)
    momentum_s = T.scalar(dtype=theano.config.floatX)
    updates = train.nesterov_updates(loss, Ws_s + bs_s, learning_rate_s, momentum_s)
    
    f_update = theano.function(
        inputs=[x, y, learning_rate_s, momentum_s],
        outputs=[loss, frac_correct],
        updates=updates,
        )

    return f_update
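
# Hypothetical usage sketch (names and hyperparameter values below are
# assumptions, not taken from the example): the compiled Theano function is
# called once per minibatch with an explicit learning rate and momentum.
#
#   f_update = get_update(Ws_s, bs_s)
#   for X_batch, y_batch in minibatches:
#       batch_loss, batch_acc = f_update(X_batch, y_batch, 0.01, 0.9)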
Example #3
def get_predict(Ws_s, bs_s):
    x, p = train.get_model(Ws_s, bs_s)
    
    predict = theano.function(
        inputs=[x],
        outputs=p)

    return predict
Example #4
def static_image():
    model = get_model()
    frame = get_sample_image()
    contours = find_digits(binarize(frame.copy()))
    draw_contours(frame, contours)
    digits = insert_into_center(resize_digits(contours))
    X = preprocess(digits)
    print(np.argmax(model.predict(X), axis=1))
    plt.imshow(np.hstack(tuple(digits)), cmap=plt.cm.binary)
    plt.show()
Example #5
def get_model_from_pickle(fn):
    with open(fn, 'rb') as f:  # pickle files must be opened in binary mode
        Ws, bs = pickle.load(f)
    
    Ws_s, bs_s = train.get_parameters(Ws=Ws, bs=bs)
    x, p = train.get_model(Ws_s, bs_s)
    
    predict = theano.function(
        inputs=[x],
        outputs=p)

    return predict
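
# Hypothetical sketch of the saving side assumed by get_model_from_pickle():
# the weight and bias arrays are pickled together as a single (Ws, bs) tuple.
import pickle

def save_model_to_pickle(fn, Ws, bs):
    with open(fn, 'wb') as f:  # binary mode, matching the 'rb' read above
        pickle.dump((Ws, bs), f)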
Example #6
# evaluate
import torch

from data.shapeNet import ShapeDiffDataset
from modules.configUtils import get_args
from modules.cuboid import get_cuboid_corner
from train import get_model
from utils.visualization import plot_pc_mayavi

params = get_args()
dev = torch.device("cuda") if torch.cuda.is_available() else torch.device(
    "cpu")

model_path = "C:\\Users\\sharon\\Documents\\Research\\models\\model_1011_1134.pt"
model, _ = get_model()
model.load_state_dict(torch.load(model_path, map_location=dev))
model.eval()

train_dataset = ShapeDiffDataset(params.train_path,
                                 params.bins,
                                 dev=dev,
                                 seed=0)
train_loader = torch.utils.data.DataLoader(train_dataset,
                                           params.batch_size,
                                           shuffle=False)

if __name__ == '__main__':
    total_acc = 0.
    for i, (x, d, h) in enumerate(train_loader):
        if i == 1:
            break
Example #7
# NOTE: this snippet begins mid-signature; the parameter list below is
# reconstructed from the variables used in the body.
def draw_bounding_box_on_image(image, xmin, ymin, xmax, ymax,
                               thickness=10):
    # Adds a bounding box to an image.
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    (left, right, top, bottom) = (xmin, xmax, ymin, ymax)
    draw.line(
        [(left, top), (left, bottom), (right, bottom), (right, top),
         (left, top)],
        width=thickness,
    )


if __name__ == "__main__":
    # initialize model and optimizer
    num_classes = 2  # 1 class (wheat head) + background
    model = train.get_model(num_classes)
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    # load existing checkpoint (comment if none existing)
    atepoch = 10
    PATH = f"/mnt/disks/extra/model_v1/model_weights_v1_{atepoch}.tar"
    checkpoint = torch.load(PATH, map_location=device)
    model.load_state_dict(checkpoint['model_state_dict'])

    root = "/home/ali/kaggle-wheat-detection/test"
    df = output(model, root=root)
    df.to_csv("/mnt/disks/extra/model_v1/submission_{atepoch}.csv",
              index=False)

    # display a case:
    img_filename = "51b3e36ab.jpg"
    img_path = os.path.join(root, img_filename)
Example #8
from prepare_data import load_and_preprocess_image


def get_class_id(image_root):
    id_cls = {}
    for i, item in enumerate(os.listdir(image_root)):
        if os.path.isdir(os.path.join(image_root, item)):
            id_cls[i] = item
    return id_cls


if __name__ == '__main__':
    # GPU settings
    gpus = tf.config.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    model = get_model()
    model.load_weights(filepath=save_model_dir)

    image_raw = tf.io.read_file(filename=test_image_dir)
    image_tensor = load_and_preprocess_image(image_raw)
    image_tensor = tf.expand_dims(image_tensor, axis=0)

    pred = model(image_tensor, training=False)
    idx = tf.math.argmax(pred, axis=-1).numpy()[0]

    id_cls = get_class_id("./original_dataset")

    print("The predicted category of this picture is: {}".format(id_cls[idx]))
Example #9
def grid_search(word_embs=None,
                ent_embs=None,
                logger=None,
                datasets=None,
                model_dir=None,
                train_dataset=None,
                args=None,
                file_stores=None):
    param_grid = {'dp': [0.1, 0.2, 0.3, 0.5, 0.8],
                  'hidden_size': [1000, 2000, 3000],
                  'lr': [1e-2, 5e-2, 1e-3, 5e-3],
                  'wd': [1e-4, 1e-5],
                  'num_candidates': [128, 256],
                  'prop_gen_candidates': [0.5, 0.75, 1],
                  'other_optim': ['rmsprop', 'adam']
                  }
    grid_results_dict = {}
    pd_results = list()
    data_types = args.data_types.split(',')

    for param_dict in list(ParameterSampler(param_grid, 50)):
        for k, v in param_dict.items():
            assert k in args.__dict__
            args.__dict__[k] = v

        for dataset in list(datasets.values()) + [train_dataset]:
            dataset.num_cand_gen = int(param_dict['num_candidates'] * param_dict['prop_gen_candidates'])
            dataset.num_candidates = param_dict['num_candidates']

        model = get_model(args, word_embs, ent_embs, logger)
        train_loader = train_dataset.get_loader(batch_size=args.batch_size,
                                                shuffle=False,
                                                num_workers=args.num_workers,
                                                drop_last=False)
        logger.info("Train loader created.There will be {} batches.".format(len(train_loader)))

        logger.info("GRID SEARCH PARAMS : {}".format(param_dict))
        result_key = tuple(param_dict.items())
        grid_results_dict[result_key] = {data_type: [] for data_type in data_types}

        logger.info("Starting validation for untrained model.....")
        validators = {}
        for data_type in data_types:
            loader = datasets[data_type].get_loader(batch_size=args.batch_size,
                                                    shuffle=False,
                                                    num_workers=args.num_workers,
                                                    drop_last=False)
            logger.info(f'Len loader {data_type} : {len(loader)}')
            validators[data_type] = Validator(loader=loader,
                                              args=args,
                                              file_stores=file_stores)

        trainer = Trainer(loader=train_loader,
                          args=args,
                          validator=validators,
                          model=model,
                          model_type='yamada',
                          grid_results_dict=grid_results_dict,
                          result_key=result_key)

        logger.info("Starting Training.....")
        print()
        best_model, best_results = trainer.train()
        logger.info("Finished Training")

        pd_results.append({**param_dict, **best_results})
        print('PD RESULTS: {}'.format(pd_results))
        df = pd.DataFrame(pd_results)
        df.to_csv(join(model_dir, 'hyper_df.csv'))

        for k, v in grid_results_dict.items():
            print(k)
            print(v)

        with open(join(model_dir, 'grid_search_results.pickle'), 'wb') as f:
            pickle.dump(grid_results_dict, f)

        del model, trainer, train_loader, loader, validators, best_model, best_results
        torch.cuda.empty_cache()
        gc.collect()
        time.sleep(3)

    return grid_results_dict, pd_results
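
# Minimal standalone illustration of the random-search driver used above:
# sklearn's ParameterSampler draws n_iter configurations from the grid, which
# grid_search() then copies into args before building each model.
from sklearn.model_selection import ParameterSampler

demo_grid = {'dp': [0.1, 0.5], 'lr': [1e-2, 1e-3]}
for cfg in ParameterSampler(demo_grid, n_iter=3, random_state=0):
    print(cfg)  # e.g. {'lr': 0.001, 'dp': 0.5}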
Example #10
def main(FLAGS):

    print("[%s: INFO] Start Evaluation: %s" % (datetime.now(), str(FLAGS)))

    with tf.Graph().as_default():
        valid_set = Dataset(
            FLAGS.valid_file,
            num_data=NUM_CLASSES * NUM_TEST_PER_CLASS,
            # batch_size=FLAGS.batch_size,
            batch_size=10,
            for_training=False)
        # valid_set = Dataset(TRAIN_TFRECORD,
        #                     num_data=NUM_CLASSES*NUM_TRAIN_PER_CLASS,
        #                     batch_size=FLAGS.batch_size,
        #                     for_training=False)

        model = get_model(FLAGS)

        with tf.Session() as sess:
            saver = tf.train.Saver()

            # validity of checkpoint
            if not tf.train.checkpoint_exists(FLAGS.checkpoint):
                print("[%s: ERROR] Checkpoint does not exist! : %s" %
                      (datetime.now(), FLAGS.checkpoint))
                return

            saver.restore(sess, FLAGS.checkpoint)

            ### test!
            for thres in [0.2, 0.3, 0.4, 0.5]:
                print("localization_thres: %.2f" % thres)
                top_1_loc, gt_known_loc, top_1_class = inference(
                    model,
                    sess,
                    valid_set,
                    localization_thres=thres,
                    vis_thres=0.9,
                    multi_crop=FLAGS.do_multi_crop,
                    do_vis=FLAGS.do_vis)
                print("Top-1 Loc: %.4f, GT-known Loc: %.4f, Top-l Clas: %.4f" %
                      (top_1_loc, gt_known_loc, top_1_class))
                print("---------------------")

            # inference(model, sess, valid_set,
            #           localization_thres=0.3,
            #           vis_thres=0.9,
            #           multi_crop=False,
            #           do_vis=False)
            # print("---------------------")
            # inference(model, sess, valid_set, localization_thres=0.3, vis_thres=0.9)
            # print("---------------------")
            # inference(model, sess, valid_set, localization_thres=0.4, vis_thres=0.9)
            # print("---------------------")
            # inference(model, sess, valid_set, localization_thres=0.5, vis_thres=0.9)

            # loss, accuracy = validation(model, sess, valid_set)

            # print("[%s: INFO] valuation Result of testset: loss: %.3f, accuracy: %.3f" %
            #       (datetime.now(), loss, accuracy))

    print("[%s: INFO] Done" % (datetime.now()))
Example #11
def get_predict(Ws_s, bs_s):
    x, p = train.get_model(Ws_s, bs_s)

    predict = theano.function(inputs=[x], outputs=p)

    return predict
Example #12
            pbar.update(1)

    if generate_txt:
        answer_file.close()

    return np.mean(aucs), np.mean(mrrs), np.mean(ndcg5s), np.mean(ndcg10s)


if __name__ == '__main__':
    # avoid circular import
    from train import parse_arguments, get_model, restore_checkpoint

    parser = argparse.ArgumentParser(description='Eval params')
    config = parse_arguments(parser)

    model = get_model(config)
    model, is_successful = restore_checkpoint(config, model, is_train=False)

    if not is_successful:
        print('No checkpoint file found!')
        exit()

    prediction_folder = f'{config.val_dir}/{config.model_name}'
    Path(prediction_folder).mkdir(parents=True, exist_ok=True)
    if config.model_name.startswith('DM'):
        auc, mrr, ndcg5, ndcg10 = evaluate_dm(config,
                                              model,
                                              config.dev_dir,
                                              config.train_dir,
                                              generate_txt=True,
                                              txt_path=prediction_folder +
Example #13
def train_epochs(epochs, batch_size, token_size, hidden_size, embedding_size):

    # Read data

    x_train_full = open("../input/wili-2018/x_train.txt").read().splitlines()
    y_train_full = open("../input/wili-2018/y_train.txt").read().splitlines()

    x_test_full = open("../input/wili-2018/x_test.txt").read().splitlines()
    y_test_full = open("../input/wili-2018/y_test.txt").read().splitlines()

    # Get encoders

    char_vocab = Dictionary().char_dict(x_train_full)
    lang_vocab = Dictionary().lang_dict(y_train_full)

    # Convert data

    x_train_idx, y_train_idx = Encoder().encode_labeled_data(
        x_train_full,
        y_train_full,
        char_vocab,
        lang_vocab)
    x_test_idx, y_test_idx = Encoder().encode_labeled_data(
        x_test_full,
        y_test_full,
        char_vocab,
        lang_vocab)

    x_train, x_val, y_train, y_val = train_test_split(x_train_idx, y_train_idx, test_size=0.15)

    train_data = [(x, y) for x, y in zip(x_train, y_train)]
    val_data = [(x, y) for x, y in zip(x_val, y_val)]
    test_data = [(x, y) for x, y in zip(x_test_idx, y_test_idx)]

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    if not torch.cuda.is_available():
        logging.warning("WARNING: CUDA is not available.")

    criterion = torch.nn.CrossEntropyLoss(reduction='sum')

    bidirectional = False
    ntokens = len(char_vocab)
    nlabels = len(lang_vocab)
    pad_index = char_vocab.pad_index

    model, optimizer = get_model(
        ntokens,
        embedding_size,
        hidden_size,
        nlabels,
        bidirectional,
        pad_index,
        device)

    with mlflow.start_run():

        mlflow.log_metrics(
            {
                "train samples": len(train_data),
                "val samples": len(val_data),
                "test samples": len(test_data)
                }
            )

        mlflow.log_dict(lang_vocab.token2idx, "lang_vocab.json")
        mlflow.log_dict(char_vocab.token2idx, "char_vocab.json")
        params = {'epochs': epochs, 'batch_size': batch_size, 'token_size': token_size, 'hidden_size': hidden_size, 'embedding_size': embedding_size}
        mlflow.log_dict(params, "params.json")

        logging.info(f'Training cross-validation model for {epochs} epochs')

        for epoch in range(epochs):
            train_acc = train(model, optimizer, train_data, batch_size, token_size, criterion, device)
            logging.info(f'| epoch {epoch:02d} | train accuracy={train_acc:.1f}%')

            validate(model, val_data, batch_size, token_size, device, lang_vocab, tag='val', epoch=epoch)
            validate(model, test_data, batch_size, token_size, device, lang_vocab, tag='test', epoch=epoch)

            mlflow.pytorch.log_model(model, f'{epoch:02d}.model')

    mlflow.pytorch.log_model(model, 'model')
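
# Hypothetical invocation of the function above (hyperparameter values are
# assumptions, not taken from the example):
#
#   train_epochs(epochs=10, batch_size=64, token_size=200000,
#                hidden_size=256, embedding_size=64)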
Example #14
def main(*kargs, **kwargs):

    # ============ Parse global parameters ============
    get_kwargs(kwargs)
    train_fname = kwargs['train']
    test_fname = kwargs['test']
    result_fname = kwargs['output']
    embeds_fname = kwargs['embeds']
    logger_fname = kwargs['logger']
    warm_start = kwargs['warm_start']
    model_warm_start = [model.lower() for model in kwargs['model_warm_start']]
    config = kwargs['config']
    train_clean = kwargs['train_clean']
    train_labels = kwargs['train_labels']
    test_clean = kwargs['test_clean']
    embeds_clean = kwargs['embeds_clean']
    result_path = './catboost/'

    if not os.path.exists(result_path):
        os.mkdir(result_path)

    # cnn_model_file = 'data/cnn.h5'
    # lstm_model_file = 'data/lstm_model.h5'
    # gru_model_file = 'data/gru_model.h5'
    # concat_model_file = 'data/concat.h5'
    # cnn_model_file = 'data/cnn.h5'
    # lr_model_file = 'data/{}_logreg.bin'
    # meta_catboost_model_file = 'data/{}_meta_catboost.bin'

    # ==== Create logger ====
    logger = Logger(logging.getLogger(), logger_fname)

    # ==== Load data ====
    logger.info('Loading data...')
    test_df = load_data(test_fname)
    train_x = np.load(train_clean)
    test_x = np.load(test_clean)
    embedding_matrix = np.load(embeds_clean)
    train_y = np.load(train_labels)

    target_labels = [
        'toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate'
    ]
    num_classes = len(target_labels)

    # ============= Load params of models =============
    params = Params(config)
    models = params.get('models')
    val_predictions_list = []
    test_predictions_list = []

    # ============ Train models =============
    for model_name in models:
        model_func = get_model(model_name, embedding_matrix, params)
        # =========== Training on folds ============
        batch_size = params.get(model_name).get('batch_size')

        logger.debug('Starting {0} training on folds...'.format(model_name))
        models, val_predictions = train_folds_catboost(
            train_x,
            train_y,
            params.get(model_name).get('num_folds'),
            batch_size,
            model_func,
            params.get(model_name).get('optimizer'),
            logger=logger)
        val_predictions = np.concatenate(val_predictions, axis=0)
        val_predictions_list.append(val_predictions)

        logger.debug('Predicting results...')
        test_predictions = []
        for fold_id, model in enumerate(models):
            test_predictions.append(
                model.predict(test_x, batch_size=batch_size))
        final_test_predictions = np.ones(test_predictions[0].shape)
        for fold_predict in test_predictions:
            final_test_predictions *= fold_predict
        final_test_predictions **= (1. / len(test_predictions))
        test_predictions_list.append(final_test_predictions)

    x_test = np.concatenate(test_predictions_list, axis=1)
    test_predicts_path = os.path.join(result_path, "catboost_x_test.npy")
    np.save(test_predicts_path, x_test)
    x_meta = np.concatenate(val_predictions_list, axis=1)
    val_predicts_path = os.path.join(result_path, "catboost_x_train.npy")
    np.save(val_predicts_path, x_meta)

    x_train_meta, x_val_meta, y_train_meta, y_val_meta = train_test_split(
        x_meta, train_y[:x_meta.shape[0]], test_size=0.20, random_state=42)
    meta_model = CatBoost(target_labels,
                          loss_function='Logloss',
                          iterations=1000,
                          depth=6,
                          learning_rate=0.03,
                          rsm=1)
    meta_model.fit(x_train_meta,
                   y_train_meta,
                   eval_set=(x_val_meta, y_val_meta),
                   use_best_model=True)
    #y_hat_meta = meta_model.predict_proba(x_val_meta)

    #metrics_meta = get_metrics(y_val_meta, y_hat_meta, target_labels)
    logger.info('Applying models...')

    final_predictions = np.array(meta_model.predict_proba(x_test)).T

    # ====Save results====
    logger.info('Saving results...')
    test_ids = test_df["id"].values
    test_ids = test_ids.reshape((len(test_ids), 1))

    test_predicts = pd.DataFrame(data=final_predictions, columns=target_labels)
    test_predicts["id"] = test_ids
    test_predicts = test_predicts[["id"] + target_labels]
    submit_path = os.path.join(result_path, "{0}.csv".format('catboost_folds'))
    test_predicts.to_csv(submit_path, index=False)
Example #15
def main():

    # dataset hyperparams
    sequence_length = 50
    augment_dataset = False

    # uncomment the below for dataset collection
    max_note = 127
    instruments = ['t']
    sample_rate = 12
    encoding_size = len(instruments) * (max_note + 1) * 2 + (sample_rate * 2)
    num_notes = len(instruments) * (max_note + 1)
    encoder, decoder = utils.build_convert_dict(max_note, instruments, sample_rate)
    # X, y = datacollector.collect_solo_songs(encoder, max_note, len(instruments), sample_rate, sequence_length,
    #                                         augment_dataset)

    X = pickle.load(open("x.pickle", "rb"))
    y = pickle.load(open('y.pickle', 'rb'))
    # pickle.dump(X, pickle_x)
    # pickle_x.close()
    # pickle.dump(y, pickle_y)
    # pickle_y.close()

    X_t, X_test, y_t, y_test = train_test_split(X, y, test_size=0.02, random_state=49)
    X_train, X_val, y_train, y_val = train_test_split(X_t, y_t, test_size=0.02, random_state=7)
    print(X.shape, X_t.shape, X_test.shape, X_train.shape, X_val.shape)


    # # uncomment the below for training
   
    # train.train(X_train, y_train, encoding_size)

    # # uncomment the below for evaluation

    weights_name = 'weights.49-0.8625.hdf5'
    # evaluate.evaluate(X_train, y_train, X_val, y_val, X_test, y_test, encoding_size, weights_name)
    model = train.get_model(X_test, encoding_size)
    model.load_weights(weights_name)
    print('weights loaded')
    y_pred = model.predict(X_test)
    # y_pred = y_test
    # print(y_test)
    # print(y_pred)
    print('y_test')
    print(y_test.shape)
    print(y_test.argmax(axis=1).shape)
    # print(type(y_test))
    print("y_pred")
    print(y_pred.shape)
    print(y_pred.argmax(axis=1).shape)
    # print(type(y_pred))
    a = y_pred.argmax(axis = 1)
    # print(a)
    y_pred_hardmax = np.zeros(y_pred.shape)
    y_pred_hardmax[np.arange(a.shape[0]),a] = 1
    # print(y_pred_hardmax)
    # print(list(y_test.argmax(axis=0)))
    # print(list(y_pred_hardmax.argmax(axis=0)))
    print('AAAA')
    evaluate.plot_confusion_matrix('confusion_matrix.png', y_test.argmax(axis=1), y_pred.argmax(axis=1))
Example #16
from config import *
from data_utils import *
from train import get_model

max_len = 25
word2id, embedding_matrix, vocab = load_embeddings("glove.6B.100d.txt", 100)

model = get_model(len(word2id.keys()), embedding_dims, embedding_matrix,
                  max_len, emb_dropout, rnn_units, rnn_dropout,
                  recurrent_dropout)
model.load_weights("weights.h5")

while True:
    text = input("Enter sentence:")
    X = preprocess(text, word2id)
    print(X)
    print(model.predict(pad_sequence(X, max_len)))
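
# Hypothetical sketch of the padding step assumed above (pad_sequence comes
# from data_utils in the example): it is expected to behave like Keras'
# pad_sequences applied to a single encoded sentence.
from tensorflow.keras.preprocessing.sequence import pad_sequences

def pad_single(encoded, max_len):
    return pad_sequences([encoded], maxlen=max_len, padding='post')  # shape (1, max_len)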
Example #17
from train import get_model

if __name__ == '__main__':

    # GPU settings
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)

    # get the original_dataset
    train_dataset, valid_dataset, test_dataset, train_count, valid_count, test_count = generate_datasets(
    )
    # print(train_dataset)
    # load the model
    model = get_model(flag=0)
    model.load_weights(filepath=config.save_model_dir)

    # Get the accuracy on the test set
    loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
    test_loss = tf.keras.metrics.Mean()
    test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy()

    @tf.function
    def test_step(images, labels):
        predictions = model(images)
        t_loss = loss_object(labels, predictions)

        test_loss(t_loss)
        test_accuracy(labels, predictions)
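
    # Hypothetical continuation (not shown in the example): drive test_step over
    # the test set and report the aggregated metrics.
    for images, labels in test_dataset:
        test_step(images, labels)
    print("test loss: {:.4f}, test accuracy: {:.4f}".format(
        test_loss.result(), test_accuracy.result()))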