Example #1
def optimize_model():
    print("Start training model with data poisoning attacks!")
    last_rmse = None
    for iteration in range(n_iters):
        t1 = time.time()
        # ALS updates the user, malicious-user, and item feature matrices in place
        ALS(n_user, n_item, n_feature, mal_user, train, mean_rating_,
            mal_mean_rating_, mal_ratings, lamda_u, lamda_v,
            user_features_, mal_user_features_, item_features_)
        train_preds = predict(train.take([0, 1], axis=1), user_features_,
                              item_features_, mean_rating_)
        train_rmse = RMSE(train_preds, train.take(2, axis=1))
        t2 = time.time()
        print("The %d th iteration \t time: %ds \t RMSE: %f " %
              (iteration + 1, t2 - t1, train_rmse))
        # stop when converge
        if last_rmse and abs(train_rmse - last_rmse) < converge:
            break
        else:
            last_rmse = train_rmse
    return last_rmse
Example #2
def optimize_model_origin(converge, n_user, n_item, n_feature, train,
                          mean_rating_, lamda_u, lamda_v,
                          user_features_origin_, item_features_origin_):

    print("Start training model without data poisoning attacks!")
    last_rmse = None
    n_iters = 100
    for iteration in range(n_iters):
        t1 = time.time()
        user_features_origin_, item_features_origin_ = ALS_origin(
            n_user, n_item, n_feature, train, mean_rating_, lamda_u, lamda_v,
            user_features_origin_, item_features_origin_)
        train_preds = predict(train.take([0, 1], axis=1),
                              user_features_origin_, item_features_origin_)
        train_rmse = RMSE(train_preds, train.take(2, axis=1) - 3)
        t2 = time.time()
        print("The %d th iteration \t time: %ds \t RMSE: %f " %
              (iteration + 1, t2 - t1, train_rmse))
        # stop when converge
        if last_rmse and abs(train_rmse - last_rmse) < converge:
            break
        else:
            last_rmse = train_rmse
    return last_rmse
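Both optimizers above rely on predict and RMSE helpers that the snippets do not show. A minimal sketch of plausible implementations, assuming NumPy arrays and the latent-factor model implied by the ALS calls; the signatures are inferred from the call sites, not taken from the original source:

import numpy as np

def RMSE(preds, truth):
    # root-mean-square error between predicted and observed ratings
    return np.sqrt(np.mean((np.asarray(preds) - np.asarray(truth)) ** 2))

def predict(pairs, user_features, item_features, mean_rating=0.0):
    # score each (user, item) index pair with the learned latent factors;
    # the optional mean_rating offset matches the four-argument call in Example #1
    users = pairs[:, 0].astype(int)
    items = pairs[:, 1].astype(int)
    return np.sum(user_features[users] * item_features[items], axis=1) + mean_rating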
Example #3
def main(args):  # assumed enclosing function; args is a docopt-style dict
    if args['--output-dir'] is not None:
        os.environ["OUT_DIR"] = args['--output-dir']
    else:
        assert "OUT_DIR" in os.environ

    if args['--config'] is not None:
        config = load_config_from_file(args['--config'])
    else:
        from seq2seq import config

    print("Using configuration", config.__file__)

    if args['--test'] is True:
        test_file = config.filename_test
        eval_type = 'test'
    elif args['--dev'] is True:
        test_file = config.filename_dev
        eval_type = 'dev'
    else:
        raise ValueError('Specify --dev or --test.')

    config_holder = ConfigHolder(config)
    model = Model(config_holder)

    df = evaluation.predict(model, config_holder, test_file)
    acc_dict = evaluation.calculate_accuracy(df)
    acc_verbose = evaluation.accuracy_to_string_verbose(acc_dict)
    evaluation.save_results(df, acc_dict, acc_verbose, "Et-morf-yh", eval_type,
                            config.out_dir)
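The args dictionary indexed with '--output-dir', '--config', '--test', and '--dev' suggests docopt-style argument parsing. A minimal sketch of how such a dict could be produced; the usage string below is an assumption, not part of the original source:

from docopt import docopt

USAGE = """Evaluate a trained model.

Usage:
  evaluate.py [--config=<path>] [--output-dir=<dir>] (--dev | --test)
"""

args = docopt(USAGE)  # e.g. {'--config': None, '--dev': True, '--test': False, ...}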
Example #4
def run(args):
    # Add underscore to the tag
    args.tag = ("_" + args.tag) if args.tag is not None else ""
    # Parse prefix and postfix
    prefix = "{}{}".format("-Subword" if args.subword else "", "-Attention"
                             if args.attention else "")

    postfix = "{}{}{}".format("_subword" if args.subword else "",
                             ("_" + args.data_tag) if args.data_tag is not None else "",
                             ("_d" if args.description else ""))

    # Parse directory name
    if not args.model_dir.endswith("/"):
        args.model_dir += "/"
    if args.matching:
        print("Matching problem.")
    #########################################
    # Load models (TO-BE-REVISED)
    with open(args.tokenizers, "rb") as f:
        tokenizers = pkl.load(f)
    n_classes = len(tokenizers["mlb"].classes_)
    try:
        desc_tokenizer = tokenizers["description"]
    except KeyError:
        # older tokenizer pickles may lack a description tokenizer
        desc_tokenizer = None
    #########################################
    # Building Model
    print("Building computational graph...")

    model = EntityTypingNet(
        architecture=args.arch,
        n_classes=n_classes,
        context_tokenizer=tokenizers["context"],
        mention_tokenizer=tokenizers["mention"],
        desc_tokenizer=desc_tokenizer,
        context_emb=args.context_emb,
        context_embedding_dim=args.context_embedding_dim,
        mention_emb=args.mention_emb,
        mention_embedding_dim=args.mention_embedding_dim,
        desc_emb=args.desc_emb,
        desc_embedding_dim=args.desc_embedding_dim,
        same_emb=args.same_emb,
        n_words=MAX_NUM_WORDS,
        n_mention=MAX_NUM_MENTION_WORDS,
        n_description=MAX_NUM_DESCRIPTION_WORDS,
        len_context=MAX_SEQUENCE_LENGTH,
        len_mention=MAX_MENTION_LENGTH,
        len_description=MAX_DESCRIPTION_LENGTH,
        attention=args.attention,
        subword=args.subword,
        indicator=args.indicator,
        description=False, # args.description,
        matching=args.matching,
        merge_mode=args.merge_mode,
        dropout=args.dropout,
        use_softmax=args.use_softmax,
        optimizer=args.optimizer,
        learning_rate=args.learning_rate)

    model.summary()  # Keras summary() prints directly and returns None

    # Save weights at the end of each epoch
    save_prefix = "{:s}{:s}-weights{:s}".format(args.arch, prefix, args.tag)
    filename = save_prefix + "-{epoch:02d}.hdf5"

    checkpoint = ModelCheckpoint(
        filename,
        monitor="val_loss",
        verbose=1,
        save_best_only=False,
        mode="min")
    early = EarlyStopping(monitor="val_loss", mode="min", patience=20)
    callbacks_list = [checkpoint, early]

    X_train, Z_train, y_train, D_train = load_pkl_data(
        args.model_dir, "training", postfix, indicator=args.indicator, matching=args.matching)
    ######################################################
    """
    print(X_train.shape, y_train.shape)
    print("Stacking positive samples")
    n_instance = X_train.shape[0] // 6
    idxs = [i * 6 for i in range(n_instance)]
    tmp = np.vstack([X_train[idxs] for _ in range(4)])
    X_train = np.vstack([X_train, tmp])
    del tmp
    tmp = np.vstack([Z_train[idxs] for _ in range(4)])
    Z_train = np.vstack([Z_train, tmp])
    del tmp
    tmp = np.hstack([y_train[idxs] for _ in range(4)])
    y_train = np.hstack([y_train, tmp])
    del tmp
    if args.description:
        tmp = np.vstack([D_train[idxs] for _ in range(4)])
        D_train = np.vstack([D_train, tmp])
    """
    ######################################################
    # inputs = [X_train, Z_train]
    print(X_train.shape, Z_train.shape, y_train.shape)

    #if args.use_softmax:
    #    y_train =  np.array(mlb.inverse_transform(y_train)).flatten()
    inputs = [X_train, Z_train, D_train] if args.description else [X_train, Z_train]
    print("Begin training...")
    model.fit(
        inputs,
        y_train,
        batch_size=args.batch_size,
        epochs=args.epochs,
        validation_split=0.01,
        callbacks=callbacks_list)

    # Evaluation
    record = 0
    index = 0

    X_val, Z_val, y_val, D_val = load_pkl_data(
        args.model_dir, "validation", postfix, indicator=args.indicator, description=args.description)

    print("Loading trained weights for validation...")
    for i in range(1, args.epochs + 1):
        # Deal with model_name for each epoch
        model_name = "{:s}-{:02d}.hdf5".format(save_prefix, i)
        model.load_weights(model_name)

        f = predict(
            model,
            X_val,
            Z_val,
            y_val,
            model_name,
            "results.txt",
            return_mf1=True,
            use_softmax=args.use_softmax)

        # prefer the later epoch when the micro-F1 score ties
        if record <= f:
            record = f
            index = i

    print("\n * Best micro-F1 at Validation: epoch #{:02d}".format(index))
    # Test model with best micro F1 score
    model_name = "{:s}-{:02d}.hdf5".format(save_prefix, index)
    just_test(
        model=model,
        filename=model_name,
        postfix=postfix,
        use_softmax=args.use_softmax,
        indicator=args.indicator)

    K.clear_session()
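The snippet depends on a load_pkl_data helper that is not shown. A hypothetical sketch, assuming the four arrays (context, mention, labels, description) were pickled together; the filename pattern is an assumption based on the model_dir/split/postfix arguments used above:

import pickle as pkl

def load_pkl_data(model_dir, split, postfix, indicator=False,
                  matching=False, description=False):
    # hypothetical loader: assumes X, Z, y, D were pickled as one 4-tuple
    path = "{}{}{}.pkl".format(model_dir, split, postfix)
    with open(path, "rb") as f:
        X, Z, y, D = pkl.load(f)
    return X, Z, y, D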
Example #5
    def get(self, link_video):
        videofile = "/video/video.mp4"
        for x in Data:
            if x['Data'] == link_video:

                #os.system("./predict unseen-weights178.h5 anshu.mp4")

                from lipnet.lipreading.videos import Video
                from lipnet.lipreading.visualization import show_video_subtitle
                from lipnet.core.decoders import Decoder
                from lipnet.lipreading.helpers import labels_to_text
                from lipnet.utils.spell import Spell
                from lipnet.model2 import LipNet
                from keras.optimizers import Adam
                from keras import backend as K
                import numpy as np
                import sys
                import os

                np.random.seed(55)

                CURRENT_PATH = os.path.dirname(os.path.abspath(__file__))

                FACE_PREDICTOR_PATH = os.path.join(
                    CURRENT_PATH, '..', 'common', 'predictors',
                    'shape_predictor_68_face_landmarks.dat')

                PREDICT_GREEDY = False
                PREDICT_BEAM_WIDTH = 200
                PREDICT_DICTIONARY = os.path.join(CURRENT_PATH, '..', 'common',
                                                  'dictionaries', 'grid.txt')

                def predict(weight_path,
                            video_path,
                            absolute_max_string_len=32,
                            output_size=28):
                    print "\nLoading data from disk..."
                    video = Video(vtype='face',
                                  face_predictor_path=FACE_PREDICTOR_PATH)
                    if os.path.isfile(video_path):
                        video.from_video(video_path)
                    else:
                        video.from_frames(video_path)
                    print "Data loaded.\n"

                    if K.image_data_format() == 'channels_first':
                        img_c, frames_n, img_w, img_h = video.data.shape
                    else:
                        frames_n, img_w, img_h, img_c = video.data.shape

                    lipnet = LipNet(
                        img_c=img_c,
                        img_w=img_w,
                        img_h=img_h,
                        frames_n=frames_n,
                        absolute_max_string_len=absolute_max_string_len,
                        output_size=output_size)

                    adam = Adam(lr=0.0001,
                                beta_1=0.9,
                                beta_2=0.999,
                                epsilon=1e-08)

                    lipnet.model.compile(
                        loss={'ctc': lambda y_true, y_pred: y_pred},
                        optimizer=adam)
                    lipnet.model.load_weights(weight_path)

                    spell = Spell(path=PREDICT_DICTIONARY)
                    decoder = Decoder(
                        greedy=PREDICT_GREEDY,
                        beam_width=PREDICT_BEAM_WIDTH,
                        postprocessors=[labels_to_text, spell.sentence])

                    X_data = np.array([video.data]).astype(np.float32) / 255
                    input_length = np.array([len(video.data)])

                    y_pred = lipnet.predict(X_data)
                    result = decoder.decode(y_pred, input_length)[0]

                    return (video, result)

                video, result = predict("unseen-weights178.h5", "anshu.mp4")
                return result

        return {'Data': None}
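The get(self, link_video) signature suggests this class is a Flask-RESTful Resource. A hypothetical registration sketch; the class name VideoPredict and the route are assumptions:

from flask import Flask
from flask_restful import Api, Resource

app = Flask(__name__)
api = Api(app)
# route GET /predict/<link_video> to the resource defined above
api.add_resource(VideoPredict, '/predict/<string:link_video>')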
Example #6
def main(args):  # assumed enclosing function and if-branch, mirroring Example #3
    if args['--output-dir'] is None:
        assert "OUT_DIR" in os.environ
    else:
        os.environ["OUT_DIR"] = args['--output-dir']

    if args['--config'] is not None:
        config = load_config_from_file(args['--config'])
    else:
        from mcml import config

    print("Using configuration", config.__file__)

    if args['--test'] is True:
        test_file = config.filename_test
        eval_type = 'test'
    elif args['--dev'] is True:
        test_file = config.filename_dev
        eval_type = 'dev'
    else:
        raise ValueError('Specify --dev or --test.')

    config_holder = ConfigHolder(config)
    model = Model(config_holder)

    df = evaluation.predict(model,
                            config_holder,
                            test_file,
                            predict_sentence_callback=predict_sentence)
    acc_dict = evaluation.calculate_accuracy(df)
    acc_verbose = evaluation.accuracy_to_string_verbose(acc_dict)
    evaluation.save_results(df, acc_dict, acc_verbose, lang_key, eval_type,
                            config.out_dir)
Example #7
def evaluate(model, config_holder, test_file, lang_key, eval_type, out_dir):
    df = evaluation.predict(model, config_holder, test_file)
    acc_dict = evaluation.calculate_accuracy(df)
    acc_verbose = evaluation.accuracy_to_string_verbose(acc_dict)
    evaluation.save_results(df, acc_dict, acc_verbose, lang_key, eval_type, out_dir)
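A hypothetical call site for this helper, reusing the setup pattern from Examples #3 and #6; the lang_key value is an assumption:

config_holder = ConfigHolder(config)
model = Model(config_holder)
evaluate(model, config_holder, config.filename_dev, "some-lang-key", "dev",
         config.out_dir)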