Esempio n. 1
0
def index_images(folder, features_path, mapping_path, model, glove_path):
    """Generate features for the images under ``folder`` and persist them.

    Args:
        folder: Forwarded to ``utils.load_paired_img_wrd`` as ``folder``.
        features_path: First argument forwarded to ``utils.save_features``.
        mapping_path: Third argument forwarded to ``utils.save_features``.
        model: Forwarded to ``utils.generate_features`` with the image paths.
        glove_path: Location handed to ``utils.load_glove_vectors``.

    Returns:
        The ``(images_features, file_index)`` pair produced by
        ``utils.generate_features``.
    """
    print("Now indexing images...")

    glove = utils.load_glove_vectors(glove_path)

    # Only the third element of the loader's result (the paths) is used here.
    paired = utils.load_paired_img_wrd(folder=folder, word_vectors=glove)
    _first, _second, image_paths = paired

    generated = utils.generate_features(image_paths, model)
    images_features, file_index = generated

    utils.save_features(
        features_path, images_features, mapping_path, file_index)
    return images_features, file_index
Esempio n. 2
0
def main():
    """Compute and store POD bases, scaled features, targets and POD errors.

    For every dataset in ``config.datasets`` the snapshots and parameters are
    loaded once. Then, for every component in ``config.components``:

    * on the ``'train'`` dataset the snapshot mean, the POD (``V``, ``D``) and
      a ``StandardScaler`` fitted on the training parameters are computed and
      saved;
    * on every dataset the snapshots are centered with the *training* mean and
      the features, targets and squared POD projection error are saved.

    Raises:
        AssertionError: If the first configured dataset is not ``'train'``
            (the training mean/POD/scaler must exist before other datasets
            are processed).
    """
    utils.set_up_data_directories()

    snapshots = {}
    parameters = {}
    for dataset in config.datasets:
        # shape: N_h x N
        # i.e. #DOFs x #snapshots
        snapshots[dataset] = utils.load_snapshots(dataset)
        parameters[dataset] = utils.load_parameters(dataset)

    for component in config.components:
        # Explicit raise instead of ``assert`` so the check survives ``-O``;
        # the exception type is unchanged for existing callers.
        if config.datasets[0] != 'train':
            raise AssertionError('The first dataset must be train')
        print(f'\nComputing targets for component {component}')

        for dataset in config.datasets:
            # Snapshot matrix, non-centered
            S_n = utils.reduce(snapshots[dataset], component)

            if dataset == 'train':
                # Compute and store ..
                # .. mean and POD
                S_mean = np.mean(S_n, axis=1)
                # Broadcasting subtracts the per-DOF mean from every snapshot
                # column without a Python-level loop over columns.
                # NOTE(review): assumes S_n is a 2-D ndarray (N_h x N).
                S = S_n - S_mean[:, np.newaxis]
                V, D = do_POD(S)
                utils.save_POD(V, D, S_mean, component)
                # .. scaler
                scaler = StandardScaler()
                scaler.fit(parameters[dataset])
                utils.save_scaler(scaler)
            else:
                # Center with the mean computed on the training snapshots.
                S = S_n - S_mean[:, np.newaxis]

            # Now V, D, S_mean and scaler are available

            # Compute and store ..
            # .. features
            features = compute_features(scaler, parameters[dataset])
            utils.save_features(dataset, features)
            # .. targets
            targets = compute_targets(S, V, D)
            utils.save_targets(dataset, component, targets)
            # .. projection error
            err_POD_sq = compute_error_POD_sq(S, V, D)
            utils.save_error_POD_sq(dataset, component, err_POD_sq)
def main():
    """Build ``myData.h5py`` from the audio folders unless it already exists.

    When the file is missing, the training and evaluation folders are each
    converted to mono and compressed, features are extracted from the
    ground-truth and input folders, each (ground-truth, input) pair is
    shuffled in unison, and everything is written with ``save_features``.
    """
    output_name = 'myData.h5py'
    if os.path.exists(output_name):
        # Features were already generated; nothing to do.
        return

    # Data preparation: training set first, then the evaluation set.
    stereo_to_mono(stereo_folder, groundtruth_folder)
    compress(groundtruth_folder, input_folder)
    stereo_to_mono(eval_stereo_folder, eval_groundtruth_folder)
    compress(eval_groundtruth_folder, eval_input_folder)

    # Feature extraction; the second returned value is not needed here.
    train_target, _ = get_features(groundtruth_folder)
    train_input, _ = get_features(input_folder)
    eval_target, _ = get_features(eval_groundtruth_folder)
    eval_input, _ = get_features(eval_input_folder)

    # Shuffle each target/input pair together.
    train_target, train_input = unison_shuffled_copies(
        train_target, train_input)
    eval_target, eval_input = unison_shuffled_copies(eval_target, eval_input)

    # Persist everything in one file.
    save_features(output_name, train_input, eval_input, train_target,
                  eval_target)
Esempio n. 4
0
                    split_tag.append(i)
            if len(parsed_words) - 1 not in split_tag:
                split_tag.append(len(parsed_words) - 1)
            sent_sub = []
            for i in range(1, len(split_tag)):
                st, ed = split_tag[i - 1], split_tag[i]
                sent_sub.append(' '.join(parsed_words[st + 1:ed + 1]).strip())

            ss_data = []
            te = 0
            for ss in sent_sub:
                time_elapsed, parsed_sents, segments = compSP.predict_batch_prob(ss, ref, args.max_token, args.min_word, scaling=0, printout=False)
                ss_data.append((time_elapsed, parsed_sents, segments))
                te += time_elapsed
            te /= len(sent_sub)

            sent_data.append(ss_data)

            sum_elapsed_time += te
            sum_sent += 1
            tbar.set_description('[{}/{} sentence] avg. time/sent: {:.3f}'.format(si, len(doc), sum_elapsed_time / sum_sent))

        doc_data.append((name, sent_data))

        if (di + 1) % args.save_freq == 0 or (di + 1) == len(texts):
            text_format = '{}{:05d}.pkl'.format(args.split, save_id)
            output_path = os.path.join(output_dir, text_format)
            save_features(output_path, doc_data)
            doc_data = []
            save_id += 1
Esempio n. 5
0
    del tr_titles, tr_desc, X_train

    gc.collect()
    X_va = hstack([csr_matrix(X_val.drop(columns_to_drop, axis=1)), va_titles, va_desc])
    y_va = X_val['deal_probability']
    del va_titles, va_desc, X_val
    gc.collect()
    X_te = hstack([csr_matrix(X_test.drop(columns_to_drop, axis=1)), te_titles, te_desc])

    del te_titles, te_desc, X_test

    gc.collect()


    if nrows is None:
        utils.save_features(X_tr, xgb_root, "X_train")
        utils.save_features(X_va, xgb_root, "X_val")
        utils.save_features(X_te, xgb_root, "test")
        utils.save_features(y_tr, xgb_root, "y_train")
        utils.save_features(y_va, xgb_root, "y_val")

elif args.feature == "load":
    print("[+] Load features ")
    X_tr = utils.load_features(xgb_root, "X_train").any()
    X_va = utils.load_features(xgb_root, "X_val").any()
    X_te = utils.load_features(xgb_root, "test").any()
    y_tr = utils.load_features(xgb_root, "y_train")
    y_va = utils.load_features(xgb_root, "y_val")
    print("[+] Done ")
    X = vstack([X_tr, X_va])
    y = np.concatenate((y_tr, y_va))
Esempio n. 6
0
        csr_matrix(data_va.drop(columns_to_drop, axis=1)), va_titles, va_desc
    ])
    y_va = data_va['deal_probability']
    del va_titles, va_desc, data_va
    gc.collect()
    X_te = hstack([
        csr_matrix(data_te.drop(columns_to_drop, axis=1)), te_titles, te_desc
    ])

    del te_titles, te_desc, data_te

    gc.collect()

    ################################################################################
    # if nrows is None:
    utils.save_features(X_tr, lgbm_dir, "X_train")
    utils.save_features(X_va, lgbm_dir, "X_val")
    utils.save_features(X_te, lgbm_dir, "test")
    utils.save_features(y_tr, lgbm_dir, "y_train")
    utils.save_features(y_va, lgbm_dir, "y_val")
    ################################################################################
elif args.feature == "load":
    print("[+] Load features ")
    X_tr = utils.load_features(lgbm_dir, "X_train").any()
    X_va = utils.load_features(lgbm_dir, "X_val").any()
    X_te = utils.load_features(lgbm_dir, "test").any()
    y_tr = utils.load_features(lgbm_dir, "y_train")
    y_va = utils.load_features(lgbm_dir, "y_val")
    print("[+] Done ")
    X = vstack([X_tr, X_va])
    y = np.concatenate((y_tr, y_va))
    for i, split in enumerate(splits):
        print(input_path + '/{}*'.format(split))
        files = sorted(glob.glob(input_path + '/{}*'.format(split)))
        print('{} files are found'.format(len(files)))

        filename = os.path.join(output_path, split + '.pkl')
        filename_stats = os.path.join(output_path, split + '_stats.pkl')

        if not os.path.exists(filename):
            st_time = time.time()

            # merge data based on filtering rule
            data_all, full_sent_pos, seg_num_doc = data_processing(
                files, args.seg_per_sent, args.debug)

            save_features(filename, data_all)
            save_features(filename_stats, [full_sent_pos, seg_num_doc])

            print('total num. sentences', len(full_sent_pos))
            print('elapsed time: {:.3f}s'.format(time.time() - st_time))
        else:
            data_all = load_features(filename)
            full_sent_pos, seg_num_doc = load_features(filename_stats)
            print('data is loaded from {} and {}'.format(
                filename, filename_stats))

        full_sent_pos_list.append(full_sent_pos)
        print_stats(seg_num_doc, '{}-seg_num_doc'.format(split))

    # draw data stats
    draw_stats(full_sent_pos_list, splits, data_name)
Esempio n. 8
0
def main():
    """Build a sparse TF-IDF + tabular feature matrix and save it with y_train.

    Reads ``train_csv``, ``test_csv`` and ``extracted_features`` from
    ``config.json``. The train and test frames are concatenated, so the saved
    ``X_train_xgboost`` matrix holds the rows of both, in that order; the
    saved ``y_train_xgboost`` holds the ``deal_probability`` of the train
    frame only.
    """
    # Load json config; the context manager closes the file handle.
    with open("config.json") as config_file:
        config = json.load(config_file)

    print("[+] Load csv ...")
    train_df = load_csv(config["train_csv"])
    test_df = load_csv(config["test_csv"])

    # The target has to be read here: train_df is deleted right after the
    # concat, so accessing it further down would raise UnboundLocalError.
    print("[+] Create y_train ...")
    y_train = np.asarray(train_df["deal_probability"].values)

    df = pd.concat([train_df, test_df])
    del train_df
    del test_df
    gc.collect()

    print("[+] Log price ...")
    df["price"] = df["price"].apply(np.log1p)
    # log1p(-1) evaluates to -inf; map that to -1.
    df["price"] = df["price"].apply(lambda x: -1 if x == -np.inf else x)

    print("[+] Create time features ...")
    activation = df["activation_date"].dt
    df["mon"] = activation.month
    df["mday"] = activation.day
    # NOTE(review): ``.dt.week`` is deprecated in recent pandas releases in
    # favour of ``.dt.isocalendar().week``; kept to match the pandas version
    # this file targets.
    df["week"] = activation.week
    df["wday"] = activation.weekday

    cat_vars = ["category_name", "parent_category_name", "region", "user_type"]

    print("[+] Label categories ...")
    for cat in cat_vars:
        df[cat] = LabelEncoder().fit_transform(df[cat].values)

    txt_vars = [
        "city", "param_1", "param_2", "param_3", "title", "description"
    ]

    print("[+] Merge text ...")
    for txt in txt_vars:
        df[txt] = df[txt].astype("str")

    df["txt"] = ""
    for txt in txt_vars:
        df["txt"] += df[txt]

    delete_columns = [
        "item_id", "user_id", "city", "param_1", "param_2", "param_3", "title",
        "description", "activation_date", "image"
    ]

    print("[+] Delete unused columns ...")
    df = df.drop(delete_columns, axis=1)

    print("[+] Extract TFIDF  ...")
    df["txt"] = df["txt"].apply(lambda x: x.lower())
    # NOTE(review): ``[[:alpha:]]`` is a POSIX character class, which Python's
    # ``re`` does not interpret as such, so this pattern probably does not
    # strip non-letters as intended. Left unchanged because altering it
    # changes the extracted features - confirm before fixing.
    df["txt"] = df["txt"].replace("[^[:alpha:]]", " ", regex=True)
    df["txt"] = df["txt"].replace("\\s+", " ", regex=True)
    tfidf_vec = TfidfVectorizer(ngram_range=(1, 3),
                                sublinear_tf=True,
                                stop_words=stopWords_rus,
                                max_features=5500)
    full_tfidf = tfidf_vec.fit_transform(df['txt'].values.tolist())

    extract_columns = [
        'region', 'parent_category_name', 'category_name', 'price',
        'item_seq_number', 'user_type', 'image_top_1', 'mon', 'mday', 'week',
        'wday'
    ]

    print("[+] Stack more features  ...")
    # Each column is reshaped to (n_rows, 1): a 1-D array would be read as a
    # single row and fail to line up with the TF-IDF matrix. ``.values``
    # replaces the removed ``as_matrix()``. One hstack call avoids rebuilding
    # the matrix once per column.
    extra_blocks = [df[c].values.reshape(-1, 1) for c in extract_columns]
    full_tfidf = hstack([full_tfidf] + extra_blocks).tocsr()

    extracted_features_root = config["extracted_features"]
    utils.save_features(full_tfidf,
                        root=extracted_features_root,
                        name="X_train_xgboost")
    utils.save_features(y_train,
                        root=extracted_features_root,
                        name="y_train_xgboost")
Esempio n. 9
0
# Load the Iris data already split into train and test parts.
# NOTE(review): 0.3 is presumably the test fraction - confirm in dataset.load_Iris.
X_train, y_train, X_test, y_test, num_classes = dataset.load_Iris(0.3)

# model setup: a single Vector layer configured from the command-line args
layers = [Vector(args.layers, eta=args.eta, eps=args.eps)]
model = Architecture(layers, model_dir, num_classes)

# train/test pass
# The train pass receives the labels, the test pass does not; model.loss_dict
# is saved after each pass under a different tag.
print("Forward pass - train features")
Z_train = model(X_train, y_train)
utils.save_loss(model.loss_dict, model_dir, "train")
print("Forward pass - test features")
Z_test = model(X_test)
utils.save_loss(model.loss_dict, model_dir, "test")

# save features: both the raw inputs (X_*) and the model outputs (Z_*),
# each together with its labels
utils.save_features(model_dir, "X_train", X_train, y_train)
utils.save_features(model_dir, "X_test", X_test, y_test)
utils.save_features(model_dir, "Z_train", Z_train, y_train)
utils.save_features(model_dir, "Z_test", Z_test, y_test)

# evaluation train: the same training features are passed as both the first
# and the second (features, labels) pair of each evaluator
_, acc_svm = evaluate.svm(Z_train, y_train, Z_train, y_train)
acc_knn = evaluate.knn(Z_train, y_train, Z_train, y_train, k=5)
acc_svd = evaluate.nearsub(Z_train, y_train, Z_train, y_train, n_comp=1)
acc = {"svm": acc_svm, "knn": acc_knn, "nearsub-svd": acc_svd}
utils.save_params(model_dir, acc, name="acc_train.json")

# evaluation test: training features first, test features second
_, acc_svm = evaluate.svm(Z_train, y_train, Z_test, y_test)
acc_knn = evaluate.knn(Z_train, y_train, Z_test, y_test, k=5)
acc_svd = evaluate.nearsub(Z_train, y_train, Z_test, y_test, n_comp=1)
Esempio n. 10
0
skf = KFold(n_folds)

for fold, (train_index, val_index) in enumerate(skf.split(X)):
    print(f"\n[+] Fold {fold}")
    if fold in skip_fold:
        print(f"[+] Fold {fold} is skipped")
        continue

    X_train = X[train_index]
    y_train = y[train_index]

    X_valid = X[val_index]
    y_valid = y[val_index]

    # Save val index and test index to file
    utils.save_features(np.asarray(train_index), lgb_root,
                        f"train_index_fold_{fold}")
    utils.save_features(np.asarray(val_index), lgb_root,
                        f"val_index_fold_{fold}")

    print("Light Gradient Boosting Regressor")
    lgbm_params = {
        'task': 'train',
        'boosting_type': 'gbdt',
        'objective': 'regression',
        'metric': 'rmse',
        'max_depth': 15,
        'num_leaves': 35,
        'feature_fraction': 0.7,
        'bagging_fraction': 0.8,
        # 'bagging_freq': 5,
        'learning_rate': 0.019,
def run_predict(args,
                model,
                tokenizer,
                logger,
                batch_size,
                cuda_dev,
                is_sim_running=False,
                is_run_force=False):
    """Run similarity or importance prediction on a DUC/TAC split and cache it.

    The documents of the selected corpus split are read, their labels, names
    and positions are saved, and - unless a prediction file already exists
    and ``is_run_force`` is false - the predictions are computed and saved
    under ``<data_path>/BERT_features/extractions``.

    Args:
        args: Namespace providing ``dataset`` (0 = DUC, 1 = TAC), ``split``,
            ``base_path``, ``data_type``, ``data_start``, ``data_end`` and
            ``DUC_data_path`` / ``TAC_data_path``.
        model: Forwarded to ``predict_sim`` / ``predict_imp``.
        tokenizer: Forwarded to ``predict_sim`` / ``predict_imp``.
        logger: Object whose ``write`` method receives progress messages.
        batch_size: Forwarded to the prediction function.
        cuda_dev: Forwarded to the prediction function.
        is_sim_running: If true run ``predict_sim``; otherwise run
            ``predict_imp``, which additionally yields a pooled-vector list
            that is saved to its own file.
        is_run_force: If true, recompute even when the prediction file exists.

    Raises:
        ValueError: If ``args.dataset`` is neither 0 nor 1.
    """
    # Both corpora share the same directory layout and reader; only the
    # configured path list and the ``is_duc`` flag differ.
    if args.dataset == 0:
        # DUC
        corpus_paths, is_duc = args.DUC_data_path, True
    elif args.dataset == 1:
        # TAC
        corpus_paths, is_duc = args.TAC_data_path, False
    else:
        # Previously this fell through and crashed later on an unbound
        # ``data_path``; fail early with a clear message instead.
        raise ValueError(
            'unsupported dataset id {!r}; expected 0 (DUC) or 1 (TAC)'.format(
                args.dataset))

    split_dir = 'train' if args.split == 'train' else 'test'
    data_path = os.path.join(args.base_path, os.path.dirname(corpus_paths[0]),
                             args.data_type, split_dir)
    text_cls = readDUCorTACText(data_path,
                                sum_path=data_path,
                                is_duc=is_duc,
                                data_st=args.data_start,
                                data_en=args.data_end)

    BERT_base_dir = os.path.join(data_path, 'BERT_features', 'extractions')
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(BERT_base_dir, exist_ok=True)

    # retrieve text data
    text_docs = text_cls.text
    Y = text_cls.Y
    name = text_cls.name
    pos = text_cls.pos
    seg_pos = text_cls.seg

    y_name_pos_file = os.path.join(
        BERT_base_dir,
        '{}_y_name_pos_{}-{}.pkl'.format(args.split, args.data_start,
                                         args.data_end))
    save_features(y_name_pos_file, {'Y': Y, 'name': name, 'pos': pos})
    logger.write('docs files are saved in {}.'.format(y_name_pos_file))

    pred_fn = 'sim' if is_sim_running else 'imp'
    pred_fn = '{}_{}_{}-{}'.format(args.split, pred_fn, args.data_start,
                                   args.data_end)
    pred_file = os.path.join(BERT_base_dir, pred_fn)

    # The pooled-vector file only exists for importance prediction.
    pool_file = None
    if not is_sim_running:
        pool_fn = '{}_{}_{}-{}'.format(args.split, 'imp_vector',
                                       args.data_start, args.data_end)
        pool_file = os.path.join(BERT_base_dir, pool_fn)

    if os.path.exists(pred_file) and not is_run_force:
        logger.write('{} file exists... skip prediction!'.format(pred_file))
        if not is_sim_running:
            logger.write(
                '{} file exists... skip prediction!'.format(pool_file))
        return

    st_ext = time.time()

    if is_sim_running:
        pred_list = predict_sim(model, tokenizer, text_docs, 128, batch_size,
                                cuda_dev, name, seg_pos)
    else:
        pred_list, pool_list = predict_imp(model, tokenizer, text_docs, pos,
                                           512, batch_size, cuda_dev)

    elapsed_time = time.time() - st_ext
    # Guard the per-document average against an empty corpus.
    n_docs = len(text_docs)
    avg_time = elapsed_time / n_docs if n_docs else 0.0
    logger.write('prediction time for {} is {}sec: avg. {}sec/doc.'.format(
        data_path, elapsed_time, avg_time))

    save_features(pred_file, pred_list)
    logger.write('{} file stored!'.format(pred_file))
    if not is_sim_running:
        save_features(pool_file, pool_list)
        logger.write('{} file stored!'.format(pool_file))
Esempio n. 12
0
                cls_scores = cls_prob[:, cls_ind]
                dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32)
                keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
                max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

            keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
            if len(keep_boxes) < MIN_BOXES:
                keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
            elif len(keep_boxes) > MAX_BOXES:
                keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
          
            image_feat = pooled_feat[keep_boxes]
            if args.save_boxes:
                image_bboxes = boxes[keep_boxes]
                all_images_boxes.append(image_bboxes)
            else:
                all_images_boxes = None

            all_images_feats.append(image_feat)

            #torch.cuda.empty_cache()

    #transforms array to numpy array
    all_images_feats = np.asarray(all_images_feats, dtype=np.float16)
    if args.save_boxes:
      all_images_boxes = np.asarray(all_images_boxes, dtype=np.float16)

    #save the files
    output_file = os.path.join(args.output_dir, args.out_file_name+'.npy')
    save_features(output_file, all_images_feats, all_images_boxes)
Esempio n. 13
0
    #
    # save features for VGG16 at 3 different input scales
    # from keras.applications.vgg16 import VGG16
    # from keras.applications.vgg16 import preprocess_input
    # model = VGG16(weights='imagenet', include_top=False)
    #
    # for n in [224,128,64]:
    #     input_shape = (n,n,3)
    #     new_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape))
    #     features = utils.features_from_image(all_logos, model, new_preprocess)
    #     utils.save_features('vgg16_logo_features_{}.hdf5'.format(n), features, brand_map, input_shape)

    # Extract logo features with an ImageNet-pretrained NASNetMobile
    # (classification head excluded) at a 224x224x3 input size.
    from keras.applications.nasnet import NASNetMobile
    from keras.applications.nasnet import preprocess_input
    model_out = NASNetMobile(weights='imagenet', include_top=False)
    input_shape = (224, 224, 3)

    # Pad each image to input_shape before the NASNet-specific preprocessing.
    new_preprocess = lambda x: preprocess_input(utils.pad_image(
        x, input_shape))
    # NOTE(review): the network built above is bound to ``model_out``, but
    # ``model`` is what is passed here - confirm which one is intended; as
    # written, the NASNetMobile instance is never used in the visible code.
    features = utils.features_from_image(all_logos, model, new_preprocess)
    utils.save_features('NASNet_logo_features_{}.hdf5'.format(224), features,
                        brand_map, input_shape)

    # from keras.applications.nasnet import NASNetLarge
    # from keras.applications.nasnet import preprocess_input
    # model_out = NASNetLarge(weights='imagenet', include_top=False)
    # input_shape = (331, 331, 3)
    #
    # new_preprocess = lambda x: preprocess_input(utils.pad_image(x, input_shape))
    # features = utils.features_from_image(all_logos, model, new_preprocess)
    # utils.save_features('NASNet_logo_features_{}.hdf5'.format(331), features, brand_map, input_shape)
Esempio n. 14
0
def main():
    """Extract numeric, categorical and TF-IDF features and save them.

    Reads ``train_csv``, ``test_csv`` and ``extracted_features`` from
    ``config.json``. Train and test frames are concatenated for feature
    extraction and split back by the number of training rows; every
    resulting matrix, the training target and the token lengths are saved
    through ``utils.save_features`` under the configured root.
    """
    # Load json config; the context manager closes the file handle.
    with open("config.json") as config_file:
        config = json.load(config_file)

    with utils.timer("Load csv"):
        print("[+] Load csv ...")
        train_df = load_csv(config["train_csv"])
        test_df = load_csv(config["test_csv"])

    with utils.timer("Create token"):
        print("[+] Create token ...")
        token = create_token(train_df)

    with utils.timer("Tokenize data"):
        print("[+] Tokenize data ...")
        train_token_data = tokenize_data(train_df, token)
        test_token_data = tokenize_data(test_df, token)

    # ``.values`` replaces ``as_matrix()``, which newer pandas no longer has.
    y_train = train_df["deal_probability"].values
    train_df = train_df.drop("deal_probability", axis=1)

    df = pd.concat([train_df, test_df], ignore_index=True)
    # Remember where the training rows end so the stacked features can be
    # split back into train/test below.
    n_train = len(train_df)
    del train_df
    del test_df
    gc.collect()

    with utils.timer("Extract time features"):
        print("[+] Convert date to day of week ...")
        df = date_to_dow(df)

    with utils.timer("Extract text features as numeric"):
        print("[+] Extract text features as numeric ...")
        df['text_feat'] = df.apply(lambda row: ' '.join(
            [str(row['param_1']),
             str(row['param_2']),
             str(row['param_3'])]),
                                   axis=1)  # Group Param Features
        df = extract_text_features_as_numeric(df)

    with utils.timer("Extract params text features"):
        print("[+] Extract params text features ...")
        param_tfidf = extract_params_tex_features(df)

    with utils.timer("Extract title features"):
        print("[+] Extract title features ...")
        df, title_tfidf = title_features(df)

    with utils.timer("Extract description features"):
        print("[+] Extract description features ...")
        df, description_tfidf = description_features(df)

    with utils.timer("Extract price features"):
        print("[+] Extract price features ...")
        df = log_prices(df)

    # Numeric data: one row per column after stacking, hence the transpose.
    print("[+] Extract numerical features ...")
    X_num = np.array([df[c].values for c in num_columns], dtype=np.float32).T
    X_train_num = X_num[:n_train]
    X_test_num = X_num[n_train:]
    print(f"[+] Numeric {X_train_num.shape}/{X_test_num.shape}")

    del X_num
    gc.collect()

    # Categorical data; the builtin ``int`` is what the removed ``np.int``
    # alias used to refer to.
    X_train_cat = np.array(train_token_data, dtype=int).T
    X_test_cat = np.array(test_token_data, dtype=int).T
    print(f"[+] Cat {X_train_cat.shape}/{X_test_cat.shape}")

    X_train_desc = description_tfidf[:n_train]
    X_test_desc = description_tfidf[n_train:]
    print(f"[+] Description {X_train_desc.shape}/{X_test_desc.shape}")

    X_train_title = title_tfidf[:n_train]
    X_test_title = title_tfidf[n_train:]
    print(f"[+] Title {X_train_title.shape}/{X_test_title.shape}")

    X_train_param = param_tfidf[:n_train]
    X_test_param = param_tfidf[n_train:]
    print(f"[+] Param {X_train_param.shape}/{X_test_param.shape}")

    print("[+] Save features ...")

    y_train = np.asarray(y_train)
    # Save token len
    token_len = [len(t) for t in token]

    extracted_features_root = config["extracted_features"]
    # (name, data) pairs, written in the same order as before.
    outputs = [
        ("X_train_num", X_train_num),
        ("X_test_num", X_test_num),
        ("X_train_cat", X_train_cat),
        ("X_test_cat", X_test_cat),
        ("X_train_desc", X_train_desc),
        ("X_test_desc", X_test_desc),
        ("X_train_title", X_train_title),
        ("X_test_title", X_test_title),
        ("X_train_param", X_train_param),
        ("X_test_param", X_test_param),
        ("y_train", y_train),
        ("token_len", np.asarray(token_len)),
    ]
    for feature_name, feature_data in outputs:
        utils.save_features(feature_data,
                            root=extracted_features_root,
                            name=feature_name)
Esempio n. 15
0
 feature_batch_counter = 0
 tbar_batch_counter = 0
 for i, data in enumerate(dataloader, 0):
     # if save_features, save at the beginning of an epoch
     if opt.feature_save and epoch % opt.feature_save_every == 0 and feature_batch_counter < opt.feature_num_batches:
         if len(feature_batches) < opt.feature_num_batches:
             eval_x, eval_y = data
             eval_x = eval_x.cuda()
             feature_batches.append((eval_x, eval_y))
         # feature for real
         eval_x, eval_y = feature_batches[feature_batch_counter]
         with torch.no_grad():
             eval_f = netD.get_feature(eval_x)
         utils.save_features(
             eval_f.cpu().numpy(),
             os.path.join(
                 outff,
                 f'real_epoch_{epoch}_batch_{feature_batch_counter}_f.npy'))
         utils.save_features(
             eval_y.cpu().numpy(),
             os.path.join(
                 outff,
                 f'real_epoch_{epoch}_batch_{feature_batch_counter}_y.npy'))
         # feature for fake
         with torch.no_grad():
             eval_x = netG(feature_eval_noises[feature_batch_counter],
                           feature_eval_labels[feature_batch_counter])
             eval_y = feature_eval_labels[feature_batch_counter]
             eval_f = netD.get_feature(eval_x)
         utils.save_features(
             eval_f.cpu().numpy(),
Esempio n. 16
0
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections.
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        image_feat = pooled_feat[keep_boxes]
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        output_file = os.path.join(args.output_dir,
                                   im_file.split('.')[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)
        #torch.cuda.empty_cache()
Esempio n. 17
0
def _stack_deltas(feat, deltas):
    """Return `feat` with 0, 1 or 2 orders of delta coefficients concatenated on dim 1."""
    if deltas == 0:
        return feat
    delta1 = torchaudio.functional.compute_deltas(feat)  # 1st order
    if deltas == 1:
        return torch.cat((feat, delta1), 1)
    delta2 = torchaudio.functional.compute_deltas(delta1)  # 2nd order
    return torch.cat((feat, delta1, delta2), 1)


def compute_flfeats_offline(source_path,
                            out_dir,
                            feat_type,
                            deltas=None,
                            config_file=None):
    """Compute frame-level features for every wav under a path and save them.

    One feature file is saved per utterance, and the configuration file is
    copied next to the features after each save.

    Args:
        source_path (string): Path to the wavs.
        out_dir (string): Root output directory. Features are written under
            ``<out_dir>/<feat_type>/<basename of source_path>/``.
        feat_type (string): Type of the frame-level feature to extract from
            the utterances. Choose from: 'mfcc', 'fbanks', 'melspec'.
        deltas (int, optional): Order of delta coefficients to append.
            '1' for first order derivative, '2' for first and second order,
            0 or None for not using deltas. Default: None.
        config_file (string): Path to the configuration file (ini); its
            'DEFAULTS' section is forwarded to the extraction function.

    Raises:
        ValueError: If `deltas` is not None, 0, 1 or 2.
    """
    # The documented default (None) means "no deltas". Previously only the
    # literal 0 was matched, so the default silently produced no output.
    if deltas is None:
        deltas = 0
    # Fail fast instead of loading every wav and then doing nothing.
    if deltas not in (0, 1, 2):
        raise ValueError(
            'deltas must be None, 0, 1 or 2, got {!r}'.format(deltas))

    list_wavs = utils.get_files_abspaths(path=source_path, file_type='.wav')
    # frame-level feats params/config from the config file
    params = utils.read_conf_file(file_name=config_file,
                                  conf_section='DEFAULTS')

    print("Computing {} for {} utterances in {}...".format(
        feat_type, len(list_wavs), source_path))

    # The destination does not depend on the individual wav file.
    final_dir = out_dir + '/{0}/{1}/'.format(feat_type,
                                             os.path.basename(source_path))

    for wav_file in list_wavs:
        # Load wav (padded/truncated to 5 seconds)
        waveform = utils.load_wav_torch(wav_file,
                                        max_length_in_seconds=5,
                                        pad_and_truncate=True)

        # Compute the base features, then append the requested derivatives
        feat = execute_extraction_function(feat_type=feat_type,
                                           waveform=waveform,
                                           **params)
        feat = _stack_deltas(feat, deltas)

        utils.save_features(final_dir, feat_type, wav_file, feat)
        utils.copy_conf(config_file, final_dir, feat_type)
Esempio n. 18
0
            pooled_feat = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        # Take columns 1..4 of every roi as the box and drop size-1 axes.
        # NOTE(review): squeeze() also collapses the box axis if there is
        # exactly one roi — confirm this cannot happen.
        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        # Divide the box coordinates by the image scale factor
        # (presumably mapping them back to the original image size — confirm).
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections.
        # max_conf[i] ends up holding the highest class score for which box i
        # was returned by cpu_nms; boxes never returned stay at 0.
        max_conf = np.zeros((boxes.shape[0]))
        # Class index 0 is skipped (presumably the background class — confirm).
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            # One row per box: the box columns followed by this class's score.
            dets = np.hstack((boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            # The result of cpu_nms is used as an index array into the boxes.
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep], cls_scores[keep], max_conf[keep])

        # Select boxes above the confidence threshold, then force the count
        # into [MIN_BOXES, MAX_BOXES] by taking the top-scoring boxes instead.
        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]
       
        image_feat = pooled_feat[keep_boxes]
        # Box coordinates are only passed on when requested on the command line.
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None    

        # Output name: everything before the FIRST dot of the file name + '.npy'.
        # NOTE(review): names containing several dots are truncated here.
        output_file = os.path.join(args.output_dir, im_file.split('.')[0]+'.npy')
        # Besides features and boxes, the kept indices and the full class
        # probabilities are handed to save_features.
        save_features(output_file, image_feat, image_bboxes, keep_boxes, cls_prob)
        #torch.cuda.empty_cache()
Esempio n. 19
0
    def __call__(self, sample, wav_file, name_set):
        """Return the frame-level features and label for one sample.

        If a feature file already exists at the lookup path it is loaded with
        ``np.load``; otherwise the features are computed from the waveform,
        optionally extended with delta coefficients, and optionally saved.

        Args:
            sample (dict): Must provide the keys 'wave' (the waveform) and
                'label'.
            wav_file (string): Path of the wav file the sample comes from;
                used to derive the feature file names.
            name_set (string): Sub-directory name used when saving, i.e.
                ``<out_dir>/<feat_type>/<name_set>/``.

        Returns:
            dict: ``{'feature': <features>, 'label': <label>}``.

        Raises:
            ValueError: If features must be computed and ``self.deltas`` is
                not None, 0, 1 or 2 (previously this case silently returned
                None).
        """
        waveform, label = sample['wave'], sample['label']
        config_file = self.config_file
        deltas = self.deltas

        # frame-level feats params/config
        params = utils.read_conf_file(file_name=config_file,
                                      conf_section='DEFAULTS')

        # If the features were already computed, LOAD them instead.
        # NOTE(review): this lookup path (<out_dir>//<feat_type>_<wav_name>)
        # is not inside the directory used for saving below
        # (<out_dir>/<feat_type>/<name_set>/) — confirm against the naming
        # used by utils.save_features.
        wav_name = os.path.splitext(os.path.basename(wav_file))[0]
        file_name = '/{0}_{1}'.format(self.feat_type, wav_name)
        feat_file_path = self.out_dir + '/' + file_name
        if os.path.isfile(feat_file_path):
            # NOTE(review): np.load yields a NumPy array, whereas the branch
            # below returns the output of torch.cat when deltas are used.
            return {'feature': np.load(feat_file_path), 'label': label}

        # Treat None as "no deltas" and reject anything else unsupported,
        # rather than falling through and implicitly returning None.
        if deltas is None:
            deltas = 0
        if deltas not in (0, 1, 2):
            raise ValueError(
                'deltas must be None, 0, 1 or 2, got {!r}'.format(deltas))

        # Compute features
        feat = execute_extraction_function(feat_type=self.feat_type,
                                           waveform=waveform,
                                           **params)

        # Compute derivatives if asked for
        if deltas >= 1:
            delta1 = torchaudio.functional.compute_deltas(
                feat)  # compute 1st order
            stacked = [feat, delta1]
            if deltas == 2:
                stacked.append(torchaudio.functional.compute_deltas(
                    delta1))  # compute 2nd order
            feat = torch.cat(stacked, 1)

        # Save features if asked for
        if self.save:
            out_dir = self.out_dir + '/{0}/{1}/'.format(self.feat_type,
                                                        name_set)
            # Files with deltas get a '<wav_file>_<order>del' name.
            if deltas == 0:
                save_name = wav_file
            else:
                save_name = '{0}_{1}del'.format(wav_file, deltas)
            utils.save_features(out_dir, self.feat_type, save_name, feat)
            utils.copy_conf(config_file, out_dir, self.feat_type)

        return {'feature': feat, 'label': label}
Esempio n. 20
0
 avg_loss_IQ = AverageMeter()
 feature_batch_counter = 0
 for i, data in enumerate(dataloader, 0):
     # if save_features, save at the beginning of an epoch
     if opt.feature_save and epoch % opt.feature_save_every == 0 and feature_batch_counter < opt.feature_num_batches:
         if len(feature_batches) < opt.feature_num_batches:
             eval_x, eval_y = data
             eval_x = eval_x.cuda()
             feature_batches.append((eval_x, eval_y))
         # feature for real
         eval_x, eval_y = feature_batches[feature_batch_counter]
         with torch.no_grad():
             eval_f = netD.get_feature(eval_x)
         utils.save_features(
             eval_f.cpu().numpy(),
             os.path.join(
                 outff,
                 f'real_epoch_{epoch}_batch_{feature_batch_counter}_f.npy'))
         utils.save_features(
             eval_y.cpu().numpy(),
             os.path.join(
                 outff,
                 f'real_epoch_{epoch}_batch_{feature_batch_counter}_y.npy'))
         # feature for fake
         with torch.no_grad():
             eval_x = netG(feature_eval_noises[feature_batch_counter],
                           feature_eval_labels[feature_batch_counter])
             eval_y = feature_eval_labels[feature_batch_counter]
             eval_f = netD.get_feature(eval_x)
         utils.save_features(
             eval_f.cpu().numpy(),
Esempio n. 21
0
    features = utils.features_from_image(all_logos, model, my_preprocess)

    return features, all_logos, brand_map

if __name__ == '__main__':
    # Script entry point: extract InceptionV3 features for the train-set logos
    # and store them, first at the model's default input size and then at a
    # reduced 200x200 input for the last layer and three truncated models.

    model, preprocess_input, input_shape = utils.load_extractor_model(
        'InceptionV3', flavor=0)

    def my_preprocess(x):
        # Pad to the current module-level input_shape, then apply the
        # extractor's own preprocessing.
        return preprocess_input(utils.pad_image(x, input_shape))

    print('Extracting features from LogosInTheWild database (train set) - this will take a while (~5 minutes)')
    features, all_logos, brand_map = extract_litw_features(
        'data_all_train.txt', model, my_preprocess)

    print('Processed {} logos, transformed into feature vectors'.format(len(features)))

    # save inception features at default size 299*299
    utils.save_features('./model_poi/inception_logo_features.hdf5',
                        features, brand_map, input_shape)

    # save features for Inception with smaller input: 200 instead of 299 - last layer is 4*4 instead of 8*8
    # Extract features at last layer as well as after last 3 inception blocks (mixed9,8,7)
    input_shape = (200, 200, 3)

    def new_preprocess(x):
        # Same preprocessing, evaluated against the reduced input_shape.
        return preprocess_input(utils.pad_image(x, input_shape))

    trunc_layer = [-1, 279, 248, 228]
    for i_layer, layer_index in enumerate(trunc_layer):
        # Model truncated at the selected layer (index -1 is the last layer).
        model_out = Model(inputs=model.inputs,
                          outputs=model.layers[layer_index].output)
        features = utils.features_from_image(all_logos, model_out, new_preprocess)

        # Only the truncated variants get a '_trunc<N>' file-name suffix.
        if i_layer > 0:
            extra = '_trunc{}'.format(i_layer)
        else:
            extra = ''
        out_path = './model_poi/inception_logo_features_200{}.hdf5'.format(extra)
        utils.save_features(out_path, features, brand_map, input_shape)

Esempio n. 22
0
        # Output name: 'imp_vector.h5' for the (dataset 2, i == 3) special
        # case, otherwise the name held in fn.
        file_n = 'imp_vector.h5' if args.dataset == 2 and i == 3 else fn
        file_name = os.path.join(BERT_output_dir, file_n)

        # Sorted so that the partial files are merged in a stable order.
        files = sorted(glob.glob(pattern_))
        print('found {} files for {}'.format(len(files), pattern_))
        if i == 0:
            # For i == 0 each loaded file is indexed by the keys 'Y', 'name'
            # and 'pos'; the values are concatenated per key across files.
            Y_data, name_data, pos_data = [], [], []
            for file in files:
                data = load_features(file)
                # 'Y': Y, 'name': name, 'pos': pos
                Y_data = Y_data + data['Y']
                name_data = name_data + data['name']
                pos_data = pos_data + data['pos']
            save_features(file_name, {
                'Y': Y_data,
                'name': name_data,
                'pos': pos_data
            })
        else:
            # Otherwise the loaded objects themselves are concatenated with +.
            data_all = []
            for file in files:
                data = load_features(file)
                data_all = data_all + data
            # The special case named above is written with the h5 saver.
            if args.dataset == 2 and i == 3:
                save_features_h5(file_name, data_all)
            else:
                save_features(file_name, data_all)
            # NOTE(review): this message is only printed in the else branch,
            # not after the i == 0 save — confirm whether that is intended.
            print('saved in {}'.format(file_name))

    # convert to mat file
    convert2mat(BERT_output_dir, is_force=args.is_force)
Esempio n. 23
0
def extract_feature():
    """Run a Faster R-CNN over a directory of images and save region features.

    Command-line arguments are read via ``parse_args()``. For every file in
    ``args.image_dir`` the pooled features of the selected boxes (and the box
    coordinates when ``args.save_boxes`` is set) are passed to
    ``save_features`` with an output path of
    ``<args.output_dir>/<file name without extension>.npy``.

    Between MIN_BOXES and MAX_BOXES boxes are kept per image: those whose best
    class score reaches CONF_THRESH, or the top-scoring ones when that count
    falls outside the allowed range.

    Files that OpenCV cannot read as an image are skipped with a message.
    """
    MIN_BOXES = 10
    MAX_BOXES = 100
    N_CLASSES = 1601
    CONF_THRESH = 0.2
    args = parse_args()

    if args.cfg_file is not None:
        cfg_from_file(args.cfg_file)

    os.makedirs(args.output_dir, exist_ok=True)

    use_cuda = torch.cuda.is_available()
    assert use_cuda, 'Works only with CUDA'
    device = torch.device('cuda') if use_cuda else torch.device('cpu')
    cfg.CUDA = use_cuda
    np.random.seed(cfg.RNG_SEED)

    # Load the model.
    fasterRCNN = resnet(N_CLASSES, 101, pretrained=False)
    fasterRCNN.create_architecture()
    fasterRCNN.load_state_dict(torch.load(args.model_file))
    fasterRCNN.to(device)
    fasterRCNN.eval()
    print('Model is loaded.')

    # Load images.
    imglist = os.listdir(args.image_dir)
    num_images = len(imglist)
    print('Number of images: {}.'.format(num_images))

    # Extract features.
    for im_file in tqdm(imglist):
        im = cv2.imread(os.path.join(args.image_dir, im_file))
        if im is None:
            # cv2.imread signals failure by returning None instead of raising;
            # the directory listing may contain non-image entries, and one bad
            # file should not abort the whole run.
            print('Skipping unreadable image file: {}'.format(im_file))
            continue

        blobs, im_scales = get_image_blob(im)
        assert len(im_scales) == 1, 'Only single-image batch is implemented'

        im_data = torch.from_numpy(blobs).permute(0, 3, 1, 2).to(device)
        im_info = torch.tensor([[blobs.shape[1], blobs.shape[2],
                                 im_scales[0]]]).to(device)
        # Dummy ground-truth inputs for the forward pass.
        gt_boxes = torch.zeros(1, 1, 5).to(device)
        num_boxes = torch.zeros(1).to(device)

        with torch.no_grad():
            # Only the first two outputs and the last one are used.
            (rois, cls_prob, _, _, _, _, _, _,
             pooled_feat) = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

        # Columns 1..4 of each roi are the box; divide by the image scale
        # (presumably back to original image coordinates — confirm).
        boxes = rois.data.cpu().numpy()[:, :, 1:5].squeeze()
        boxes /= im_scales[0]
        cls_prob = cls_prob.data.cpu().numpy().squeeze()
        pooled_feat = pooled_feat.data.cpu().numpy()

        # Keep only the best detections: max_conf[i] is the highest class
        # score for which box i was returned by cpu_nms. Class index 0 is
        # skipped (presumably background — confirm).
        max_conf = np.zeros((boxes.shape[0]))
        for cls_ind in range(1, cls_prob.shape[1]):
            cls_scores = cls_prob[:, cls_ind]
            dets = np.hstack(
                (boxes, cls_scores[:, np.newaxis])).astype(np.float32)
            keep = np.array(cpu_nms(dets, cfg.TEST.NMS))
            max_conf[keep] = np.where(cls_scores[keep] > max_conf[keep],
                                      cls_scores[keep], max_conf[keep])

        # Threshold, then clamp the number of boxes to [MIN_BOXES, MAX_BOXES]
        # by falling back to the top-scoring boxes.
        keep_boxes = np.where(max_conf >= CONF_THRESH)[0]
        if len(keep_boxes) < MIN_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MIN_BOXES]
        elif len(keep_boxes) > MAX_BOXES:
            keep_boxes = np.argsort(max_conf)[::-1][:MAX_BOXES]

        image_feat = pooled_feat[keep_boxes]
        if args.save_boxes:
            image_bboxes = boxes[keep_boxes]
        else:
            image_bboxes = None

        # splitext strips only the final extension, so names such as
        # 'img.v2.jpg' and 'img.v3.jpg' no longer collapse to 'img.npy'.
        output_file = os.path.join(args.output_dir,
                                   os.path.splitext(im_file)[0] + '.npy')
        save_features(output_file, image_feat, image_bboxes)