Example #1
def prepare_data_for_training(args):
    """ make dataset ready for training and validation"""
    # Form the train/test splits and write them to disk
    dataset = data.Dataset(args)
    # get image classes and image counts in each class
    label_map = dataset.get_class_info()
    class_count = len(list(label_map.values()))
    # split the data and store it in log dir
    df_train, df_test = dataset.split_dataset()

    # Set up the dataset augmentations
    image_data = augment.Augmentation(args)
    # Build the data generators for the training and test images
    train_data_gen, _ = image_data.map_fn_train(df_train)
    test_data_gen, _ = image_data.map_fn_test(df_test)

    return train_data_gen, test_data_gen, df_train, df_test, class_count
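
A hedged sketch of how this helper might be driven from the command line; the flag names and what `data.Dataset`/`augment.Augmentation` actually read from `args` are guesses for illustration, not taken from the original:

import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', required=True)    # hypothetical flag
parser.add_argument('--log_dir', default='./logs')  # hypothetical flag
args = parser.parse_args()

train_gen, test_gen, df_train, df_test, n_classes = prepare_data_for_training(args)
print('Dataset ready with %d classes' % n_classes)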
Example #2
import easydict as ed
import mxnet as mx
import numpy as np
# SSD, generate_target and dataset_utils come from this example's own project

param_training = ed.EasyDict()
param_training.epoch = 25
param_training.lr_schedule = 'poly'
param_training.init_lr = 0.005

param_model = ed.EasyDict()
param_model.rand_n = 30
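
These configs rely on `easydict`, whose EasyDict makes attribute and key access interchangeable; a quick self-contained illustration (not from the original):

import easydict as ed

cfg = ed.EasyDict()
cfg.epoch = 25
assert cfg.epoch == cfg['epoch'] == 25  # attribute and key access see the same value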

if __name__ == '__main__':
    # Visualize the matching results of rank matching
    import dataset_utils
    import matplotlib.pyplot as plt

    root_path = r'D:\Documents\Data_Files\Datasets\Pascal\VOC2012'
    dataset = dataset_utils.Dataset(root_path)
    img, label = dataset[4]

    backbone_root_path = 'd:/Documents/Data_Files/Parameters'
    ssd = SSD(backbone_root_path)

    mx_img, mx_label = ssd.get_transform_fn_val()(img, label)
    mx_label = mx_label.expand_dims(axis=0)  # (1, M, 5)
    tensor_preds = ssd(mx_img.expand_dims(axis=0))
    cls_targets, box_targets, pos_neg_samples = generate_target(
        mx.nd.array(ssd.get_anchors()).expand_dims(axis=0),  # anchors: (1, N, 4)
        tensor_preds[:, :, 21:],   # predictions past the 21 class scores
        mx_label[:, :, -4:],       # ground-truth boxes
        mx_label[:, :, 0])         # ground-truth class ids
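    # generate_target is project-specific; from how pos_neg_samples is used
    # below, entries > 0 appear to mark anchors matched to a ground-truth box.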

    pos_idx = np.where(pos_neg_samples[0].asnumpy() > 0)[0]  # indices of positive anchors, shape (P,)

    fig = plt.figure()
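
The plotting code in this example is truncated right after the figure is created. A minimal sketch of a typical continuation, assuming `img` is an HxWx3 pixel array and the anchors are (N, 4) corner-format boxes in the same pixel coordinates (both assumptions, not from the original):

    anchors = np.asarray(ssd.get_anchors())
    plt.imshow(img.asnumpy() if hasattr(img, 'asnumpy') else img)
    ax = plt.gca()
    for x1, y1, x2, y2 in anchors[pos_idx]:
        # Draw each positively matched anchor as an unfilled red rectangle
        ax.add_patch(plt.Rectangle((x1, y1), x2 - x1, y2 - y1,
                                   fill=False, edgecolor='red'))
    plt.show()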
Example #3
    # FLAGS, param, dataset_constants and dataset_utils are defined earlier in this example's source file.
    param.inputWahlDescriptor = FLAGS.desc_type != 0  # use the Wahl descriptor iff desc_type is non-zero
    param.dim_model = FLAGS.conv_dim

    data_dir = dataset_constants.m40_desc_dataset_dict.get(FLAGS.dataset, 'none')
    if data_dir == 'none':
        print('Unsupported dataset type %s, exit!' % FLAGS.dataset)
        exit(1)
    param.input_dir = data_dir

    if param.inputWahlDescriptor:
        descriptor_to_use = 'wahl_long_desc_run_1_'
    else:
        descriptor_to_use = 'merged_proposed_kl_bugfix4_desc_run_0_'

    dict_labels = dataset_utils.m40_label_to_int_dict
    m40_dataset = dataset_utils.Dataset(param.input_dir, dict_labels,
                                        descriptor_to_use)

    N_classes = len(dict_labels)
    classes_train = m40_dataset.train_label.shape[1]
    classes_test = m40_dataset.test_label.shape[1]
    assert N_classes == classes_train
    assert N_classes == classes_test
    print('Used classes %d, train %d test %d' %
          (N_classes, classes_train, classes_test))

    # Check the array shapes to confirm the data loaded as expected
    train_feat_shape = m40_dataset.train_feat.shape
    train_label_shape = m40_dataset.train_label.shape
    print("Training dataset on m40, N_train %dx%d, N_label_train %dx%d" %
          (train_feat_shape[0], train_feat_shape[1],
           train_label_shape[0], train_label_shape[1]))
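
Since `train_label` has one column per class (verified by the asserts above), the labels appear to be one-hot encoded; under that assumption, integer class ids can be recovered as follows (a sketch, not part of the original):

    import numpy as np

    # Assumes each label row is one-hot: a single 1 in the true class column.
    train_class_ids = np.argmax(m40_dataset.train_label, axis=1)  # shape (N_train,)
    test_class_ids = np.argmax(m40_dataset.test_label, axis=1)    # shape (N_test,)
    print('First training class ids:', train_class_ids[:5])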
Example #4
    train_df, val_df = load_data(data_config.data_source)
    vocab_idx, vocab_phone_CZidx, vocab_phone_ENidx, vocab_phone_HUidx, vocab_phone_RUidx = load_vocabularies(data_config.data_source)

    # Get useful dimensions from the vocabularies; the +1 presumably reserves
    # one extra id (e.g. 0) for padding
    PHONE_CZ_ALPHABET_LEN = len(vocab_phone_CZidx) + 1
    PHONE_EN_ALPHABET_LEN = len(vocab_phone_ENidx) + 1
    PHONE_HU_ALPHABET_LEN = len(vocab_phone_HUidx) + 1
    PHONE_RU_ALPHABET_LEN = len(vocab_phone_RUidx) + 1
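
If id 0 is indeed reserved for padding, sequences of unequal length can be brought to a common length like this (hypothetical helper, for illustration only):

    def pad_to_len(ids, max_len, pad_id=0):
        """Right-pad (or truncate) a list of ids to exactly max_len."""
        return (ids + [pad_id] * max_len)[:max_len]

    # pad_to_len([5, 2, 9], 6) -> [5, 2, 9, 0, 0, 0]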


    print('Generating ids')

    custom_unit_dict = {"sent_unit": "chars"}

    training_data = dataset_utils.Dataset(train_df, vocab_idx, vocab_phone_CZidx, vocab_phone_ENidx, vocab_phone_HUidx, vocab_phone_RUidx)
    training_data.generate(custom_unit_dict, has_class=True, add_start_end_tag=False)

    validation_data = dataset_utils.Dataset(val_df, vocab_idx, vocab_phone_CZidx, vocab_phone_ENidx, vocab_phone_HUidx, vocab_phone_RUidx)
    validation_data.generate(custom_unit_dict, has_class=True, add_start_end_tag=False)

    # Get useful dimensions from dataset
    MAX_CHAR_IN_SENT_LEN = find_max_len(training_data.ids_sentence, validation_data.ids_sentence)
    MAX_PHONE_CZ_LEN = find_max_len(training_data.ids_phones_CZ, validation_data.ids_phones_CZ)
    MAX_PHONE_EN_LEN = find_max_len(training_data.ids_phones_EN, validation_data.ids_phones_EN)
    MAX_PHONE_HU_LEN = find_max_len(training_data.ids_phones_HU, validation_data.ids_phones_HU)
    MAX_PHONE_RU_LEN = find_max_len(training_data.ids_phones_RU, validation_data.ids_phones_RU)
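
`find_max_len` is not shown in the snippet; given that it is called on pairs of id-sequence lists, a plausible implementation looks like this (hypothetical, inferred from usage):

    def find_max_len(*sequence_lists):
        """Return the length of the longest sequence across all given lists."""
        return max(len(seq) for seqs in sequence_lists for seq in seqs)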


    print('Preprocessing data')