Example #1
def test_loader_set_k(self, setup):
    l = loader.DataLoader('toy', 2, 'aggregate')
    assert l.k == 2
    assert len(l.filtered_crowd_df) == l.num_questions * l.k
    l.set_k(1)
    assert l.k == 1
    assert len(l.filtered_crowd_df) == l.num_questions * l.k
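The assertions above suggest that set_k refilters the crowd annotations so that filtered_crowd_df keeps exactly k rows per question.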
Example #2
def run(args):
    l = loader.DataLoader(args.dataset, args.k, args.mode, args.dataset_path,
                          args.crowd_annotations_path, args.ground_truths_path)
    data, gt = l.get_data()
    result, accuracy = algorithms.main(args, data, gt)

    ind_to_question_dict = l.get_ind_to_question_dict()
    ind_to_annotation_dict = l.get_ind_to_annotation_dict()

    result_annotations = pd.DataFrame(data=result, columns=['Annotation'])
    result_annotations.reset_index(level=0, inplace=True)
    result_annotations = result_annotations.rename(
        columns={'index': 'Question'})

    result_annotations['Question'] = result_annotations['Question'].map(
        ind_to_question_dict)
    result_annotations['Annotation'] = result_annotations['Annotation'].map(
        ind_to_annotation_dict)

    if args.print_result:
        print("Predictions:")
        print(result_annotations)
        if args.mode == 'test':
            print("Accuracy:")
            print(accuracy)
    if args.output is not None:
        utils.to_csv(result, args.output, ind_to_question_dict,
                     ind_to_annotation_dict)
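A hypothetical argparse setup that would produce the args this run() expects. Every flag name below is inferred from the attributes accessed in run() above, not taken from the project's actual CLI:

import argparse

def parse_args():
    # Hypothetical CLI sketch; flag names mirror the attributes used in run()
    p = argparse.ArgumentParser()
    p.add_argument('--dataset', default='toy')
    p.add_argument('--k', type=int, default=2)
    p.add_argument('--mode', choices=['aggregate', 'test'], default='aggregate')
    p.add_argument('--dataset-path', dest='dataset_path', default=None)
    p.add_argument('--crowd-annotations-path', dest='crowd_annotations_path', default=None)
    p.add_argument('--ground-truths-path', dest='ground_truths_path', default=None)
    p.add_argument('--print-result', dest='print_result', action='store_true')
    p.add_argument('--output', default=None)
    return p.parse_args()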
Example #3
def test_loader_custom_file_paths(self):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    copy_data_dir = os.path.join(current_dir, 'test_data')
    custom_crowd_file = os.path.join(copy_data_dir, 'crowd.csv')
    custom_gt_file = os.path.join(copy_data_dir, 'gold.csv')
    if not (os.path.exists(custom_crowd_file)
            and os.path.exists(custom_gt_file)):
        pytest.skip("Custom crowd and ground truth files missing")
    l = loader.DataLoader('toy',
                          2,
                          'test',
                          data_dir=None,
                          crowd_annotations_path=custom_crowd_file,
                          ground_truths_path=custom_gt_file)
    data, gt = l.get_data()
    assert data == {
        0: {0: [0], 1: [0]},
        1: {0: [1], 2: [3]},
        2: {0: [2], 1: [0]}
    }
    assert np.array_equal(gt, [0, 1, 2])
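Judging from this and the following tests, get_data() appears to return a nested dict mapping question index to {annotator index: [annotation indices]}, with the ground truths as a parallel array of label indices; the index-to-name mappings come from get_ind_to_question_dict() and get_ind_to_annotation_dict() (see Example #16).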
Example #4
def test_loader_custom_dataset_path(self):
    current_dir = os.path.dirname(os.path.abspath(__file__))
    copy_data_dir = os.path.join(current_dir, 'test_data')
    if not os.path.exists(copy_data_dir):
        pytest.skip("Custom dataset missing")
    l = loader.DataLoader('toy', 2, 'aggregate', data_dir=copy_data_dir)
    data, _ = l.get_data()
    assert data == {
        0: {0: [0], 1: [0]},
        1: {0: [1], 2: [3]},
        2: {0: [2], 1: [0]}
    }
Example #5
def main(config_loader):
    # load the train dataset
    train_dir = os.path.join(config_loader.data_dir, "train")
    train_loader = loader.DataLoader(train_dir, is_training=True)
    train_dataset = train_loader.load(config_loader)
    # load the test dataset
    test_dir = os.path.join(config_loader.data_dir, "test")
    test_loader = loader.DataLoader(test_dir, is_training=False)
    test_dataset = test_loader.load(config_loader)

    training_callback = None
    # decide whether to use the default training callback
    callback_name = config_loader.training_callback
    if callback_name in ("default", None):
        # keep the default callback
        pass
    else:
        # split the dotted path into module name and class name;
        # rsplit keeps dots inside package paths intact
        module_name, class_name = callback_name.rsplit(".", 1)
        # import the module and instantiate the callback class
        module = importlib.import_module(module_name)
        Callback = getattr(module, class_name)
        training_callback = Callback(config_loader=config_loader)

    # train on the requested device
    if config_loader.training_device == "CPU":
        with tf.device("/cpu:0"):
            train(train_dataset=train_dataset,
                  test_dataset=test_dataset,
                  config_loader=config_loader,
                  training_callback=training_callback)
    else:
        with tf.device("/gpu:0"):
            train(train_dataset=train_dataset,
                  test_dataset=test_dataset,
                  config_loader=config_loader,
                  training_callback=training_callback)
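A minimal sketch of a custom callback this dynamic import could load. The module and class names are hypothetical; the only contract assumed is the config_loader keyword taken by the constructor call above:

# my_callbacks.py (hypothetical module; setting training_callback to
# "my_callbacks.LoggingCallback" in the config would make main() load it)
class LoggingCallback:
    def __init__(self, config_loader):
        # matches the Callback(config_loader=...) call in main();
        # everything else is up to the callback implementation
        self.config_loader = config_loader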
Example #6
    def __init__(self, training_data_path, model_path, validation_percentage,
                 initial_learning_rate, decay_step, decay_rate, epoch,
                 img_size, n_class, batch_size, batch_norm_mode, depth):

        self.training_path = training_data_path
        self.model_path = model_path
        self.val_data_cnt = validation_percentage
        self.init_learning = initial_learning_rate
        self.decay_step = decay_step
        self.decay_rate = decay_rate
        self.epoch_num = epoch
        self.batch_size = batch_size
        self.batch_mode = batch_norm_mode
        self.depth = depth

        self.data_loader = loader.DataLoader(img_size=img_size)

        print('Data loading started')
        dstime = time.time()
        self.img_list, self.label_list, self.data_count = self.data_loader.data_list_load(
            self.training_path, mode='train')
        self.shuffled_img_list, self.shuffled_label_list = self.data_loader.data_shuffle(
            self.img_list, self.label_list)
        detime = time.time()
        print('Data loading complete. Elapsed time:', detime - dstime)

        print('Dataset split started')
        dsstime = time.time()
        self.trainX, self.trainY, self.valX, self.valY = self.data_loader.data_split(
            self.shuffled_img_list,
            self.shuffled_label_list,
            val_size=self.val_data_cnt)

        dsetime = time.time()
        print('Train dataset count:', len(self.trainX),
              'Validation dataset count:', len(self.valX))
        print('Data split complete. Elapsed time:', dsetime - dsstime)

        self.model = unet.Model(batch_norm_mode=self.batch_mode,
                                depth=self.depth,
                                img_size=img_size,
                                n_channel=1,
                                n_class=n_class,
                                batch_size=self.batch_size)

        # TensorBoard summary writer; `option_name` is expected to be
        # defined at module scope in the original source
        self.merged_summary = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter('./logs/' + option_name)
Example #7
def test_loader_test(self, setup):
    l = loader.DataLoader('toy', 2, 'test')
    data, gt = l.get_data()
    assert data == {
        0: {0: [0], 1: [0]},
        1: {0: [1], 2: [3]},
        2: {0: [2], 1: [0]}
    }
    assert np.array_equal(gt, [0, 1, 2])
Example #8
def test_loader_aggregate(self, setup):
    l = loader.DataLoader('toy', 2, 'aggregate')
    data, _ = l.get_data()
    assert data == {
        0: {0: [0], 1: [0]},
        1: {0: [1], 2: [3]},
        2: {0: [2], 1: [0]}
    }
Example #9
import tensorflow as tf
tf.enable_eager_execution()
import loader
import config
import matplotlib.pyplot as plt
import numpy as np

conf = config.ConfigLoader(
    "C:\\Users\\happy\\Desktop\\Venus\\AdvNet\\app\\abc\\config.txt")

load = loader.DataLoader(
    "C:\\Users\\happy\\Desktop\\Venus\\AdvNet\\app\\abc\\structure_scene_box\\train",
    True)
data = load.load(config_loader=conf)

for in1, in2, out in data.take(1):
    x = [in1[0], in2[0], out[0]]
    for i in range(3):
        plt.subplot(1, 3, i + 1)
        # scale pixel values from [-1, 1] back into [0, 1] for plotting
        drawable = np.clip(np.array(x[i]) * 0.5 + 0.5, 0, 1)
        plt.imshow(drawable)
        plt.axis('off')
    plt.show()
Example #10
def getInputData():
    loader = ld.DataLoader(config.datafile)
    return loader.load()
Example #11
import loader

d = loader.DataLoader()
review = open('data/reviews.csv', 'w', encoding='utf-8')
categories = open('data/cat.csv', 'w', encoding='utf-8')

c = 0
dcat = {}

for business in d.alldata():
    c += 1
    # count how often each category occurs across all businesses
    for cat in business.categories:
        dcat[cat] = dcat.get(cat, 0) + 1

    categories.write(','.join(business.categories))
    categories.write('\n')

    # flatten each business's reviews onto a single line
    review.write(''.join(business.reviews).replace('\t', ' ').replace(
        '\n', ' '))
    review.write('\n')
    if c % 1000 == 0:
        print(c, '/', 85901)

review.close()
categories.close()

# write categories ranked by frequency, most common first
f = open('data/label_rank.csv', 'w', encoding='utf-8')
lst = [(v, k) for k, v in dcat.items()]
for _, cat in sorted(lst, reverse=True):
    f.write(cat.strip())
    f.write('\n')
f.close()
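The frequency counting and ranking above could also be expressed with collections.Counter; a minimal equivalent sketch, assuming the same dcat contents:

from collections import Counter

dcat = Counter()
# inside the loop: dcat.update(business.categories)

# most_common() already sorts by count, descending
with open('data/label_rank.csv', 'w', encoding='utf-8') as f:
    for cat, _ in dcat.most_common():
        f.write(cat.strip() + '\n')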
Example #12
def eval_all(data_path,
             model_path,
             visual_result_dir=None,
             watermark_path=None,
             watermark_binary=False,
             decode_path=None,
             attack_test_func=None):
    """
    Evaluation function: computes the model's PSNR and SSIM and, if a
    watermark is present, the watermark's PSNR or BER.
    @since 2019.11.27
    @author yuwei
    """

    # create an empty config; this is only used when loading data
    configs = config.ConfigLoader()
    # load data
    data_loader = loader.DataLoader(data_path, is_training=False)
    dataset = data_loader.load(configs)
    print("load data....")

    # load watermark
    watermark_enable = False
    wm_target = None
    if watermark_path is not None:
        watermark_enable = True
        """
        @author yuwei
        @update 2019.11.28
        change_scale was not specified before, so the watermark was read
        in [0, 1] instead of the required [-1, 1], which corrupted the
        subsequent BER calculation. Fixed now.
        """
        wm_target = train_tool.read_image(watermark_path,
                                          32,
                                          32,
                                          binary=watermark_binary,
                                          change_scale=True)
        print("load watermark....")

    # load model
    print("load model....")
    model = model_use.GeneratorModel(model_path=model_path,
                                     watermark=watermark_enable,
                                     decoder_path=decode_path)

    if visual_result_dir is not None and not os.path.exists(visual_result_dir):
        os.mkdir(visual_result_dir)

    # start eval
    print("start eval....")
    """
    The result set is a key-value dict, which saves
    1. mean_value, 2. value_list (the mean value is calculated from this)
    """
    result_set = {}
    # the value list saves the per-image results
    value_list = []
    # default values
    image_mean_psnr = 0
    image_mean_ssim = 0
    wm_mean_error = 0
    # image count
    count = 0
    for input_image, ground_truth in dataset:
        # per-image results
        result_each = {}

        # generate
        output_tensor, wm_tensor, wm_feature = model.generate(
            input_image, attack_test_func=attack_test_func)

        # evaluate the image
        image_result_each = evaluate(output_tensor,
                                     ground_truth,
                                     psnr_enable=True,
                                     ssim_enable=True,
                                     ber_enable=False)

        # save results
        result_each["image_psnr"] = image_result_each["psnr"]
        result_each["image_ssim"] = image_result_each["ssim"]

        # accumulate totals
        image_mean_psnr = image_mean_psnr + result_each["image_psnr"]
        image_mean_ssim = image_mean_ssim + result_each["image_ssim"]

        if watermark_path is not None:
            # evaluate the watermark
            wm_result_each = evaluate(wm_tensor,
                                      wm_target,
                                      psnr_enable=(not watermark_binary),
                                      ssim_enable=False,
                                      ber_enable=watermark_binary)

            # calculate the watermark error
            if watermark_binary:
                result_each["wm_ber"] = wm_result_each["ber"]
                wm_mean_error = wm_mean_error + result_each["wm_ber"]
            else:
                result_each["wm_psnr"] = wm_result_each["psnr"]
                wm_mean_error = wm_mean_error + result_each["wm_psnr"]

        # append
        value_list.append(result_each)

        # save visual results
        if visual_result_dir is not None:

            # basic
            image_list = [input_image, ground_truth, output_tensor]
            title_list = ["IN", "GT", "PR"]

            # watermark
            if watermark_path is not None:
                image_list.append(wm_tensor)
                image_list.append(wm_feature)
                title_list.append("WM")
                title_list.append("WF")

            # save image
            train_tool.save_images(image_list, title_list, visual_result_dir,
                                   count + 1)

        # one image test finished
        count = count + 1
        # this print flushes in place on the same line
        print("\r" + "testing image {} ...".format(count), end='', flush=True)

    # move to a new line now
    print("")
    # calculate image mean values
    mean_value = {}
    image_mean_psnr = image_mean_psnr / count
    mean_value["psnr"] = image_mean_psnr
    image_mean_ssim = image_mean_ssim / count
    mean_value["ssim"] = image_mean_ssim

    # mean watermark error
    if watermark_path is not None:
        wm_mean_error = wm_mean_error / count

        if watermark_binary:
            mean_value["wm_ber"] = wm_mean_error
        else:
            mean_value["wm_psnr"] = wm_mean_error

    # save all
    result_set["mean_value"] = mean_value
    result_set["value_list"] = value_list

    # generate report
    eval_report = "The evaluation report comes here:\n'image psnr' = {}, 'image ssim' = {}".format(
        image_mean_psnr, image_mean_ssim)

    if watermark_path is not None:
        eval_report_wm = ", 'watermark {}' = {}"
        if watermark_binary:
            wm_format = "ber"
        else:
            wm_format = "psnr"
        eval_report_wm = eval_report_wm.format("wm_" + wm_format,
                                               mean_value["wm_" + wm_format])
        eval_report = eval_report + eval_report_wm

    # print(eval_report)
    return result_set, eval_report
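The evaluate() helper is not shown in this excerpt. A minimal sketch of what it might compute with TensorFlow's built-in image metrics, assuming tensors scaled to [-1, 1] as in the read_image fix above; only the keyword names are taken from the calls in eval_all():

import tensorflow as tf

def evaluate(output, target, psnr_enable=False, ssim_enable=False,
             ber_enable=False, max_val=2.0):
    # Hypothetical stand-in for the project's evaluate().
    # max_val is 2.0 because the tensors span [-1, 1].
    result = {}
    if psnr_enable:
        result["psnr"] = float(tf.reduce_mean(tf.image.psnr(output, target, max_val)))
    if ssim_enable:
        result["ssim"] = float(tf.reduce_mean(tf.image.ssim(output, target, max_val)))
    if ber_enable:
        # bit error rate: threshold both images at 0 and count mismatches
        bits_out = tf.cast(output > 0, tf.int32)
        bits_ref = tf.cast(target > 0, tf.int32)
        mismatch = tf.cast(tf.not_equal(bits_out, bits_ref), tf.float32)
        result["ber"] = float(tf.reduce_mean(mismatch))
    return result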
Example #13
def test_loader_invalid_k(self, setup):
    with pytest.raises(AssertionError):
        loader.DataLoader('toy', -1, 'aggregate')
Example #14
def test_loader_invalid_annotator_count(self, setup):
    with pytest.raises(AssertionError):
        loader.DataLoader('toy', 30, 'aggregate')
Example #15
def test_loader_invalid_mode(self, setup):
    with pytest.raises(AssertionError):
        loader.DataLoader('toy', 2, 'mode')
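Taken together, the three tests above indicate that the DataLoader constructor validates its inputs with plain assertions, raising AssertionError for a negative k, a k exceeding the available annotator count, and an unknown mode string.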
Example #16
def test_loader_get_dicts(self, setup):
    l = loader.DataLoader('toy', 2, 'aggregate')
    assert isinstance(l.get_ind_to_question_dict(), dict)
    assert isinstance(l.get_ind_to_annotation_dict(), dict)