Example #1
    def train(self, generator_train, X_train, generator_val, X_val):
        #filenames_train, filenames_val = patch_sampling.get_filenames()
        #generator = partial(patch_sampling.extract_random_patches, patch_size=P.INPUT_SIZE, crop_size=OUTPUT_SIZE)


        train_true = filter(lambda x: "True" in x, X_train)
        train_false = filter(lambda x: "False" in x, X_train)

        print "N train true/false", len(train_true), len(train_false)
        print X_train[:2]

        val_true = filter(lambda x: "True" in x, X_val)
        val_false = filter(lambda x: "False" in x, X_val)

        n_train_true = len(train_true)
        n_val_true = len(val_true)

        logging.info("Starting training...")
        for epoch in range(P.N_EPOCHS):
            self.pre_epoch()

            if epoch in LR_SCHEDULE:
                logging.info("Setting learning rate to {}".format(LR_SCHEDULE[epoch]))
                self.l_r.set_value(LR_SCHEDULE[epoch])


            np.random.shuffle(train_false)
            np.random.shuffle(val_false)

            train_epoch_data = train_true + train_false[:n_train_true]
            val_epoch_data = val_true + val_false[:n_val_true]

            np.random.shuffle(train_epoch_data)
            #np.random.shuffle(val_epoch_data)

            #Full pass over the training data
            train_gen = ParallelBatchIterator(generator_train, train_epoch_data, ordered=False,
                                                batch_size=P.BATCH_SIZE_TRAIN//3,
                                                multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                                n_producers=P.N_WORKERS_LOAD_AUGMENTATION)

            self.do_batches(self.train_fn, train_gen, self.train_metrics)

            # And a full pass over the validation data:
            val_gen = ParallelBatchIterator(generator_val, val_epoch_data, ordered=False,
                                                batch_size=P.BATCH_SIZE_VALIDATION//3,
                                                multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                                n_producers=P.N_WORKERS_LOAD_AUGMENTATION)

            self.do_batches(self.val_fn, val_gen, self.val_metrics)
            self.post_epoch()
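Example #1 rebalances the classes every epoch: the negative ("False") file list is reshuffled and only as many negatives as there are positives are kept, while the learning rate is stepped via an LR_SCHEDULE dict keyed by epoch. A minimal sketch of the balancing idea in isolation (the names positives/negatives are illustrative, not from the original code):

import numpy as np

def balanced_epoch_split(positives, negatives):
    # Reshuffle the (usually much larger) negative list each epoch and keep
    # only as many negatives as there are positives, then mix the two.
    negatives = list(negatives)
    np.random.shuffle(negatives)
    epoch_data = list(positives) + negatives[:len(positives)]
    np.random.shuffle(epoch_data)
    return epoch_data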
Example #2
    def train(self, train_splits, filenames_val, train_generator, val_generator):
        logging.info("Starting training...")

        #Loss value, epoch
        last_best = (1000000000000, -1)

        for epoch in range(P.N_EPOCHS):
            self.pre_epoch()

            filenames_train = train_splits[epoch]
            #Full pass over the training data
            np.random.shuffle(filenames_train)

            train_gen = ParallelBatchIterator(train_generator, filenames_train, ordered=False,
                                                batch_size=P.BATCH_SIZE_TRAIN,
                                                multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                                n_producers=P.N_WORKERS_LOAD_AUGMENTATION)

            _ = self.do_batches(self.train_fn, train_gen, self.train_metrics)

            # And a full pass over the validation data:
            #Shuffling not really necessary..
            np.random.shuffle(filenames_val)

            val_gen = ParallelBatchIterator(val_generator, filenames_val, ordered=False,
                                                batch_size=P.BATCH_SIZE_VALIDATION,
                                                multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                                n_producers=P.N_WORKERS_LOAD_AUGMENTATION)

            val_loss = self.do_batches(self.val_fn, val_gen, self.val_metrics)
            self.post_epoch()

            if val_loss < last_best[0]:
                last_best = (val_loss, epoch)

            #No improvement for 6 epochs
            if epoch - last_best[1] > 5:
                self.l_r = 0.1*self.l_r
                last_best = (val_loss, epoch)
                logging.info("REDUCING LEARNING RATE TO {}\n----\n\n".format(self.l_r.eval()))
Example #3
    def train(self, X_train, X_val):

        train_true = filter(lambda x: x[2] == 1, X_train)
        train_false = filter(lambda x: x[2] == 0, X_train)

        val_true = filter(lambda x: x[2] == 1, X_val)
        val_false = filter(lambda x: x[2] == 0, X_val)

        n_train_true = len(train_true)
        n_val_true = len(val_true)

        make_epoch_helper = functools.partial(make_epoch,
                                              train_true=train_true,
                                              train_false=train_false,
                                              val_true=val_true,
                                              val_false=val_false)

        logging.info("Starting training...")
        epoch_iterator = ParallelBatchIterator(make_epoch_helper,
                                               range(P.N_EPOCHS),
                                               ordered=False,
                                               batch_size=1,
                                               multiprocess=False,
                                               n_producers=1)

        for epoch_values in epoch_iterator:
            self.pre_epoch()
            train_epoch_data, val_epoch_data = epoch_values

            train_epoch_data = util.chunks(train_epoch_data,
                                           P.BATCH_SIZE_TRAIN)
            val_epoch_data = util.chunks(val_epoch_data,
                                         P.BATCH_SIZE_VALIDATION)

            self.do_batches(self.train_fn, train_epoch_data,
                            self.train_metrics)
            self.do_batches(self.val_fn, val_epoch_data, self.val_metrics)

            self.post_epoch()
            logging.info("Setting learning rate to {}".format(
                P.LEARNING_RATE * ((0.985)**self.epoch)))
            self.l_r.set_value(P.LEARNING_RATE * ((0.985)**self.epoch))
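Example #3 prepares each epoch's balanced sample list in a background producer (a ParallelBatchIterator over range(P.N_EPOCHS) with batch_size=1) and then slices it into mini-batches with util.chunks, while the learning rate decays exponentially as P.LEARNING_RATE * 0.985**epoch. The chunking helper is not shown; a typical implementation would look roughly like this (an assumption, not the project's actual util.chunks):

def chunks(sequence, size):
    # Yield consecutive slices of `sequence` of length `size`;
    # the final chunk may be shorter.
    for start in range(0, len(sequence), size):
        yield sequence[start:start + size]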
Example #4
if __name__ == "__main__":
    if len(sys.argv) < 3:
      print "first parameter is model file, second parameter is net file"
      quit()

    model_file = sys.argv[1]
    net_file = sys.argv[2]

    ### load all predicting filenames
    file_names = glob(P.FILENAMES_PREDICTION)
    batch_size = P.BATCH_SIZE_PREDICTION

    multiprocess = False
    gen = ParallelBatchIterator(partial(load_images, deterministic = True),
                                        file_names, ordered = True,
                                        batch_size = batch_size,
                                        multiprocess = multiprocess)

    caffe_net = caffe.Net(net_file, model_file, caffe.TEST)

    predictions_folder = model_file + '_predictions'
    util.make_dir_if_not_present(predictions_folder)

    all_probabilities = []
    all_filenames = []
    for i, batch in enumerate(tqdm(gen)):
        inputs, labels, weights, fnames = batch

        if inputs.shape[0] == batch_size:
            caffe_net.blobs['data'].data[...] = inputs.astype(np.float32, copy = False)
            caffe_net.forward()
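Example #4 only runs the forward pass for batches that exactly fill the net's input blob, because the blob's batch dimension is fixed; a smaller final batch is silently skipped. One way to keep those last samples is to zero-pad the batch up to the fixed size and discard the padded rows afterwards. A minimal helper for that (illustrative, not part of the original code):

import numpy as np

def pad_batch(inputs, batch_size):
    # Zero-pad `inputs` along the batch axis up to `batch_size`.
    # Returns the padded array and the number of real rows to keep.
    n_real = inputs.shape[0]
    if n_real == batch_size:
        return inputs, n_real
    padded = np.zeros((batch_size,) + inputs.shape[1:], dtype=inputs.dtype)
    padded[:n_real] = inputs
    return padded, n_real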
Example #5
def main():
    model_path = sys.argv[1]
    net_file = sys.argv[2]
    ### load all predicting filenames
    filenames = glob.glob(P.FILENAMES_PREDICTION)
    batch_size = P.BATCH_SIZE_PREDICTION
    ### get augmentation number
    test_im = np.zeros((64, 64))
    n_testtime_augmentation = len(testtime_augmentation(test_im, 0)[0])
    #### get the parallel data generator
    gen = ParallelBatchIterator(get_images_with_filenames,
                                filenames,
                                ordered=True,
                                batch_size=batch_size //
                                (3 * n_testtime_augmentation),
                                multiprocess=False,
                                n_producers=12)
    ### get wide resnet caffe model
    caffe_net = caffe.Net(net_file, model_path, caffe.TEST)
    ### do forward pass
    all_probabilities = []
    all_filenames = []
    print('begin predicting...')
    for i, batch in tqdm(enumerate(gen)):
        data, label, fnames = batch
        ### pass data and label to the caffe net; the batch size should be set to 12 (the atomic batch size) for both prediction and training
        if data.shape[0] == batch_size:
            caffe_net.blobs['data'].data[...] = data.astype(np.float32,
                                                            copy=False)
            caffe_net.blobs['label'].data[...] = label.astype(np.float32,
                                                              copy=False)
            caffe_net.forward()
            softmax_out = caffe_net.blobs['prob'].data.copy()
            all_probabilities += list(softmax_out[:, 1].tolist())
            all_filenames += list(fnames)
            # print("one batch done")
        else:
            break
    ### collect the probabilities for each filename (3 * n_testtime_augmentation values per file)
    d = {f: [] for f in filenames}
    for probability, f in zip(all_probabilities, all_filenames):
        d[f].append(probability)
    ### drop filenames that received no predictions; this happens when the final batch was skipped above because it was smaller than batch_size
    for key in d.keys():
        if len(d[key]) == 0:
            d.pop(key)
    print('end predicting')
    print('write to csv')
    cur_dir = os.path.dirname(os.path.abspath(__file__))
    candidates = pd.read_csv(
        os.path.join(cur_dir,
                     './../../../data/finalizedcandidates_unet_01.csv'))
    data = []
    ### get the mean prob for each filename and get the row number of the candidate
    for x in d.iteritems():
        fname, probabilities = x
        prob = np.mean(probabilities)
        candidates_row = int(os.path.split(fname)[1].replace('.pkl.gz',
                                                             '')) - 2
        print candidates_row
        data.append(
            list(candidates.iloc[candidates_row].values)[:-1] + [str(prob)])
    ### write the prob to a .csv
    submission = pd.DataFrame(
        columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'probability'],
        data=data)
    submission_path = os.path.join(cur_dir,
                                   './../../../data/submission_subset01.csv')
    submission.to_csv(
        submission_path,
        columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'probability'])
    print('finished!')
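Example #5 averages several test-time-augmentation predictions per candidate: every filename accumulates 3 * n_testtime_augmentation probabilities and their mean is written to the submission CSV. The aggregation step in isolation, with made-up data:

import numpy as np

# Hypothetical per-crop predictions: three augmented views per candidate file.
all_filenames = ["cand_002.pkl.gz"] * 3 + ["cand_003.pkl.gz"] * 3
all_probabilities = [0.91, 0.88, 0.93, 0.10, 0.07, 0.12]

per_file = {}
for prob, fname in zip(all_probabilities, all_filenames):
    per_file.setdefault(fname, []).append(prob)

# One averaged probability per candidate file.
mean_probs = {fname: float(np.mean(vals)) for fname, vals in per_file.items()}
print(mean_probs)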
Example #6
                image[0], target)  #Take color channel of image
            new_inputs += ims
            new_targets += trs

        new_filenames = []
        for fname in filenames:
            for i in range(int(len(new_inputs) / len(filenames))):
                new_filenames.append(fname)
        #print 'inputs:',len(inputs),'filenames:',len(filenames),'new_filenames:',len(new_filenames)
        return np.array(new_inputs, dtype=np.float32), np.array(
            new_targets, dtype=np.int32), new_filenames

    gen = ParallelBatchIterator(get_images_with_filenames,
                                filenames,
                                ordered=True,
                                batch_size=batch_size //
                                (3 * n_testtime_augmentation),
                                multiprocess=multiprocess,
                                n_producers=11)

    predictions_file = os.path.join(
        model_folder, 'predictions_subset{}_epoch{}_model{}.csv'.format(
            subsets, epoch, P.MODEL_ID))
    candidates = pd.read_csv('../../csv/unetRelabeled.csv')
    #candidates['probability'] = float(1337)

    print "Predicting {} patches".format(len(filenames))

    all_probabilities = []
    all_filenames = []
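The fragment above (Example #6) expands each loaded image into several augmented crops and then repeats every filename once per crop, so that predictions can later be grouped back by source file. A compact equivalent of that replication step (illustrative only):

def repeat_filenames(filenames, n_outputs):
    # Repeat each filename once per augmented crop, preserving file order.
    per_file = n_outputs // len(filenames)
    return [fname for fname in filenames for _ in range(per_file)]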
Example #7
def main():
    filenames_train, filenames_val = get_file_names()

    model = define_network(x)
    net = model['out']
    net = tf.reshape(net, [-1, 2])
    prob = tf.nn.softmax(net)
    cost = tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y)
    cost = cost * w
    cost = tf.reduce_mean(cost)
    train_op = tf.train.RMSPropOptimizer(0.0001, 0.9).minimize(cost)
    pred_op = tf.argmax(net, 1)

    generator_train = dataset.load_images

    train_gen = ParallelBatchIterator(generator_train, filenames_train, ordered=False,
                                      batch_size=batch_size,
                                      multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                      n_producers=P.N_WORKERS_LOAD_AUGMENTATION)
    val_gen = ParallelBatchIterator(generator_train, filenames_val, ordered=True,
                                    batch_size=batch_size,
                                    multiprocess=P.MULTIPROCESS_LOAD_AUGMENTATION,
                                    n_producers=P.N_WORKERS_LOAD_AUGMENTATION)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.8)
    model_dir = '/home/didia/didia/data/result/model/'
    saver = tf.train.Saver()
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        tf.global_variables_initializer().run()

        for epoch in range(epoch_num):
            saver.save(sess, os.path.join(model_dir, 'model'))

            for i, batch in enumerate(tqdm(train_gen)):
                inputs, targets, weights, _ = batch

                inputs = np.reshape(inputs, [-1, 512, 512, 1])
                targets = np.reshape(targets, [-1, 1])
                targets = np.concatenate((1 - targets, targets), axis=1)
                # print 'sum',np.sum(targets,axis=0)
                weights = np.reshape(weights, [-1])
                _, preds, py = sess.run([train_op, prob, pred_op], feed_dict={
                    x: inputs,
                    w: weights,
                    y: targets,
                    p_keep_conv: 0.8,
                    phase_train: True})

                np.save('/home/didia/didia/data/result/labels.npy', targets)
                np.save('/home/didia/didia/data/result/preds.npy', preds)

                if i % 30:
                    # Debug shortcut: this stops the epoch after two batches.
                    # Use `continue` instead to train the full epoch and only
                    # print statistics every 30th batch.
                    break
                targets = targets[:, 1]
                num = len(py)
                acc = np.sum(py == targets)
                p = np.sum(py)
                n = num - p

                fp = np.sum(py - targets == 1)
                fn = np.sum(py - targets == -1)
                tp = p - fp
                tn = n - fn
                print '\n\tT', tp, tn
                print '\tF', fp, fn

                print '\tY', np.sum(targets)
                print '\tN', num - np.sum(targets)

            tp_list = []
            fp_list = []
            tn_list = []
            fn_list = []
            for i, batch in enumerate(tqdm(val_gen)):
                if i > 100:
                    # break
                    pass
                inputs, targets, weights, _ = batch
                inputs = np.reshape(inputs, [-1, 512, 512, 1])
                # targets = np.reshape(targets,[-1])
                targets = np.reshape(targets, [-1, 1])
                targets = np.concatenate((1 - targets, targets), axis=1)
                weights = np.reshape(weights, [-1])
                py, preds = sess.run([pred_op, prob], feed_dict={
                    x: inputs,
                    p_keep_conv: 1,
                    phase_train: False, })

                np.save('/home/didia/didia/data/result/tensorflow/{:d}_labels.npy'.format(i), targets)
                np.save('/home/didia/didia/data/result/tensorflow/{:d}_preds.npy'.format(i), preds)

                targets = targets[:, 1]
                num = len(py)
                acc = np.sum(py == targets)
                p = np.sum(py)
                n = num - p

                fp = np.sum(py - targets == 1)
                fn = np.sum(py - targets == -1)
                tp = p - fp
                tn = n - fn

                tp_list.append(tp)
                tn_list.append(tn)
                fp_list.append(fp)
                fn_list.append(fn)

            tp, tn = np.mean(tp_list) / num, np.mean(tn_list) / num
            fp, fn = np.mean(fp_list) / num, np.mean(fn_list) / num
            print
            print '\n\tt', np.mean(tp_list) / num, np.mean(tn_list) / num
            print '\tf', np.mean(fp_list) / num, np.mean(fn_list) / num
            print '\taccuracy', tp + tn
            print '\trecall', tp / (tp + fn)
            print '\tdice', tp / (tp + fn + fp)
            print '\tprecision', tp / (tp + fp)
            # print

    return
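Example #7 derives its metrics from confusion-matrix counts: with integer predictions py and binary targets, py - targets == 1 marks false positives and py - targets == -1 marks false negatives. Note that the quantity printed as 'dice', tp / (tp + fn + fp), is the Jaccard index; the Dice coefficient would be 2*tp / (2*tp + fp + fn). The same arithmetic as a standalone sketch:

import numpy as np

def confusion_metrics(preds, targets):
    # preds, targets: 1-D arrays of 0/1 labels.
    preds = np.asarray(preds)
    targets = np.asarray(targets)
    tp = np.sum((preds == 1) & (targets == 1))
    tn = np.sum((preds == 0) & (targets == 0))
    fp = np.sum((preds == 1) & (targets == 0))
    fn = np.sum((preds == 0) & (targets == 1))
    return {
        'accuracy': (tp + tn) / float(len(preds)),
        'recall': tp / float(tp + fn),
        'precision': tp / float(tp + fp),
        'jaccard': tp / float(tp + fn + fp),       # printed as 'dice' in the example
        'dice': 2.0 * tp / float(2 * tp + fp + fn),
    }

print(confusion_metrics([1, 0, 1, 1], [1, 0, 0, 1]))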