コード例 #1
0
ファイル: predict.py プロジェクト: wassname/mammoviews
def get_predictions(data_dir, preprocessing_function=lambda x: x, model=model):
    """Score every image under ``data_dir`` and return the results as a table.

    Args:
        data_dir: Directory handed to ``flow_from_directory``.
        preprocessing_function: Callable installed on the generator, or one
            of the names ``'fliplr'`` (reverse the second-to-last axis),
            ``'identity'`` or ``'orig'`` (both leave the input unchanged).
        model: Object exposing ``predict_generator``; defaults to the
            module-level ``model`` bound when this function is defined.

    Returns:
        A ``pandas.DataFrame`` with one ``scores_<k>`` column per row of the
        transposed prediction array, plus ``files`` and ``label`` columns
        taken from the generator's ``filenames`` and ``classes``.

    Raises:
        ValueError: If ``preprocessing_function`` is an unrecognised string.
    """
    if isinstance(preprocessing_function, str):
        # Resolve a symbolic name to the callable it stands for.
        named_transforms = {
            'fliplr': lambda x: x[..., ::-1, :],
            'identity': lambda x: x,
            'orig': lambda x: x,
        }
        if preprocessing_function not in named_transforms:
            raise ValueError('unknown preprocessing_function:\t%s' %
                             preprocessing_function)
        preprocessing_function = named_transforms[preprocessing_function]

    generator = ImageDataGenerator(**norm_params)
    generator.preprocessing_function = preprocessing_function
    # shuffle=False keeps predictions aligned with filenames/classes below.
    batches = generator.flow_from_directory(data_dir,
                                            shuffle=False,
                                            **flowfromdir_params)

    scores = model.predict_generator(batches, steps=len(batches), verbose=1)

    columns = {}
    for index, column in enumerate(scores.T):
        columns["scores_%d" % index] = column
    columns["files"] = batches.filenames
    columns["label"] = batches.classes
    return pd.DataFrame(columns)
コード例 #2
0
def save_bottlebeck_features():
    """Run images through VGG16 (no top) and cache the outputs on disk.

    Does nothing when ``bottleneck_features_train.npy`` already exists,
    unless the first command-line argument is ``--force``.  Otherwise writes
    ``bottleneck_features_train.npy`` (arrays ``features`` and ``labels``)
    and ``bottleneck_features_validation.npy`` (arrays ``features``,
    ``labels`` and ``data``) with ``np.savez``.

    Relies on module-level names that are not defined in this function:
    ``train_dir``, ``validation_dir``, ``config``, ``nb_validation_samples``,
    ``preprocess_input`` and ``VGG16``.
    """
    if os.path.exists('bottleneck_features_train.npy') and (len(
            sys.argv) == 1 or sys.argv[1] != "--force"):
        print("Using saved features, pass --force to save new features")
        return
    datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    train_generator = datagen.flow_from_directory(
        train_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")

    val_generator = datagen.flow_from_directory(
        validation_dir,
        target_size=(config.img_width, config.img_height),
        batch_size=config.batch_size,
        class_mode="binary")

    # build the VGG16 network
    model = VGG16(include_top=False, weights='imagenet')

    print("Predicting bottleneck training features")
    training_labels = []
    training_features = []
    # NOTE(review): only 5 training batches are processed; the trailing
    # comment suggests nb_train_samples // config.batch_size was intended.
    for _ in range(5):  # nb_train_samples // config.batch_size):
        data, labels = next(train_generator)
        training_labels.append(labels)
        training_features.append(model.predict(data))
    training_labels = np.concatenate(training_labels)
    training_features = np.concatenate(training_features)
    # Write through an explicit handle so the file keeps its exact name and
    # is flushed and closed as soon as the archive is written.
    with open('bottleneck_features_train.npy', 'wb') as train_file:
        np.savez(train_file,
                 features=training_features,
                 labels=training_labels)

    print("Predicting bottleneck validation features")
    validation_labels = []
    validation_features = []
    validation_data = []
    for _ in range(nb_validation_samples // config.batch_size):
        data, labels = next(val_generator)
        validation_features.append(model.predict(data))
        validation_labels.append(labels)
        validation_data.append(data)
    validation_labels = np.concatenate(validation_labels)
    validation_features = np.concatenate(validation_features)
    validation_data = np.concatenate(validation_data)
    with open('bottleneck_features_validation.npy', 'wb') as validation_file:
        np.savez(validation_file,
                 features=validation_features,
                 labels=validation_labels,
                 data=validation_data)
コード例 #3
0
def Scaling(path, image_save_dir, aug_number):
    """Write randomly zoomed copies of one grayscale image to a directory.

    Args:
        path: Path of the source image.
        image_save_dir: Directory passed to the generator as ``save_to_dir``.
        aug_number: How many augmented batches (one image each) to draw.
            Values below 1 draw nothing.

    Output files are prefixed with the source file name up to its first
    ``.`` followed by ``_aug_Scal``.
    """
    if aug_number < 1:
        # Nothing was requested, so do not touch the image or the disk.
        return
    datagen = ImageDataGenerator(zoom_range=0.3, fill_mode='constant')
    img = load_img(path, grayscale=True)  # PIL image
    x = img_to_array(img)  # PIL image -> numpy array
    x = x.reshape((1, ) + x.shape)  # prepend a batch axis of length 1
    prefix = os.path.basename(path).split(".")[0]
    flow = datagen.flow(x,
                        batch_size=1,
                        save_to_dir=image_save_dir,
                        save_prefix=prefix + "_aug_" + 'Scal',
                        save_format='jpg')
    for drawn, _ in enumerate(flow, start=1):
        if drawn >= aug_number:
            break  # the generator never ends on its own, so stop explicitly
コード例 #4
0
def Random_affine_transform(path, image_save_dir, aug_number):
    """Write randomly sheared copies of one grayscale image to a directory.

    Args:
        path: Path of the source image.
        image_save_dir: Directory passed to the generator as ``save_to_dir``.
        aug_number: How many augmented batches (one image each) to draw.
            Values below 1 draw nothing.

    Output files are prefixed with the source file name up to its first
    ``.`` followed by ``_aug_aff``.
    """
    if aug_number < 1:
        # Nothing was requested, so do not touch the image or the disk.
        return
    # shear_range is the shear angle range (the original comment describes
    # it as a horizontal/vertical shear transform).
    datagen = ImageDataGenerator(shear_range=3)
    img = load_img(path, grayscale=True)  # PIL image
    x = img_to_array(img)  # PIL image -> numpy array
    x = x.reshape((1, ) + x.shape)  # prepend a batch axis of length 1
    prefix = os.path.basename(path).split(".")[0]
    flow = datagen.flow(x,
                        batch_size=1,
                        save_to_dir=image_save_dir,
                        save_prefix=prefix + "_aug_" + 'aff',
                        save_format='jpg')
    for drawn, _ in enumerate(flow, start=1):
        if drawn >= aug_number:
            break  # the generator never ends on its own, so stop explicitly
コード例 #5
0
def Random_translation(path, image_save_dir, aug_number):
    """Write randomly shifted copies of one grayscale image to a directory.

    Args:
        path: Path of the source image.
        image_save_dir: Directory passed to the generator as ``save_to_dir``.
        aug_number: How many augmented batches (one image each) to draw.
            Values below 1 draw nothing.

    Output files are prefixed with the source file name up to its first
    ``.`` followed by ``_aug_trans``.
    """
    if aug_number < 1:
        # Nothing was requested, so do not touch the image or the disk.
        return
    datagen = ImageDataGenerator(width_shift_range=0.05,
                                 height_shift_range=0.05)
    img = load_img(path, grayscale=True)  # PIL image
    x = img_to_array(img)  # PIL image -> numpy array
    x = x.reshape((1, ) + x.shape)  # prepend a batch axis of length 1
    prefix = os.path.basename(path).split(".")[0]
    flow = datagen.flow(x,
                        batch_size=1,
                        save_to_dir=image_save_dir,
                        save_prefix=prefix + "_aug_" + 'trans',
                        save_format='jpg')
    for drawn, _ in enumerate(flow, start=1):
        if drawn >= aug_number:
            break  # the generator never ends on its own, so stop explicitly
 def data_generator(self, indeces=True, channel_mode="channels_last"):
     """Return an ``ImageDataGenerator`` configured for random augmentation.

     Args:
         indeces: Forwarded as ``get_normal_also``; per the original
             comment this also requests indices for unaugmented data.
         channel_mode: Forwarded as ``data_format``.

     Returns:
         The configured generator; normalisation and whitening are all
         switched off, rotation/shift/flip augmentation is switched on.
     """
     augmentation = dict(
         # Normalisation and whitening are all disabled.
         featurewise_center=False,
         samplewise_center=False,
         featurewise_std_normalization=False,
         samplewise_std_normalization=False,
         zca_whitening=False,
         # Random geometric augmentation.
         rotation_range=15,  # degrees
         width_shift_range=0.15,  # fraction of total width
         height_shift_range=0.15,  # fraction of total height
         horizontal_flip=True,
         vertical_flip=True,
         data_format=channel_mode,
         get_normal_also=indeces,
     )
     return ImageDataGenerator(**augmentation)
コード例 #7
0
    
    # Load every validation sample in parallel (8 workers); each call to
    # loadValidationData(fname, shape) contributes one entry of `pairs`.
    pairs = np.array(Parallel(n_jobs=8)(delayed(loadValidationData)(fname, shape) for fname in filelist))
    # Index 0 / 1 along axis 1 are presumably the image and its mask --
    # TODO confirm against loadValidationData.
    val_car = pairs[:,0,:,:]
    val_car_mask = pairs[:,1,:,:]
    # Reshape both arrays to (N, shape[0], shape[1], 1).
    val_car = val_car.reshape(val_car.shape[0], shape[0], shape[1], 1)
    val_car_mask = val_car_mask.reshape(val_car.shape[0], shape[0], shape[1], 1)
    del pairs  # the stacked array is no longer needed once split

    # Minutes elapsed since `t`, which is set outside this excerpt.
    print "Time elapsed:",  (time.time()-t)/60
    
else:
    print 'Creating validation imageDataGenerators'
    
    # Image and mask generators share the same arguments: rescale by 1/255.
    val_data_gen_args = dict(rescale = 1./255)
    
    val_image_datagen = ImageDataGenerator(**val_data_gen_args)
    val_mask_datagen = ImageDataGenerator(**val_data_gen_args)
    
    # Directory name: 'data/full_' or 'data/val_' + str(shape) + rgb_suffix.
    val_dir = 'data/full_' if full else 'data/val_'
    val_dir += str(shape)+rgb_suffix
    
    # Provide the same seed and keyword arguments to the fit and flow methods
    seed = 1
    
    # NOTE(review): val_dir is built above but the directory passed here is
    # derived from train_prefix instead -- confirm which one is intended.
    # class_mode=None is passed, so no labels are requested.
    val_image_generator = val_image_datagen.flow_from_directory(
        'data/'+train_prefix+str(shape)+rgb_suffix,
        target_size=shape,
        color_mode = color_mode,
        class_mode = None,
        batch_size = batch_size,
        seed = seed)
コード例 #8
0
# Step 3 - Flattening
classifier.add(Flatten())

# Step 4 - Fully Connected Layer
# One 128-unit ReLU layer followed by a 6-unit softmax output layer.
classifier.add(Dense(units=128, activation='relu'))
classifier.add(Dense(units=6, activation='softmax'))

# Step 5 - Compiling
classifier.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])

# NOTE(review): imported from a module named `image`, not from Keras --
# presumably a project-local generator; confirm its keyword defaults.
from image import ImageDataGenerator

# Training images: rescaled by 1/255 plus random shear, zoom and
# horizontal flip.
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)

# Dev and test images are only rescaled.
dev_datagen = ImageDataGenerator(rescale=1. / 255)
test_datagen = ImageDataGenerator(rescale=1. / 255)

# NOTE(review): no class_mode is passed here (dev_set passes 'categorical')
# and batch_size is 1 versus 32 for dev_set -- confirm both are intended.
training_set = train_datagen.flow_from_directory(
    '/home/osama/Documents/datasets/textile/dataset/training_set/',
    target_size=(128, 128),
    batch_size=1)

dev_set = dev_datagen.flow_from_directory(
    '/home/osama/Documents/datasets/textile/dataset/dev_set',
    target_size=(128, 128),
    batch_size=32,
    class_mode='categorical')
コード例 #9
0
from keras.applications import InceptionResNetV2, Xception
from keras.models import Model, Sequential, load_model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K
import matplotlib.pyplot as plt
from constants import train_data_dir, validation_data_dir

# Batch size and square input edge length shared by the generators below.
batch_size = 64
image_size = 299

train_data_path = train_data_dir
validation_data_path = validation_data_dir

# Training images are rescaled by 1/255.  The remaining keywords are not
# standard Keras arguments (presumably a project-specific generator --
# TODO confirm); every one of them is switched off here.
train_data_generator = ImageDataGenerator(rescale=1. / 255,
                                          contrast_stretching=False,
                                          logarithmic=False,
                                          gamma=False,
                                          equalization=False,
                                          adaptive_equalization=False)

# Validation images are only rescaled.
validation_data_generator = ImageDataGenerator(rescale=1. / 255)

# Use the batch_size constant rather than repeating the literal so the two
# cannot drift apart.
train_generator = train_data_generator.flow_from_directory(
    train_data_path,
    target_size=(image_size, image_size),
    batch_size=batch_size,
    class_mode='categorical')

validation_generator = validation_data_generator.flow_from_directory(
    validation_data_path,
    shuffle=False,
    target_size=(image_size, image_size),
              nb_epoch=nb_epoch,
              validation_data=(X_test, Y_test),
              shuffle=True)
else:
    print('Using real-time data augmentation.')
    # this will do preprocessing and realtime data augmentation
    # NOTE(review): the zerosquare* keywords are not standard Keras
    # arguments -- presumably a customised generator; confirm their meaning.
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=
        False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=
        45.0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=
        0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=
        0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False,  # no vertical flipping
        zerosquare=True,
        zerosquareh=noises,
        zerosquarew=noises,
        zerosquareintern=0.0)
    # compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied)
    # NOTE(review): every featurewise option and ZCA are disabled above, so
    # this fit is presumably not needed for normalisation -- TODO confirm.
    datagen.fit(X_train)
    # fit the model on the batches generated by datagen.flow()

    if weighted:
コード例 #11
0
# Cast the imageId column of every split to uint32.
datas = {'Train': train, 'Test': test, 'Validation': validation}
for data in datas.values():
    data['imageId'] = data['imageId'].astype(np.uint32)

# Fit the multi-label binarizer on the training labels; it is reused below
# to map rounded predictions back to label sets.
mlb = MultiLabelBinarizer()
train_label = mlb.fit_transform(train['labelId'])

# All-zero placeholder targets of shape (39706, 228) and ids 1..39706.
y_test = np.zeros((39706, 228))
x_test = np.arange(y_test.shape[0]) + 1
width = 224

model_name = 'Xception'
# NOTE(review): the model load below is commented out, so `model` must have
# been defined earlier in the file (not visible in this excerpt).
# with CustomObjectScope({'f1_loss': f1_loss, 'f1_score': f1_score, 'precision': precision, 'recall': recall}):
#     model = load_model(f'../models/{model_name}_f1.h5')
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
# NOTE(review): flow() is called with (ids, directory, width, targets),
# which is not the stock Keras signature -- presumably a project-specific
# generator that loads images by id; confirm.
y_pred_test = model.predict_generator(test_datagen.flow(x_test,
                                                        '../data/test_data',
                                                        width,
                                                        y_test,
                                                        batch_size=1,
                                                        shuffle=False),
                                      verbose=1)
np.save(f'../data/json/y_pred_{model_name}', y_pred_test)

# y_pred_test_xe = y_pred_test.copy()
# y_pred_test = (y_pred_test_xe + y_pred_test_in) / 2

# Round the scores to 0/1 and convert them back to label sets.
y_pred_test1 = np.round(y_pred_test)
where_1 = mlb.inverse_transform(y_pred_test1)
コード例 #12
0
def save_bottleneck_features(train_data_dir,
                             val_data_dir,
                             weights_path=WEIGHTS_PATH_NO_TOP,
                             overwrite=False):
    """Cache the outputs of ``bottom_model`` in ``bottleneck_features.h5``.

    Args:
        train_data_dir: Directory passed to the training
            ``flow_from_directory`` call.
        val_data_dir: Directory passed to the validation
            ``flow_from_directory`` call.
        weights_path: Forwarded to ``bottom_model``.
        overwrite: When the output file already exists, ``True`` deletes and
            rebuilds it; ``False`` prints a notice and returns.

    The file receives four datasets: ``train_labels``, ``val_labels``,
    ``val`` and ``training``.  Sample counts (100000 training, 10000
    validation) are hard-coded and ``batch_size`` is read from module scope.
    """
    bottleneck_features_name = "bottleneck_features.h5"
    if os.path.isfile(bottleneck_features_name):
        if overwrite:
            print("Overwriting bottleneck_features.h5")
            # Actually delete the stale file before it is rebuilt.
            os.remove(bottleneck_features_name)
        else:
            print(
                "bottleneck_features.h5 exists, use overwrite=True to overwrite."
            )
            return
    print(bottleneck_features_name +
          " is being created...~80GB for ImageNet200")
    img_width, img_height = 224, 224

    nb_train_samples = 100000
    nb_val_samples = 10000
    state = [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
    model = bottom_model(state, weights_path=weights_path)

    seed = 0
    val_datagen = ImageDataGenerator(rescale=1. / 255)
    val_generator = val_datagen.flow_from_directory(val_data_dir,
                                                    target_size=(img_height,
                                                                 img_width),
                                                    batch_size=batch_size,
                                                    shuffle=True,
                                                    seed=seed)

    train_datagen = ImageDataGenerator(rescale=1. / 255)
    train_generator = train_datagen.flow_from_directory(
        train_data_dir,
        target_size=(img_height, img_width),
        batch_size=batch_size,
        shuffle=True,
        seed=seed)

    # Seeded permutations used to reorder the stored labels.
    # NOTE(review): presumably meant to reproduce the generators' shuffled
    # order for the same seed -- confirm against the generator in use.
    np.random.seed(seed)
    train_index_array = np.random.permutation(100000)[:nb_train_samples]

    np.random.seed(seed)
    val_index_array = np.random.permutation(10000)[:nb_val_samples]

    # Predict in parts of roughly `chunk` batches each.
    chunk = 25
    train_parts = (nb_train_samples // batch_size) // chunk
    train_samples = nb_train_samples // train_parts
    val_parts = (nb_val_samples // batch_size) // chunk
    val_samples = nb_val_samples // val_parts

    with h5py.File(bottleneck_features_name, 'w') as hf:

        hf.create_dataset(
            "train_labels",
            data=np.take(train_generator.classes[:nb_train_samples],
                         train_index_array))
        hf.create_dataset(
            "val_labels",
            data=np.take(val_generator.classes[:nb_val_samples],
                         val_index_array))

        val = hf.create_dataset("val", (nb_val_samples, 14, 14, 1024),
                                chunks=(64, 14, 14, 1024))

        for i in range(val_parts):
            print("Val done: " + str(100 * i / val_parts) + "%")
            max_q_size = 1
            start, stop = i * val_samples, (i + 1) * val_samples
            val[start:stop, :, :, :] = model.predict_generator(
                val_generator,
                val_samples // batch_size,
                max_q_size=max_q_size)
            # recorrect for the over-calling of predict_generator by max_q_size
            val_generator.batch_index -= max_q_size

        train = hf.create_dataset("training", (nb_train_samples, 14, 14, 1024),
                                  chunks=(64, 14, 14, 1024))
        for i in range(train_parts):
            print("Train done: " + str(100 * i / train_parts) + "%")
            max_q_size = 1
            start, stop = i * train_samples, (i + 1) * train_samples
            train[start:stop, :, :, :] = model.predict_generator(
                train_generator,
                train_samples // batch_size,
                max_q_size=max_q_size)
            # recorrect for the over-calling of predict_generator by max_q_size
            train_generator.batch_index -= max_q_size
コード例 #13
0
ファイル: predict.py プロジェクト: wassname/mammoviews
    class_mode=prms.class_mode,
    classes=prms.classes,
    seed=prms.seed)

# Normalisation keywords shared by every ImageDataGenerator built from them.
# NOTE(review): samplewise_std_normalization is fed from
# prms.samplewise_center, not from a std-specific setting -- looks like a
# copy/paste slip; confirm which prms field is intended.
norm_params = dict(
    #rescale=prms.scaleup,
    samplewise_center=prms.samplewise_center,
    samplewise_std_normalization=prms.samplewise_center,
    featurewise_center=False,
    featurewise_std_normalization=False,
    zca_whitening=False,
)

# In[23]:

train_datagen = ImageDataGenerator(**norm_params)

# Reverse the second-to-last axis of every image (presumably a left-right
# flip for channels-last data -- TODO confirm).
train_datagen.preprocessing_function = lambda x: x[..., ::-1, :]  #*2**-8
datagen_train_output = train_datagen.flow_from_directory(
    prms.data_train,
    #stratify = prms.oversampling,
    #sampling_factor=prms.sampling_factor,
    #oversampling=prms.oversampling,
    shuffle=False,
    **flowfromdir_params)
# One sample per file found; steps = ceil(samples / batch size).
# NOTE(review): without true division (Python 2) `/` on two ints floors
# before np.ceil is applied -- confirm the interpreter in use.
SAMPLES_PER_EPOCH = len(datagen_train_output.filenames)
STEPS_PER_EPOCH = int(np.ceil(SAMPLES_PER_EPOCH / prms.batch_size))


##########################################
def get_predictions(data_dir, preprocessing_function=lambda x: x, model=model):
コード例 #14
0
except:
    last_lr = 0.001
    print('\nLast learning Rate = {}'.format(last_lr))
	
### Compile ###
# Adam starts from last_lr (set just above) with gradient-norm clipping at 5.
# Two categorical cross-entropy losses are combined with weights 1.0 and 0.4.
optimizer = Adam(lr=last_lr, clipnorm=5) #1e-3
model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'],
			  optimizer=optimizer, metrics=['accuracy'], loss_weights=[1.0, 0.4])

### AUGMENTATION ###

# This will do preprocessing and realtime data augmentation:
# data generator for train set
# Only preprocess_input is active; the flip, brightness and imgaug options
# are commented out.
train_datagen = ImageDataGenerator(#horizontal_flip=True,
                        #brightness_range=[1.2,1.2], # 0.5>val<=2 is not working
                        preprocessing_function=preprocess_input
                        #preprocessing_function=imgaug_steroids
                        )

# data generator for test set
validation_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

### class_weights ###
# Load the stored label weights and convert the array to a plain list.
label_wts = np.load('nasnet_data/label_wts.npy')
label_wts = np.ndarray.tolist(label_wts)

### loading file ###

# Link training data
# The scratch directory is the first command-line argument.
scratch_dir = sys.argv[1]
print ('\nSCRATCH_DIR: {}\n'.format(scratch_dir))
 def data_generator(self, indeces=True, channel_mode="channels_last"):
     """Return an ``ImageDataGenerator`` with no augmentation options set.

     Args:
         indeces: Forwarded as ``get_normal_also``; per the original
             comment this also requests indices for unaugmented data.
         channel_mode: Forwarded as ``data_format``.
     """
     return ImageDataGenerator(data_format=channel_mode,
                               get_normal_also=indeces)
コード例 #16
0
# model_name = 'InceptionResNetV2'
# Look up the batch size and model constructor for the current model_name
# (model_name is reassigned to 'Xception' right after this lookup).
batch_size, MODEL = batch_size_model[model_name]

# model = build_model(MODEL, width, n_class, model_name, batch_size)
model_name = 'Xception'
# The saved model references custom losses/metrics, so they must be
# registered while it is loaded.
with CustomObjectScope({
        'binary_crossentropy_weight': binary_crossentropy_weight,
        'f1_loss': f1_loss,
        'f1_score': f1_score,
        'precision': precision,
        'recall': recall
}):
    model = load_model(f'../models/{model_name}_bcw.h5')

# Training and validation generators; both only apply preprocess_input.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Loss lookup by short name.
losses = {
    'bcw': binary_crossentropy_weight,
    'f1': f1_loss,
    'bc': 'binary_crossentropy'
}
# (loss name, optimizer) pairs tried one after another.
configs = [('bcw', Adam(lr=1e-5)), ('f1', Adam(lr=1e-5)),
           ('f1', SGD(lr=5e-5, momentum=0.9, nesterov=True))]
for i, config in enumerate(configs):

    print(f'{i + 1} trial')
    # loss_name, opt = ('bcw', Adam(lr=3e-6))
    loss_name, opt = config
    reduce_lr_patience = 2
コード例 #17
0
ファイル: 4_test.py プロジェクト: MinxZ/multi_label
batch_y = y_val
# Read each validation image, convert BGR -> RGB, resize/pad it to
# (width, width) and store it in batch_x.
# NOTE(review): within this excerpt batch_x is only used by the
# commented-out model.predict call below -- confirm the loop is still needed.
for i, j in enumerate(tqdm(index_array)):
    s_img = cv2.imread(f'../data/val_data/{j+1}.jpg')
    b, g, r = cv2.split(s_img)  # get b,g,r
    rgb_img = cv2.merge([r, g, b])  # switch it to rgb
    x = resizeAndPad(rgb_img, (width, width))
    batch_x[i] = x

# Evaluate each saved model on the validation set and print its F1 score.
model_names = ['Xception_f1_59', 'Xception_f1_5945']
for model_name in model_names:
    # The saved models reference custom losses/metrics, so they must be
    # registered while loading.
    with CustomObjectScope({
            'f1_loss': f1_loss,
            'f1_score': f1_score,
            'precision': precision,
            'recall': recall
    }):
        model = load_model(f'../models/{model_name}.h5')

    # y_pred_val = model.predict(batch_x, verbose=1)
    # print(model_name, f1_score(y_val, y_pred_val))

    # NOTE(review): flow() is called with (ids, directory, width, targets),
    # which is not the stock Keras signature -- presumably a
    # project-specific generator; confirm.
    val_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)
    y_pred_val = model.predict_generator(val_datagen.flow(x_val,
                                                          '../data/val_data',
                                                          width,
                                                          y_val,
                                                          batch_size=3,
                                                          shuffle=False),
                                         verbose=1)
    print(model_name, f1_score_np(y_val, y_pred_val))
コード例 #18
0
###############################################################################
# Compiling the network
classifier.compile(optimizer='adam',
                   loss='categorical_crossentropy',
                   metrics=['accuracy'])
###############################################################################
# Preparing the image generator
training_set_path = "/textileconvnets/training_set/"
dev_set_path = "/textileconvnets/dev_set"
#test_set_path = "/home/osama/Documents/datasets/textile/dataset/test_set"

# Training images: rescale by 1/255, random shear/zoom/flip/rotation/shift,
# plus the project's image_preprocessing hook.
# NOTE(review): confirm the order in which rescale and
# preprocessing_function are applied matches what image_preprocessing expects.
train_datagen = ImageDataGenerator(rescale=1. / 255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   rotation_range=45,
                                   width_shift_range=0.2,
                                   height_shift_range=0.2,
                                   preprocessing_function=image_preprocessing)

# Dev images: rescale and image_preprocessing only, no random augmentation.
dev_datagen = ImageDataGenerator(rescale=1. / 255,
                                 preprocessing_function=image_preprocessing)
#test_datagen = ImageDataGenerator(rescale=1./255)

# No class_mode is passed here, so the generator's default applies.
training_set = train_datagen.flow_from_directory(training_set_path,
                                                 target_size=(128, 128),
                                                 batch_size=27)

dev_set = dev_datagen.flow_from_directory(dev_set_path,
                                          target_size=(128, 128),
コード例 #19
0
        elif prms.preprocessing_function == 'm1p1':
            preprocessing_function = lambda x: x / 128.0 - 1
        else:
            raise ValueError("unknown preprocessing_function")
    else:
        preprocessing_function = lambda x: x

    # Training generator: augmentation settings come from prms when
    # prms.data_augmentation is truthy, otherwise only norm_params apply.
    if prms.data_augmentation:

        print('Using real-time data augmentation.')
        # NOTE(review): contrast and truncate_quantile are not standard
        # Keras arguments -- presumably a customised generator; confirm.
        train_datagen = ImageDataGenerator(
            zoom_range=prms.zoom_range,
            fill_mode=prms.fill_mode,
            rotation_range=prms.rotation_range,
            width_shift_range=prms.width_shift_range,
            height_shift_range=prms.height_shift_range,
            horizontal_flip=prms.horizontal_flip,
            vertical_flip=prms.vertical_flip,
            # contrast is optional in prms; None is passed when it is absent.
            contrast=prms.contrast if "contrast" in prms else None,
            truncate_quantile=prms.truncate_quantile,
            #histeq_alpha=prms.histeq_alpha,
            **norm_params)
    else:
        train_datagen = ImageDataGenerator(**norm_params)

    # The validation generator never augments; it only uses norm_params.
    val_datagen = ImageDataGenerator(**norm_params)

    datagen_train_output = train_datagen.flow_from_directory(
        prms.data_train,
        stratify=prms.oversampling,
        sampling_factor=prms.sampling_factor if prms.oversampling else None,
        oversampling=prms.oversampling,
コード例 #20
0
    wc=(W-w)/2
    Xin=np.squeeze(Xin)
    Xin=np.transpose(Xin,(1,2,0))
    Xout=np.zeros(Xorig.shape,dtype=Xorig.dtype)
    Xout[hc:hc+h,wc:wc+w,:]=Xin
    return Xout
    
# random data generator
# Normalisation and whitening are all disabled; augmentation is rotation,
# small shifts/shear/zoom and both flips, with 'nearest' fill.
# NOTE(review): dim_ordering='th' looks like the Keras 1.x spelling of
# channels-first -- confirm against the installed Keras version.
datagen = ImageDataGenerator(featurewise_center=False,
        samplewise_center=False,
        featurewise_std_normalization=False,
        samplewise_std_normalization=False,
        zca_whitening=False,
        rotation_range=75,
        width_shift_range=0.1,
        height_shift_range=0.1,
        shear_range=0.05,
        zoom_range=0.01,
        channel_shift_range=0.0,
        fill_mode='nearest',
        cval=0.0,
        horizontal_flip=True,
        vertical_flip=True,
        dim_ordering='th')
        
def iterate_minibatches(inputs1 , targets,  batchsize, shuffle=True, augment=True):
    assert len(inputs1) == len(targets)
 
    if shuffle:
        indices = np.arange(len(inputs1))
        np.random.shuffle(indices)
    for start_idx in range(0, len(inputs1) - batchsize + 1, batchsize):
コード例 #21
0
    'syringe', 't-shirt', 'table', 'tablelamp', 'teacup', 'teapot',
    'teddy-bear', 'telephone', 'tennis-racket', 'tent', 'tiger', 'tire',
    'toilet', 'tomato', 'tooth', 'toothbrush', 'tractor', 'traffic light',
    'train', 'tree', 'trombone', 'trousers', 'truck', 'trumpet', 'tv',
    'umbrella', 'van', 'vase', 'violin', 'walkie talkie', 'wheel',
    'wheelbarrow', 'windmill', 'wine-bottle', 'wineglass', 'wrist-watch',
    'zebra'
]

# Module-level augmentation generator.
# NOTE(review): featurewise_center and featurewise_std_normalization are
# enabled, which in Keras requires calling datagen.fit(...) on the data
# before use -- confirm that happens elsewhere.  samplewise_center is
# enabled together with featurewise_center; confirm both are intended.
datagen = ImageDataGenerator(
    featurewise_center=True,  # set input mean to 0 over the dataset
    samplewise_center=True,  # set each sample mean to 0
    featurewise_std_normalization=True,  # divide inputs by std of the dataset
    samplewise_std_normalization=False,  # divide each input by its std
    zca_whitening=False,  # apply ZCA whitening
    rotation_range=20,  # randomly rotate images in the range (degrees, 0 to 180)
    width_shift_range=
    0.2,  # randomly shift images horizontally (fraction of total width)
    height_shift_range=
    0.2,  # randomly shift images vertically (fraction of total height)
    horizontal_flip=True,  # randomly flip images horizontally
    vertical_flip=False)  # no vertical flipping


def load_dataset_old():
    # We first define some helper functions for supporting both Python 2 and 3.
    if sys.version_info[0] == 2:
        from urllib import urlretrieve
        import cPickle as pickle

        def pickle_load(f, encoding):