feature_pkl_folder = str(sys.argv[2])
    pickle_file_counter = 0
    batch_size = int(sys.argv[3])

    # set display defaults
    #plt.rcParams['figure.figsize'] = (10, 10)        # large images
    #plt.rcParams['image.interpolation'] = 'nearest'  # don't interpolate: show square pixels
    #plt.rcParams['image.cmap'] = 'gray'  # use grayscale output rather than a (potentially misleading) color heatmap

    caffe.set_mode_cpu()

    model_def = 'vgg_face_caffe/vgg_face_caffe/VGG_FACE_deploy.prototxt'
    model_weights = 'vgg_face_caffe/vgg_face_caffe/VGG_FACE.caffemodel'
    #img_pkl_folder = 'fr_train_data_faceIndexPkl/'
    img_pkl_paths = get_files(img_pkl_folder)
    img_path_vec, img_label_vec = load_data_xy(img_pkl_paths)

    img_feature_vec = []
    #create net
    net = caffe.Net(model_def, model_weights, caffe.TEST)

    #mu = np.load('vgg_face_caffe/vgg_face_caffe/ilsvrc_2012_mean.npy')
    #mu = mu.mean(1).mean(1)  # average over pixels to obtain the mean (BGR) pixel values
    #print 'mean-subtracted values:', zip('BGR', mu)
    #mu=[129.1863, 104.7624, 93.5940]
    mu=np.array([93.5940, 104.7624, 129.1863])
    mu = mu.reshape((3,1,1))
    transformer = caffe.io.Transformer({'data':net.blobs['data'].data.shape})
    transformer.set_transpose('data', (2,0,1))  # move image channels to outermost dimension
    #transformer.set_mean('data', mu)            # subtract the dataset-mean value in each channel
    transformer.set_raw_scale('data', 255)      # rescale from [0, 1] to [0, 255]
    keras_model.add_node(layer=Dense(1, activation='sigmoid'), name='dense4', input='dense3')
    keras_model.add_output('output', input='dense4')
    #add soft max  or min_max or maxpooling
    keras_model.compile('adadelta', {'output': 'mean_squared_error'})
    return keras_model

    #history = keras_model.fit({'input1':X_train, 'input2':X2_train, 'output':y_train}, nb_epoch=10)
    #predictions = graph.predict({'input1':X_test, 'input2':X2_test}) # {'output':...}


if __name__ == "__main__":
    # Script entry point.
    # argv[1] and argv[2] are the locations of the two training sets. Each
    # is passed to get_files() and the result to load_data_xy(), which
    # returns a (data, labels) pair. Entry i of the first set is paired with
    # entry i of the second set when the pair labels are built below.
    train1_data_path = str(sys.argv[1])
    train2_data_path = str(sys.argv[2])

    # read train1_data
    train1_data_path_vec = get_files(train1_data_path)
    train1_data_vec, train1_label_vec = load_data_xy(train1_data_path_vec)
    # read train2_data
    train2_data_path_vec = get_files(train2_data_path)
    train2_data_vec, train2_label_vec = load_data_xy(train2_data_path_vec)

    # generate output label
    train1_len = len(train1_label_vec)
    train2_len = len(train2_label_vec)
    # Explicit check instead of `assert`: asserts are removed under
    # `python -O`, which would let mismatched sets slip through and either
    # crash with an IndexError or silently drop trailing samples.
    if train1_len != train2_len:
        raise ValueError(
            "train1 and train2 must contain the same number of labels "
            "(got %d and %d)" % (train1_len, train2_len))
    # 1.0 where both entries of a pair carry the same label (the same
    # person), 0.0 otherwise.
    train_label_vec = np.array(
        [1. if label1 == label2 else 0.
         for label1, label2 in zip(train1_label_vec, train2_label_vec)],
        dtype='float32')
                         input='dense3')
    keras_model.add_output('output', input='dense4')
    #add soft max  or min_max or maxpooling
    keras_model.compile('adadelta', {'output': 'mean_squared_error'})
    return keras_model

    #history = keras_model.fit({'input1':X_train, 'input2':X2_train, 'output':y_train}, nb_epoch=10)
    #predictions = graph.predict({'input1':X_test, 'input2':X2_test}) # {'output':...}


if __name__ == "__main__":
    # Script entry point.
    # argv[1] and argv[2] are the locations of the two training sets. Each
    # is passed to get_files() and the result to load_data_xy(), which
    # returns a (data, labels) pair. Entry i of the first set is paired with
    # entry i of the second set when the pair labels are built below.
    train1_data_path = str(sys.argv[1])
    train2_data_path = str(sys.argv[2])

    # read train1_data
    train1_data_path_vec = get_files(train1_data_path)
    train1_data_vec, train1_label_vec = load_data_xy(train1_data_path_vec)
    # read train2_data
    train2_data_path_vec = get_files(train2_data_path)
    train2_data_vec, train2_label_vec = load_data_xy(train2_data_path_vec)

    # generate output label
    train1_len = len(train1_label_vec)
    train2_len = len(train2_label_vec)
    # Explicit check instead of `assert`: asserts are removed under
    # `python -O`, which would let mismatched sets slip through and either
    # crash with an IndexError or silently drop trailing samples.
    if train1_len != train2_len:
        raise ValueError(
            "train1 and train2 must contain the same number of labels "
            "(got %d and %d)" % (train1_len, train2_len))
    # 1.0 where both entries of a pair carry the same label (the same
    # person), 0.0 otherwise.
    train_label_vec = np.array(
        [1. if label1 == label2 else 0.
         for label1, label2 in zip(train1_label_vec, train2_label_vec)],
        dtype='float32')