Example #1
def fetch(args,dataset="",number=0):
	if not dataset:
		dataset = args.dataset.upper()
	names = ""
	gt = []

	if dataset in ["TURBOFAN","MILL","IGBT"]:
		data,gt,explanations = nasa.main(args)
	elif dataset == "BACKBLAZE":
		data,gt,explanations,names = backblaze.main(args)
	elif dataset == "OCCUPANCY":
		data,gt,explanations,names = occupancy.main(args)
	elif dataset == "DODGERS":
		data,gt,explanations,names = dodgers.main(args)
	elif dataset == "EYE":
		data,gt,explanations,names = eye.main(args)
	#elif dataset == "ARMA_SIM":
	#	data = sim.arma_sim(np.array([1]),np.array([1,0.5,-0.2]),1000,num=5)
	elif dataset == "VARMA_SIM":
		if args.filename:
			data,gt = sim.read(args.filename,args.elemsep,args.linesep)
			data = [pp.normalize(dat) for dat in data]
		else:
			num_timepoints = args.settings["num_timepoints"]
			num_samples = args.settings["num_samples"]
			case = args.settings["case"]
			data = [sim.mixed_varma(num_timepoints,case) for i in range(num_samples)]
			data = [pp.normalize(dat) for dat in data]
			sim.write(data,gt,"VARMA",args)
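Note: pp.normalize above is project-specific and not shown here; a minimal sketch of a typical per-column min-max scaler for (time, features) arrays, purely illustrative and not the project's code:
import numpy as np

def minmax_normalize(series):
    # Hypothetical stand-in for pp.normalize: scale each column into [0, 1].
    series = np.asarray(series, dtype=float)
    lo, hi = series.min(axis=0), series.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)  # guard against constant columns
    return (series - lo) / span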
Example #2
    def test_sine_regression(self):
        errors = []
        for iteration in range(10):
            input_data = np.ones((1, 40)) * np.linspace(0, 1, 40)
            target_data = (np.sin(2 * np.pi * input_data) +
                           np.cos(4 * np.pi * input_data) +
                           np.random.randn(40) * 0.2)

            input_data = np.transpose(input_data)
            target_data = np.transpose(target_data)
            input_data = normalize(input_data)
            target_data = normalize(target_data)

            training_inputs = input_data[0::2, :]
            testing_inputs = input_data[1::4, :]
            validation_inputs = input_data[3::4, :]
            training_targets = target_data[0::2, :]
            testing_targets = target_data[1::4, :]
            validation_targets = target_data[3::4, :]

            neural_net = mlp.MultilayerPerceptron(
                (1, 5, 4, 3, 1),
                Backpropagation(800),
                learner_type=mlp.LearnerType.REGRESSION)

            neural_net.train_with_early_stopping(training_inputs,
                                                 training_targets,
                                                 validation_inputs,
                                                 validation_targets)

            testing_outputs = neural_net.recall(testing_inputs)
            errors.append(0.5 * np.sum((testing_targets - testing_outputs)**2))

        median_error = np.median(errors)
        self.assertLessEqual(median_error, 0.5)
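Note: the strided slices 0::2, 1::4 and 3::4 used above partition the 40 points into disjoint training, testing and validation sets; a quick standalone check:
import numpy as np

idx = np.arange(40)
train, test, valid = idx[0::2], idx[1::4], idx[3::4]
assert len(train) + len(test) + len(valid) == 40
assert not (set(train) & set(test)) and not (set(train) & set(valid)) and not (set(test) & set(valid))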
Example #3
    def oct_target(alpha):
        print('Solving ILP for hyperparameter tuning...')
        all_results = []
        tree_depth = tree_depths[0]
        for r in range(val_repeat):
            train_df, val_df = preprocessing.train_test_split(
                train_val_df, split=train_val_ratio, random_state=random_state)
            preprocessing.normalize(train_df, norm_cols=norm_cols)
            preprocessing.normalize(val_df, norm_cols=norm_cols)
            all_results.append(
                get_results(train_df=train_df,
                            test_df=val_df,
                            alpha=alpha,
                            tree_depth=tree_depth,
                            max_time_per_run=max_time_per_run,
                            threads=threads,
                            print_status=print_status,
                            warm_start=warm_start))

        results_df = pd.concat(all_results)
        all_results_df.append(results_df)
        aggregated = calc_mean_accuracy_per_alpha(results_df)
        all_aggregated_df.append(aggregated)
        best_alpha_acc = aggregated.max()['testing_accuracy']
        return best_alpha_acc
Example #4
    def transform(self, X):

        tfidf = np.multiply(X, self.idf_)

        if self.norm == 'l2':
            tfidf = tfidf / normalize(tfidf, p=2, axis=1).reshape((-1, 1))
        elif self.norm == 'l1':
            tfidf = tfidf / normalize(tfidf, p=1, axis=1).reshape((-1, 1))

        return tfidf
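Note: assuming the local normalize(tfidf, p, axis) returns the per-row p-norms, the 'l2' branch is equivalent to plain row-wise L2 normalization, e.g.:
import numpy as np

def l2_normalize_rows(m, eps=1e-12):
    # Divide every row by its Euclidean norm (eps avoids division by zero).
    norms = np.linalg.norm(m, ord=2, axis=1, keepdims=True)
    return m / np.maximum(norms, eps)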
Example #5
    def __solve__(self):
        if self.normalize:
            normalize(self.X)
        if self.method == 'gd':
            return gradient_descent(self.X, self.Y, self.reg_score)
        if self.method == 'exact':
            return exact_solution(self.X, self.Y)
        if self.method == 'evolution':
            return de(self.X, self.Y)
        if self.method == 'conj':
            return conjugate_gradients(self.X, self.Y)
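Note: gradient_descent, exact_solution, de and conjugate_gradients are defined elsewhere in that project; a hypothetical sketch of what an exact_solution for linear least squares could look like (not the project's actual code):
import numpy as np

def exact_solution(X, Y):
    # Weights minimizing ||X @ w - Y||_2 via numpy's least-squares solver.
    w, *_ = np.linalg.lstsq(X, Y, rcond=None)
    return w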
Example #6
def write_results_file_2(results, test_data, test_measures):
    # print(len(zip(results, test_data['Tweet text'].values, test_measures["M2"])))
    with open("results2.txt", "w", encoding='utf-8') as info_file:
        for result, text, M2 in zip(results, test_data['Tweet text'].values,
                                    test_measures["M2"]):
            label = "0" if result < 0.5 else "1"
            output = (label + ", prob:" + str(result) + " M2:" + str(M2) +
                      " " + pre.normalize(text) + "\n")
            info_file.write(output)
Example #7
def normalize(input_dict):
    """Normalizes all numeric values in the given dataset"""
    instances = input_dict['instances']
    output_dict = {}
    # scale 1.0 / translation 0.0 normalizes to [0,1]; scale 2.0 / translation -1.0 (used below) maps to [-1,1]
    output_dict['normalized'] = preprocessing.normalize(instances, '-S 2.0 -T -1.0')
    return output_dict
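Note: assuming the Weka-style options '-S 2.0 -T -1.0' mean min-max scaling to [0, 1] followed by a scale of 2.0 and a translation of -1.0, the numeric effect is a mapping to [-1, 1]; an illustrative numpy equivalent:
import numpy as np

def scale_to_range(X, scale=2.0, translation=-1.0):
    # Min-max each column to [0, 1], then apply scale and translation.
    X = np.asarray(X, dtype=float)
    lo, hi = X.min(axis=0), X.max(axis=0)
    span = np.where(hi > lo, hi - lo, 1.0)
    return (X - lo) / span * scale + translation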
Example #8
def embed(wavform_slice, rate):
  norm_wavform_slice = preprocessing.normalize(wavform_slice)
  examples_batch = vggish_input.waveform_to_examples(norm_wavform_slice,rate)
  #print('examples_batch:')
  #print(examples_batch)
  print('examples_batch len: ' + str(len(examples_batch)))

  # Prepare a postprocessor to munge the model embeddings.
  pproc = vggish_postprocess.Postprocessor(FLAGS.pca_params)

  with tf.Graph().as_default(), tf.Session() as sess:
    # Define the model in inference mode, load the checkpoint, and
    # locate input and output tensors.
    vggish_slim.define_vggish_slim(training=False)
    vggish_slim.load_vggish_slim_checkpoint(sess, FLAGS.checkpoint)
    features_tensor = sess.graph.get_tensor_by_name(
        vggish_params.INPUT_TENSOR_NAME)
    embedding_tensor = sess.graph.get_tensor_by_name(
        vggish_params.OUTPUT_TENSOR_NAME)
    
    # Run inference and postprocessing.
    [embedding_batch] = sess.run([embedding_tensor],
                     feed_dict={features_tensor: examples_batch})
    #print('embedding_batch: ')
    #print(embedding_batch)
    #print(embedding_batch.shape)
    postprocessed_batch = pproc.postprocess(embedding_batch)
    print('postprocessed_batch: ')
    print(postprocessed_batch)
    print(postprocessed_batch.shape)
  return postprocessed_batch
Example #9
def create_edges(
    images,
    outputs,
    sigma,
    upt,
    lwt,
    kernel=np.ones((3, 3), np.uint8),
    thresh=0.1
):  # 2 ways: Edges of raw images * outputs or Edges of [raw images*outputs]
    im = images[0].permute(1, 2, 0).data.cpu().numpy()
    out = outputs[0, 1].permute(1, 2, 0).data.cpu().numpy()
    vol_norm_raw = normalize(im)
    vol_norm_raw = np.uint8(vol_norm_raw)
    sitk_img_raw = sitk.GetImageFromArray(vol_norm_raw)
    sitk_img_raw_float = sitk.Cast(sitk_img_raw, sitk.sitkFloat32)
    edges_raw = sitk.CannyEdgeDetection(sitk_img_raw_float,
                                        lowerThreshold=lwt,
                                        upperThreshold=upt,
                                        variance=[sigma, sigma, sigma])
    edges_array_raw = sitk.GetArrayFromImage(edges_raw)
    edges_array_final = ((edges_array_raw * out) > thresh).astype(np.uint8)
    edges_closing = cv2.morphologyEx(edges_array_final,
                                     cv2.MORPH_CLOSE,
                                     kernel,
                                     iterations=1)

    return edges_closing
Example #10
def write_results_file(results, data, measures):
    with open("results1.txt", "w", encoding='utf-8') as predictions_file:
        for result, label, text, M1, M2 in zip(results, data['Label'].values,
                                               data['Tweet text'].values,
                                               measures["M1"], measures["M2"]):
            pred = "0" if result < 0.5 else "1"
            output = ("label:" + str(label) + "/" + pred + " prob:" +
                      str(result) + " M1:" + str(M1) + " M2:" + str(M2) +
                      " " + pre.normalize(text) + "\n")
            predictions_file.write(output)
Example #11
def main():
    parser = argparse.ArgumentParser(description='Generate an ingredient-ID mapping file')
    parser.add_argument('json', help='Input data file')
    parser.add_argument('--threshold', type=int, default=5,
                        help='Cutoff of how many times an ingredient should occur in recipes',)
    parser.add_argument('pkl', help='Output pickle file')
    args = parser.parse_args()

    ingredients_counter = Counter()

    with open(args.json, 'r') as f:
        for line in f:
            recipe = json.loads(line.strip())
            ingredients = recipe['ingredients']

            for ingredient in ingredients:
                for normalized_ingredient in preprocessing.normalize(ingredient):
                    ingredients_counter[normalized_ingredient] += 1

    ingredient_id = 0
    ingredient2id = {}
    for ingredient in sorted(ingredients_counter):
        count = ingredients_counter[ingredient]
        if count < args.threshold:
            continue
        ingredient2id[ingredient] = ingredient_id
        ingredient_id += 1

    id2ingredient = dict((v, k) for k, v in ingredient2id.iteritems())

    with open(args.pkl, 'w') as f:
        pickle.dump({
            'ingredient2id': ingredient2id,
            'id2ingredient': id2ingredient,
        }, f)
Example #12
def create_captions(classes, texts, category2idx, verbose=True, save=True):
    '''
    helper function to create text_c10 folder
    '''
    cls2count = {k.replace(" ", "_"): 1 for k in category2idx}
    filenames = []
    for index, (cls, text) in enumerate(zip(classes, texts)):
        category = cls.replace(" ", "_").replace("&", 'AND')
        cls = cls.replace("&", 'AND')

        dirname = "%.3i.%s" % (category2idx[cls], category)
        filename = "%s_%i.txt" % (category, cls2count[category])

        directory = os.path.join(DATA_PATH, "text_c10/%s" % dirname)
        if not os.path.exists(directory):
            os.makedirs(directory)

        if verbose and (index % 5000) == 0:
            print("%i - %s" % (index, filename))

        if save:
            with open(os.path.join(directory, filename), 'wt') as f:
                f.write("%s\n" % normalize(text))

        filenames.append(os.path.join(dirname, filename))
        cls2count[category] += 1

    return filenames
Example #13
    def create_numpy_arrays(self, preprocess_function):
        # always normalize before doing any other preprocessing
        self.np_values = preprocessing.normalize(np.array(self.values))
        self.np_time_stamps = np.array(self.time_stamps)
        # print("Before pre processing:", self.np_values)
        if preprocess_function:
            self.np_values = preprocess_function(self.np_values,
                                                 self.np_time_stamps)
Example #14
    def transform(self, txts):
        res = []
        for txt in txts:
            # see https://github.com/RaRe-Technologies/gensim/issues/447
            self.d2v.random.seed(conf.SEED)
            v = self.d2v.infer_vector(micro_tokenize(normalize(txt)))
            res.append(v)
        return numpy.vstack(res)
Example #15
def fit(d, l, kn):
    global data
    global label
    global k

    data = prepro.normalize(d)
    label = l
    k = kn
Example #16
def load_csv_data_meta(csv_file):
	from sklearn import preprocessing

	df = pd.read_csv(csv_file, delimiter=',', header=None, skiprows=1, names=['name', 'author', 'score', 'body',
     'Class', 'response_count', 'ARI_value', 'polarity', 'BadWords'])
	comments_data = df.drop(['name', 'author', 'body', 'Class'], axis=1)

	comments_data = preprocessing.normalize(comments_data, norm='l2')
	return comments_data
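Note: sklearn's preprocessing.normalize scales each sample (row) to unit norm by default; a small standalone check:
import numpy as np
from sklearn import preprocessing

X = np.array([[3.0, 4.0], [1.0, 0.0]])
X_norm = preprocessing.normalize(X, norm='l2')
print(np.linalg.norm(X_norm, axis=1))  # -> [1. 1.]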
Example #17
def save_edges_output(edges, dir_path, patient_id, object_name):
    edges = normalize(edges)
    edges = edges.astype(np.uint8)
    obj_path = dir_path + str(patient_id) + '/' + object_name + '/'
    if not os.path.exists(obj_path):
        os.makedirs(obj_path)
    for j in range(edges.shape[2]):
        # for cv2.imwrite, values in {0,1} should be mapped to {0,255}
        cv2.imwrite(obj_path + pad_zerro(j + 1) + ".tiff", edges[:, :, j])
Example #18
    def _parse_function(example):
        features = tf.io.parse_single_example(example, feature_description)
        image = tf.image.decode_jpeg(features['image/encoded'])
        image = normalize(image)
        label = tf.one_hot(features['label'],
                           depth=one_hot_depth,
                           dtype=tf.float32)

        return image, label
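Note: feature_description is defined outside the snippet; a hypothetical definition matching the keys used above ('image/encoded', 'label') could look like the following, to be adjusted to the actual TFRecord schema:
import tensorflow as tf

feature_description = {
    'image/encoded': tf.io.FixedLenFeature([], tf.string),
    'label': tf.io.FixedLenFeature([], tf.int64),
}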
Example #19
def determine_threshold(signal, freq, heart_rate):    
    
    signal = pr.normalize(signal)
    i = 0
    
    thresholds = np.arange(threshold_start, threshold_stop, threshold_step)
    thr = 0
    begginings = []
    endings = []
    
    for threshold in thresholds:
        under = 0
        is_above = False
        begginings = []
        endings = []
        i = 0
        for x in signal:
            if x < threshold:
                under = under + 1
                if is_above == True:
                    is_above = False
                    endings.append(i)
            else:
                if is_above == False:
                    is_above = True
                    begginings.append(i)
            i = i + 1
        begginings, endings = investigate_tone_boundaries(begginings, endings)
        n = len(begginings)
        rate = n / (len(signal) * 1.0/ freq) * 30
        
        print str(threshold) + ': ' + str(under * 1.0 / len(signal)) + ' ' + str(n) + ' ' + str(rate)    
          
        if ((1 - rate_confidence) * heart_rate <= rate and (1 + rate_confidence) * heart_rate >= rate):
            print str(threshold) + ' - HERE!'
            thr = threshold
        
#            wo.plot_wave_signal(signal, freq)
#            plt.axhline(y = threshold, xmin = 0, xmax = 3, c = "red", linewidth = 0.5, zorder = 0)
        
            break 
        elif ((2 - rate_confidence) * heart_rate <= rate and (2 + rate_confidence) * heart_rate >= rate):
            heart_rate = 2 * heart_rate
            print str(threshold) + ' - HERE!'
            thr = threshold
        
#            wo.plot_wave_signal(signal, freq)
#            plt.axhline(y = threshold, xmin = 0, xmax = 3, c = "red", linewidth = 0.5, zorder = 0)
        
            break 
        
    n = len(begginings)   
    peaks_energy = np.zeros(n)
    for index in range(0, n - 1):
        peaks_energy[index] = sum(signal[begginings[index] : endings[index]])
        
    return thr, begginings, endings, heart_rate, peaks_energy
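Note: the inner loop above collects the start and end indices of above-threshold runs; an illustrative vectorized alternative using a boolean mask and np.diff (not the original code):
import numpy as np

def threshold_segments(signal, threshold):
    # Return arrays of start and end indices of runs where signal >= threshold.
    above = np.asarray(signal) >= threshold
    edges = np.diff(above.astype(int))
    starts = np.where(edges == 1)[0] + 1
    ends = np.where(edges == -1)[0] + 1
    if above[0]:
        starts = np.insert(starts, 0, 0)
    if above[-1]:
        ends = np.append(ends, len(above))
    return starts, ends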
Example #20
def threshold_with_custom_threshold(signal, freq, heart_rate, threshold):    
    
    thr = max(signal) * threshold
    signal = pr.normalize(signal)
    i = 0
    signal_type = 0
    # 1 - s1 & s2
    # 2 - only s1

    begginings = []
    endings = []
    
    under = 0
    is_above = False
    begginings = []
    endings = []
    i = 0
    for x in signal:
        if x < threshold:
            under = under + 1
            if is_above == True:
                is_above = False
                if (i - begginings[len(begginings) - 1]) > (freq * 0.02):
                    endings.append(i)
                else:
                    del begginings[-1]
        else:
            if is_above == False:
                is_above = True
                begginings.append(i)
        i = i + 1
    begginings, endings = investigate_tone_boundaries(begginings, endings)
    n = len(begginings)
    rate = (n) / (len(signal) * 1.0/ freq) * 30
        
    print str(threshold) + ': ' + str(under * 1.0 / len(signal)) + ' ' + str(n) + ' ' + str(rate)    
          
    if ((1 - rate_confidence) * heart_rate <= rate and (1 + rate_confidence) * heart_rate >= rate):
        heart_rate = (heart_rate + rate) / 2
        signal_type = 1
    elif ((2 - rate_confidence) * heart_rate <= rate and (2 + rate_confidence) * heart_rate >= rate):
        heart_rate = (2 * heart_rate + rate) / 2
        signal_type = 1
    elif ((1 - 3 * rate_confidence) * heart_rate <= rate and (1 - rate_confidence) * heart_rate >= rate):
        heart_rate = (heart_rate + 2 * rate) / 2
        signal_type = 2
    else:
        signal_type = 3
        
#            wo.plot_wave_signal(signal, freq)
#            plt.axhline(y = threshold, xmin = 0, xmax = 3, c = "red", linewidth = 0.5, zorder = 0)
        
    peaks_energy = np.zeros(n)
    for index in range(0, n - 1):
        peaks_energy[index] = sum(signal[begginings[index] : endings[index]])
        
    return thr, begginings, endings, heart_rate, peaks_energy, signal_type
Example #21
def preprocess(file_path, args):
    # Get sound and sample rate from file using librosa
    try:
        sound, sample_rate = librosa.load(file_path)
    except ZeroDivisionError as e:
        raise ZeroDivisionError("File for error above:", file_path) from e

    # Resampling
    if sample_rate != universal_sample_rate:
        sound = resample(
            sound, int(universal_sample_rate * (len(sound) / sample_rate)))
        pass

    # If argument for noise addition is set, adds random white- or background noise or removes noise
    if args.noise_aug:
        if args.noise_aug == "white_noise":
            if args.n_steps:
                sound = sound_shuffling.add_white_noise(
                    sound,
                    target_snr=np.random.normal(args.n_steps[0],
                                                args.n_steps[1]))
            else:
                sound = sound_shuffling.add_white_noise(
                    sound, target_snr=np.random.normal(4.5, 2.0))
        if args.noise_aug == "background_noise":
            sound = sound_shuffling.add_random_background_noise(
                sound, sample_rate)
        if args.noise_aug == "no_noise":
            sound = preprocessing.extract_noise(sound,
                                                sample_rate,
                                                window_width=2048,
                                                step_size=512,
                                                verbose=False)

    # If argument for shifting is set, shifts amplitude, frequency or time randomly
    if args.shift_aug:
        if args.shift_aug == "amplitude_shift":
            n_steps = random.randint(0, 5)
            sound = sound_shuffling.amplitude_shift(sound, n_steps)
        if args.shift_aug == "frequency_shift":
            n_steps = random.randint(-5, 5)
            sound = sound_shuffling.frequency_shift(sound, sample_rate,
                                                    n_steps)
        if args.shift_aug == "time_stretch":
            n_steps = random.randint(1, 5)
            sound = sound_shuffling.time_stretch(sound, n_steps)

    # Normalize
    sound = preprocessing.normalize(sound)

    # Cut sound up in frames of 5 seconds
    window_width = universal_sample_rate * 5
    step_size = window_width  # TODO: parameterize step size
    nr_of_frames, frames = get_frames(sound, window_width, step_size)

    return np.array(frames)
Example #22
def preprocess(row):
    if row[0] and row[1]:
        txt = row[0] + ' ' + row[1]
    elif row[0]:
        txt = row[0]
    elif row[1]:
        txt = row[1]
    else:
        txt = ''
    return micro_tokenize(normalize(txt))
Example #23
    def _preprocess(self, inputs):
        """Preprocess the input images.

        Args:
            inputs: a batch of raw images.

        Returns:
            a batch of processed images as tensors.
        """
        return normalize(inputs)
Example #24
def load_ingredient2recipes(filename):
    ingredient2recipes = defaultdict(set)
    with open(filename, 'r') as f:
        for line in f:
            recipe = json.loads(line.strip())
            recipe_id = recipe['id']
            ingredients = recipe['ingredients']
            for ingredient in ingredients:
                for normalized_ingredient in preprocessing.normalize(ingredient):
                    ingredient2recipes[normalized_ingredient].add(recipe_id)
    return ingredient2recipes
Example #25
    def process_frame(pose_scores, keypoint_scores, keypoint_coords, frame_num,
                      fps, call_cnt):
        seconds = frame_num / fps
        normalized = normalize(pose_scores, keypoint_scores, keypoint_coords)
        if not normalized:
            return True
        print(f'Inserting #{call_cnt}')
        cursor.execute(insert_sql, (seconds, vid_id, ujson.dumps(normalized)))
        if frame_num % 1000 == 0:
            print('Committing...')
            mydb.commit()
Example #26
    def predict(self, X, ntree_limit=-1):
        X = np.array(X)
        if self.params["normalize"]:
            X = normalize(X)

        if ntree_limit == -1:
            ntree_limit = len(self.model)
        preds = self.model[0].predict(X)
        for ntree in np.arange(1, ntree_limit):
            preds += self.params["learning_rate"] * self.model[ntree].predict(X)
        return preds
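Note: the staged prediction above amounts to f(X) = f_0(X) + learning_rate * sum of the remaining models' predictions; a tiny illustration with stand-in callables instead of fitted trees:
import numpy as np

learning_rate = 0.1
models = [lambda X: np.full(len(X), 0.5),   # initial model
          lambda X: np.ones(len(X)),        # later stages
          lambda X: -np.ones(len(X))]
X = np.zeros((4, 3))
preds = models[0](X) + learning_rate * sum(m(X) for m in models[1:])
print(preds)  # -> [0.5 0.5 0.5 0.5]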
Example #27
def load_data(path):
    my_dir = sorted(os.listdir(path))

    data = []
    gt = []

    for p in tqdm(my_dir):
        data_list = sorted(os.listdir(path + p))
        # print("sorted(os.listdir(path+p))",sorted(os.listdir(path+p)))   ['Brats18_2013_0_1_flair.nii.gz', 'Brats18_2013_0_1_seg.nii.gz', 'Brats18_2013_0_1_t1.nii.gz', 'Brats18_2013_0_1_t1ce.nii.gz', 'Brats18_2013_0_1_t2.nii.gz']

        img_itk = sitk.ReadImage(path + p + '/' + data_list[0])
        # print("image path",path + p + '/'+ data_list[0])  Data/Brats2018/LGG/Brats18_2013_0_1/Brats18_2013_0_1_flair.nii.gz
        flair = sitk.GetArrayFromImage(img_itk)
        # print("flair shape",flair.shape)  # (155, 240, 240)
        # print("flair dtype",flair.dtype)  # int16
        flair = normalize(flair)

        img_itk = sitk.ReadImage(path + p + '/' + data_list[1])
        seg = sitk.GetArrayFromImage(img_itk)

        # print("seg shape",seg.shape)  # (155, 240, 240)
        # print("seg dtype",seg.dtype)  # uint8 / int16

        img_itk = sitk.ReadImage(path + p + '/' + data_list[2])
        t1 = sitk.GetArrayFromImage(img_itk)
        t1 = normalize(t1)

        img_itk = sitk.ReadImage(path + p + '/' + data_list[3])
        t1ce = sitk.GetArrayFromImage(img_itk)
        t1ce = normalize(t1ce)

        img_itk = sitk.ReadImage(path + p + '/' + data_list[4])
        t2 = sitk.GetArrayFromImage(img_itk)
        t2 = normalize(t2)

        data.append([flair, t1, t1ce, t2])
        gt.append(seg)

    data = np.asarray(data, dtype=np.float32)
    gt = np.asarray(gt, dtype=np.uint8)
    return data, gt
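Note: the normalize applied to each MRI modality above is project-specific; one common choice for BraTS-style volumes is z-scoring over the nonzero (brain) voxels, sketched here purely as an illustration:
import numpy as np

def zscore_nonzero(volume):
    # Standardize intensities over nonzero voxels; zeros (background) stay zero.
    vol = volume.astype(np.float32)
    mask = vol > 0
    if mask.any():
        vol[mask] = (vol[mask] - vol[mask].mean()) / (vol[mask].std() + 1e-8)
    return vol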
Example #28
def prepare_embeddings(texts, model, limit=None, batch_size=128):
    # normalize texts
    texts_ = [normalize(text) for text in texts[:limit]]

    hs = []
    for index, batch in enumerate(get_batch(texts_, batch_size)):
        if index and index % 100 == 0:
            print("Processing batch number %i" % index)

        hs.extend([h.reshape(1, -1) for h in model.embed(batch)])

    return hs
Example #29
def process_frame(pose_scores, keypoint_scores, keypoint_coords, frame_num,
                  fps):
    global cnt
    seconds = frame_num / fps
    normalized = normalize(pose_scores, keypoint_scores, keypoint_coords)
    # print(normalized, seconds)
    print('Inserting...')
    cursor.execute(sql, (seconds, 'test_video3', ujson.dumps(normalized)))
    cnt += 1
    if cnt % 500 == 0:
        print('Committing...')
        mydb.commit()
Example #30
def filter_sound(samples, sampling_rate, window_width=2048, stepsize=512, verbose=False):
	
	noise = get_noise_frames(samples=samples, sampling_rate=sampling_rate, window_width=window_width, stepsize=stepsize, verbose=verbose)
	
	if len(noise) > 0:
		
		reduced_noise = nr.reduce_noise(audio_clip=samples, noise_clip=noise, verbose=verbose)
		
		return preprocessing.normalize(reduced_noise)
	
	else:
		
		return samples
Example #31
def data_generator(data_dir, name, image_size, number_marks, training):
    """A generator function used to make TensorFlow dataset.

    Currently only `universal` dataset (image + json) of FMD is supported.

    Args:
        data_dir: the directory of the raw image and json files.
        name: the name of the dataset.
        image_size: the width and height of the input images for the network.
        number_marks: how many marks/points one sample contains.
        training: whether the generated data will be used for training.

    Yields:
        preprocessed image and heatmaps.
    """

    # Initialize the dataset with files.
    dataset = Universal(name.decode("utf-8"))
    dataset.populate_dataset(data_dir.decode("utf-8"), key_marks_indices=None)
    dataset.meta.update({"num_marks": number_marks})

    image_size = tuple(image_size)
    width, _ = image_size
    for sample in dataset:
        # Follow the official preprocessing implementation.
        image = sample.read_image("RGB")
        marks = sample.marks

        if training:
            # Rotate the image randomly.
            image, marks = rotate_randomly(image, marks, (-30, 30))

            # Scale the image randomly.
            image, marks = scale_randomly(image, marks, output_size=image_size)

            # Flip the image randomly.
            image, marks = flip_randomly(image, marks)
        else:
            # Scale the image to output size.
            marks = marks / image.shape[0] * width
            image = cv2.resize(image, image_size)

        # Normalize the image.
        image_float = normalize(image.astype(float))

        # Generate heatmaps.
        heatmaps = generate_heatmaps(marks, width, (64, 64))
        heatmaps = np.transpose(heatmaps, (1, 2, 0))

        yield image_float, heatmaps
Example #32
def add_white_noise(samples, target_snr=2):

	# Calculate the root mean square of the samples
	RMS_samples = np.sqrt(np.mean(samples ** 2))

	# Calculate the root mean square of the noise given a target SNR
	RMS_noise = np.sqrt((RMS_samples ** 2) / 10 ** (target_snr / 10))

	# Generate Additive White Gaussian Noise
	noise = np.random.normal(0, RMS_noise, samples.shape[0])

	# Add noise to samples
	samples += noise

	return preprocessing.normalize(samples)
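Note: the noise RMS above follows SNR_dB = 10 * log10(RMS_signal^2 / RMS_noise^2); a quick standalone check that the generated noise hits the requested SNR:
import numpy as np

rng = np.random.default_rng(0)
samples = np.sin(np.linspace(0, 8 * np.pi, 16000))
target_snr = 2  # dB
rms_samples = np.sqrt(np.mean(samples ** 2))
rms_noise = np.sqrt(rms_samples ** 2 / 10 ** (target_snr / 10))
noise = rng.normal(0, rms_noise, samples.shape[0])
measured_snr = 10 * np.log10(np.mean(samples ** 2) / np.mean(noise ** 2))
print(round(measured_snr, 1))  # close to 2.0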
Example #33
def generate_and_save_images(model, epoch, test_input):
    predictions = model(test_input, training=False)

    fig = plt.figure(figsize=(4, 4))

    for i in range(predictions.shape[0]):
        plt.subplot(4, 4, i + 1)
        plt.imshow(normalize(predictions[i, :, :, 0],
                             input_range=(-1, 1),
                             output_range=(0, 255)),
                   cmap='gray')
        plt.axis('off')

    plt.savefig('./images/epoch_{:04d}.png'.format(epoch))
    plt.close()
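Note: the normalize(..., input_range=(-1, 1), output_range=(0, 255)) helper used above is project-specific; a hypothetical linear range-mapping sketch along the same lines:
import numpy as np

def map_range(x, input_range=(-1, 1), output_range=(0, 255)):
    # Linearly map values from input_range to output_range.
    (in_lo, in_hi), (out_lo, out_hi) = input_range, output_range
    x = np.asarray(x, dtype=float)
    return (x - in_lo) / (in_hi - in_lo) * (out_hi - out_lo) + out_lo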
Example #34
def detectWakewords():
    threading.Timer(0.10, detectWakewords).start()
    global count
    global buff
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=16000, input=True, frames_per_buffer=CHUNKSIZE)
    data = stream.read(CHUNKSIZE)
    raw_speech = np.frombuffer(data, dtype=np.int16)  # np.fromstring is deprecated for binary data
    if modelname.startswith('cnn'):
        X = normalize(mfcc(raw_speech)).reshape(1, 99, 13, 1)
    else:
        X = normalize(mfcc(raw_speech)).reshape(1, 99 * 13)
    pred = [int(round(x[0])) for x in model.predict(X)]
    if np.sum(np.abs(raw_speech)) < len(buff)*100000:
        pred = [0]
    buff.pop(0)
    buff.append(pred[0])
    if (buff[-min_positives:] == [1]*min_positives and buff[-(min_positives+1)] == 0):
        print("Wake word detected #", count)
        winsound.Beep(1000, 300)
        count+=1        
    stream.stop_stream()
    stream.close()
    p.terminate()
Example #35
def get_training_sample(train, sample_type, normalize):

    if (normalize):
        df_norm = preprocessing.normalize(train)[0]
        train = df_norm

    if sample_type == 'uniform':
        uni_sample = uniform_sampling(train, 17000)
        uni_sample.index = np.arange(0, len(uni_sample))
        return uni_sample
    else:
        choice_sample = choice_sampling(train, 1.6)

        # Selecting 17000 samples from the choice sample
        choice_sample = uniform_sampling(choice_sample, 17000)
        choice_sample.index = np.arange(0, len(choice_sample))
        return choice_sample
Example #36
def main():
    parser = argparse.ArgumentParser(description='Inspect top n ingredients')
    parser.add_argument('json', help='Input data file')
    parser.add_argument('--n', help='Number of ingredients to print', type=int, default=1000)
    args = parser.parse_args()

    ingredients_counter = Counter()

    with open(args.json, 'r') as f:
        for line in f:
            recipe = json.loads(line.strip())
            ingredients = recipe['ingredients']

            for ingredient in ingredients:
                for normalized_ingredient in preprocessing.normalize(ingredient):
                    ingredients_counter[normalized_ingredient] += 1

    for ingredient, count in ingredients_counter.most_common(args.n):
        print('{}\t{}'.format(ingredient.encode('utf8'), count))
Example #37
def ingredients(recipe):
    ingredientz = recipe['ingredients']
    for ingredient in ingredientz:
        normalized_ingredients = preprocessing.normalize(ingredient)
        for normalized_ingredient in normalized_ingredients:
            yield ('meta', 'ingr', normalized_ingredient)
Example #38
def main(n_z, n_hidden, dataset, seed, comment, gfx=True):
    
    # Initialize logdir
    import time
    logdir = 'results/gpulearn_z_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'_'+comment+'_'+str(int(time.time()))+'/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    print 'gpulearn_z_x', n_z, n_hidden, dataset, seed
    with open(logdir+'hook.txt', 'a') as f:
        print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed
    
    np.random.seed(seed)

    gfx_freq = 1
    
    weight_decay = 0
    f_enc, f_dec = lambda x:x, lambda x:x

    # Init data
    if dataset == 'mnist':
        import anglepy.data.mnist as mnist
        
        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size)
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32)}
        x_test = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (size,size)
        n_x = size*size
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        nonlinear = 'softplus'
        type_px = 'bernoulli'
        n_train = 50000
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        weight_decay = float(n_batch)/n_train
        
    if dataset == 'mnist_binarized':
        import anglepy.data.mnist_binarized as mnist_binarized
        # MNIST
        train_x, valid_x, test_x = mnist_binarized.load_numpy(28)
        x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (28,28)
        n_x = 28*28
        n_y = 10
        type_qz = 'gaussianmarg'
        type_pz = 'mog'
        nonlinear = 'rectlin'
        type_px = 'bernoulli'
        n_train = 60000
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch)/n_train
        
    elif dataset == 'freyface':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy()
        np.random.shuffle(train_x)
        x = {'x': train_x.T[:,0:n_train]}
        x_valid = {'x': train_x.T[:,n_train:]}
        L_valid = 1
        dim_input = (28,20)
        n_x = 20*28
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'bounded01'
        nonlinear = 'tanh'  #tanh works better with freyface #'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch)/n_train

    elif dataset == 'freyface_pca':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)
        
        f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        train_x = f_enc(train_x)
        
        x = {'x': train_x[:,0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28,20)
        n_x = train_x.shape[0]
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'freyface_bernoulli':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)
        
        x = {'x': train_x[:,0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28,20)
        n_x = train_x.shape[0]
        type_pz = 'gaussianmarg'
        type_px = 'bernoulli'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'norb':    
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        byteToFloat = False
        bernoulli_x = False
        weight_decay= float(n_batch)/train_x.shape[1]
    
    elif dataset == 'norb_pca':    
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)
        
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay= float(n_batch)/train_x.shape[1]

    elif dataset == 'norb_normalized':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        #f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        f_enc, f_dec, _ = pp.normalize(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)
        
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay= float(n_batch)/train_x.shape[1]
        
    elif dataset == 'svhn':
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(False, binarize_y=True) #norb.load_resized(size, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))}
        ndict.shuffleCols(x)
        
        print 'Performing PCA, can take a few minutes... ',
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=600, toFloat=True)
        ndict.savez(pca_params, logdir+'pca_params')
        print 'Done.'
        
        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size,size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
    
        
    # Construct model
    from anglepy.models import GPUVAE_Z_X
    updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay)
    model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3)
    
    if False:
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
        #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
        dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/'
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)
    
    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]
    
    # Progress hook
    def hook(epoch, t, ll):
        
        if epoch%10 != 0: return
        
        ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
        
        # Log
        ndict.savez(ndict.get_value(model.v), logdir+'v')
        ndict.savez(ndict.get_value(model.w), logdir+'w')
        
        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir+'v_best')
            ndict.savez(ndict.get_value(model.w), logdir+'w_best')
        else:
            ll_valid_stats[1] += 1
            # Stop when validation performance has not improved for 1000 checks
            if ll_valid_stats[1] > 1000:
                print "Finished"
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, "Finished"
                exit()
        
        print epoch, t, ll, ll_valid, ll_valid_stats
        with open(logdir+'hook.txt', 'a') as f:
            print >>f, epoch, t, ll, ll_valid, ll_valid_stats

        # Graphics
        if gfx and epoch%gfx_freq == 0:
            
            #tail = '.png'
            tail = '-'+str(epoch)+'.png'
            
            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}
                
            if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:
                
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')
                
                if 'out_unif' in w:
                    image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'out_unif'+tail, 'PNG')
                
                if n_z == 2:
                    n_width = 10
                    import scipy.stats
                    z = {'z':np.zeros((2,n_width**2))}
                    for i in range(0,n_width):
                        for j in range(0,n_width):
                            z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
                            z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
                    
                    x, _, _z = model.gen_xz({}, z, n_width**2)
                    if dataset == 'mnist':
                        x = 1 - _z['x']
                    image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input)
                    image.save(logdir+'2dmanifold'+tail, 'PNG')
                else:
                    _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
                    image.save(logdir+'samples'+tail, 'PNG')
                    
                    #x_samples = _x['x']
                    #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                    #image.save(logdir+'samples2'+tail, 'PNG')
                    
            else:
                # Model with preprocessing
                
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                    
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')

                _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                x_samples = f_dec(_z_confab['x'])
                x_samples = np.minimum(np.maximum(x_samples, 0), 1)
                image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                image.save(logdir+'samples'+tail, 'PNG')
                
                
                
    # Optimize
    #SFO
    dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
    loop_va(dostep, hook)
    
    pass
Example #39
_model = "./output/5"
_name_filter = ["KK201617T1", "KK201617T2"]

_words = []
_norm_dict = None
pca_components = None
model = None

with open(_model+"/preprocess.json", "r") as f:
	preprocess_dict = json.load(f)
	_words = preprocess_dict["words"]
	if "norm_info" in preprocess_dict:
		_norm_dict = preprocess_dict["norm_info"]
	if preprocess_dict["pca"]:
		pca_components = np.load(_model+'/pca.npy')

def get_label(sample):
	#return sample.think + sample.understand + sample.lang + sample.pres
	return sample.think + sample.understand

samples = preprocessing.tp_sample.get_samples(_sample_folder)
texts = [sample.comment for sample in samples if sample.batch_name in _name_filter]
test_matrix, _, _ = preprocessing.preprocess(texts, words_src = _words)
if pca_components is not None:
	test_matrix = np.matmul(test_matrix, pca_components.T)
if _norm_dict is not None:
	test_matrix, _, _ = preprocessing.normalize(test_matrix, norm_info = _norm_dict)
model = models.SVR.load(_model)
result = model.predict(test_matrix)
print([get_label(sample) for sample in samples])
print(result)
Example #40
def main(n_z, n_hidden, dataset, seed, comment, gfx=True):
  # Initialize logdir
  import time
  pre_dir = 'models/gpulearn_z_x_mnist_96-(500, 500)'
  
  if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True:
    comment+='_pre-train'
  if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
    comment+='_prior'
    pre_dir+='_prior'
  if os.environ.has_key('cutoff'):
    comment+=('_'+str(int(os.environ['cutoff'])))
  if os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True:
    comment+='_train-residual'
    pre_dir+='_train-residual'
  if os.environ.has_key('sigma_square'):
    comment+=('_'+str(float(os.environ['sigma_square'])))
    pre_dir+=('_'+str(float(os.environ['sigma_square'])))
  pre_dir+='/'
  logdir = 'results/gpulearn_z_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+comment+'_'+str(int(time.time()))+'/'
  if not os.path.exists(logdir): os.makedirs(logdir)
  print 'logdir:', logdir
  print 'gpulearn_z_x', n_z, n_hidden, dataset, seed
  with open(logdir+'hook.txt', 'a') as f:
    print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed
  
  np.random.seed(seed)

  gfx_freq = 1
  
  weight_decay = 0
  
  # Init data
  if dataset == 'mnist':
    import anglepy.data.mnist as mnist
    
    # MNIST
    size = 28
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size)
    f_enc, f_dec = pp.Identity()
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        mnist_prior = sio.loadmat('data/mnist_prior/mnist_prior.mat')
        train_mean_prior = mnist_prior['z_train']
        test_mean_prior = mnist_prior['z_test']
        valid_mean_prior = mnist_prior['z_valid']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
        valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    print '---------------------', type(train_x)

    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    
    print '---------------------', type(x_train)

    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 50000
    n_test = 10000
    n_valid = 10000
    n_batch = 1000
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  elif dataset == 'higgs':
    size = 28
    f_enc, f_dec = pp.Identity()
    
    inputfile = 'data/higgs/HIGGS.csv'
    print 'loading file.'
    x = np.loadtxt(inputfile, dtype='f4', delimiter=',')
    print 'done.'
    y = x[:,0].reshape((-1,1))
    x = x[:,1:]
    x = np.array(x, dtype='float32')
    y = np.array(y, dtype='float32')
    n_train = 10000000 
    n_valid = 500000
    n_test  = 500000
    n_batch = 1000
    derived_feat = 'all'
    if os.environ.has_key('derived_feat'):
        derived_feat = os.environ['derived_feat']
        color.printBlue(derived_feat)
        
    if derived_feat == 'high':
        # Only the 7 high level features.
        x = x[:, 21:28]
    elif derived_feat == 'low':
        # Only the 21 raw features.
        x = x[:, 0:21]
    else:
        pass
    
    train_x = x[0:n_train, :].T
    y_train = y[0:n_train, :]
    valid_x = x[n_train:n_train+n_valid, :].T
    y_valid = y[n_train:n_train+n_valid, :]
    test_x = x[n_train+n_valid:n_train+n_valid+n_test, :].T
    y_test = y[n_train+n_valid:n_train+n_valid+n_test, :]
    n_y = 2
    n_x = train_x.shape[0]
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))

    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    
    nonlinear = 'tanh'
    if os.environ.has_key('nonlinear'):
        nonlinear = os.environ['nonlinear']
        color.printBlue(nonlinear)
    
    L_valid = 1
    dim_input = (1,size)
    type_px = 'gaussian'
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'cifar10':
    import anglepy.data.cifar10 as cifar10
    size = 32
    train_x, train_y, test_x, test_y = cifar10.load_numpy()
    train_x = train_x.astype(np.float32).T
    test_x = test_x.astype(np.float32).T
    
    ## 
    f_enc, f_dec = pp.Identity()
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat')
        train_mean_prior = cifar_prior['z_train']
        test_mean_prior = cifar_prior['z_test']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    x_valid = x_test
    
    L_valid = 1
    n_y = 10
    dim_input = (size,size)
    n_x = x['x'].shape[0]
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'gaussian'
    if os.environ.has_key('type_px'):
        type_px = os.environ['type_px']
        color.printBlue('Generative type: '+type_px)
    n_train = 50000
    n_test = 10000
    n_batch = 5000
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    #weight_decay = float(n_batch)/n_train
    
  elif dataset == 'cifar10_zca':
    import anglepy.data.cifar10 as cifar10
    size = 32
    train_x, train_y, test_x, test_y = cifar10.load_numpy()
    train_x = train_x.astype(np.float32).T
    test_x = test_x.astype(np.float32).T
    
    ## 
    f_enc, f_dec = pp.Identity()
    zca_mean, zca_w, zca_winv = cifar10.zca(train_x)
    train_x = zca_w.dot(train_x-zca_mean)
    test_x = zca_w.dot(test_x-zca_mean)
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat')
        train_mean_prior = cifar_prior['z_train']
        test_mean_prior = cifar_prior['z_test']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    x_valid = x_test
    
    L_valid = 1
    dim_input = (size,size)
    n_y = 10
    n_x = x['x'].shape[0]
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'gaussian'
    n_train = 50000
    n_test = 10000
    n_batch = 5000
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    if os.environ.has_key('type_px'):
        type_px = os.environ['type_px']
        color.printBlue('Generative type: '+type_px)
        
    nonlinear = 'softplus'
    
  elif dataset == 'mnist_basic': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    #color.printRed(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  
  elif dataset == 'rectangle': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_'
    tmp = sio.loadmat(data_dir+'train.mat')
    color.printRed(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,1000:]
    valid_y = train_y[1000:]
    train_x = train_x[:,:1000]
    train_y = train_y[:1000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 1000
    n_valid = 200
    n_test = 50000
    n_batch = 500
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    #print '3', n_x
    
  elif dataset == 'convex': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'convex_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,6000:]
    valid_y = train_y[6000:]
    train_x = train_x[:,:6000]
    train_y = train_y[:6000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 6000
    n_valid = 2000
    n_test = 50000
    n_batch = 120
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'rectangle_image': 
    # MNIST variations benchmark: rectangles on image backgrounds
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_im_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_rot':
    # MNIST variations benchmark: rotated digits
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_rotation_normalized_float_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    f_enc, f_dec = pp.Identity()
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    
    
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_rand': 
    # MNIST variations benchmark: digits on random-noise backgrounds
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_random_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    f_enc, f_dec = pp.Identity()
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_image': 
    # MNIST variations benchmark: digits on image backgrounds
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_images_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    f_enc, f_dec = pp.Identity()
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_image_rot': 
    # MNIST variations benchmark: rotated digits on image backgrounds
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_background_images_rotation_normalized_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    f_enc, f_dec = pp.Identity()
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_binarized':
    #import anglepy.data.mnist_binarized as mnist_binarized
    # binarized MNIST
    import anglepy.data.mnist as mnist
    
    size = 28
    
    data_dir = '/home/lichongxuan/regbayes2/data/mat_data/'+'binarized_mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    #train_y = tmp['t_train'].T.astype(np.int32)
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    tmp = sio.loadmat(data_dir+'valid.mat')
    #print tmp.keys()
    valid_x = tmp['x_valid'].T
    #test_y = tmp['t_test'].T.astype(np.int32)
    
    f_enc, f_dec = pp.Identity()
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
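    # The validation split is folded into the training set here, and the held-out
    # test set is reused as both the 'valid' and 'test' dictionaries below.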
    
    train_x = np.hstack((train_x, valid_x)).astype(np.float32)
    train_mean_prior = np.hstack((train_mean_prior,valid_mean_prior)).astype(np.float32)
    
    print train_mean_prior.shape
    print train_x.shape
    
    x = {'x': train_x.astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': test_x.astype(np.float32),'mean_prior':test_mean_prior.astype(np.float32)}
    x_test = x_valid
    
    L_valid = 1
    dim_input = (28,28)
    n_x = 28*28
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 60000
    n_valid = 10000
    n_batch = 1000
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_binarized_own':
    #import anglepy.data.mnist_binarized as mnist_binarized
    # binarized MNIST (own binarization)
    import anglepy.data.mnist as mnist
    
    size = 28
    
    data_dir = 'data/mnist_binarized_own/'+'binarized_mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['train_x'].T
    #train_y = tmp['t_train'].T.astype(np.int32)
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['test_x'].T
    tmp = sio.loadmat(data_dir+'valid.mat')
    #print tmp.keys()
    valid_x = tmp['valid_x'].T
    #test_y = tmp['t_test'].T.astype(np.int32)
    
    f_enc, f_dec = pp.Identity()
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    train_x = np.hstack((train_x, valid_x)).astype(np.float32)
    train_mean_prior = np.hstack((train_mean_prior,valid_mean_prior)).astype(np.float32)
    
    print train_mean_prior.shape
    print train_x.shape
    
    x = {'x': train_x.astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)}
    x_train = x
    x_valid = {'x': test_x.astype(np.float32),'mean_prior':test_mean_prior.astype(np.float32)}
    x_test = x_valid
    
    L_valid = 1
    dim_input = (28,28)
    n_x = 28*28
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 60000
    n_valid = 10000
    n_batch = 1000
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  
  elif dataset == 'freyface':
    # Frey's face
    import anglepy.data.freyface as freyface
    n_train = 1600
    train_x = freyface.load_numpy()
    np.random.shuffle(train_x)
    x = {'x': train_x.T[:,0:n_train]}
    x_valid = {'x': train_x.T[:,n_train:]}
    L_valid = 1
    dim_input = (28,20)
    n_x = 20*28
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'bounded01'
    nonlinear = 'tanh'  # tanh works better than softplus for the Frey faces
    n_batch = 100
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train

  elif dataset == 'freyface_pca':
    # Frey's face
    import anglepy.data.freyface as freyface
    n_train = 1600
    train_x = freyface.load_numpy().T
    np.random.shuffle(train_x.T)
    
    f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
    train_x = f_enc(train_x)
    
    x = {'x': train_x[:,0:n_train].astype(np.float32)}
    x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
    L_valid = 1
    dim_input = (28,20)
    n_x = train_x.shape[0]
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 100
    colorImg = False
    bernoulli_x = False
    byteToFloat = False

  elif dataset == 'freyface_bernoulli':
    # Frey's face
    import anglepy.data.freyface as freyface
    n_train = 1600
    train_x = freyface.load_numpy().T
    np.random.shuffle(train_x.T)
    
    x = {'x': train_x[:,0:n_train].astype(np.float32)}
    x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
    L_valid = 1
    dim_input = (28,20)
    n_x = train_x.shape[0]
    type_pz = 'gaussianmarg'
    type_px = 'bernoulli'
    nonlinear = 'softplus'
    n_batch = 100
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
  
  elif dataset == 'norb_48_24300_pca':
    size = 48
    
    train_x, train_y, test_x, test_y = np.load('data/norb/norb_48_24300.npy')
    
    _x = {'x': train_x, 'y': train_y}
    #ndict.shuffleCols(_x)
    #train_x = _x['x']
    #train_y = _x['y']
    
    
    #print _x['x'][:,:10000].shape
    
    # Do PCA
    print 'pca'
    f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=500, toFloat=False)
    ndict.savez(pca_params, logdir+'pca_params')
    print 'done'
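    # f_enc projects images onto the leading 500 principal components (fit on the
    # first 10000 training columns) and f_dec maps codes back to pixel space; the
    # PCA parameters are saved so generated samples can be decoded later.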
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    x = {'x': f_enc(train_x).astype(np.float32), 'mean_prior' : train_mean_prior.astype(np.float32)}
    x_valid = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32)}
    x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32)}
    
    x_train = x
    
    print x['x'].shape
    print x['mean_prior'].shape
    
    
    L_valid = 1
    n_y = 5
    n_x = x['x'].shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900  # 24300 examples / 900 = 27 minibatches per epoch
    colorImg = False
    #binarize = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay= float(n_batch)/train_x.shape[1]
    
  elif dataset == 'norb':  
    # small NORB dataset
    import anglepy.data.norb as norb
    size = 48
    train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

    x = {'x': train_x.astype(np.float32)}
    x_valid = {'x': test_x.astype(np.float32)}
    L_valid = 1
    n_x = train_x.shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900  # 24300 examples / 900 = 27 minibatches per epoch
    colorImg = False
    #binarize = False
    byteToFloat = False
    bernoulli_x = False
    weight_decay= float(n_batch)/train_x.shape[1]
  
  elif dataset == 'norb_pca':  
    # small NORB dataset
    import anglepy.data.norb as norb
    size = 48
    train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

    f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
    #f_enc, f_dec, _ = pp.normalize_random(train_x)
    train_x = f_enc(train_x)
    test_x = f_enc(test_x)
    
    x = {'x': train_x.astype(np.float32)}
    x_valid = {'x': test_x.astype(np.float32)}
    L_valid = 1
    n_x = train_x.shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900  # 24300 examples / 900 = 27 minibatches per epoch
    colorImg = False
    #binarize = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay= float(n_batch)/train_x.shape[1]

  elif dataset == 'norb_normalized':
    # small NORB dataset
    import anglepy.data.norb as norb
    size = 48
    train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

    #f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
    #f_enc, f_dec, _ = pp.normalize_random(train_x)
    f_enc, f_dec, _ = pp.normalize(train_x)
    train_x = f_enc(train_x)
    test_x = f_enc(test_x)
    
    x = {'x': train_x.astype(np.float32)}
    x_valid = {'x': test_x.astype(np.float32)}
    L_valid = 1
    n_x = train_x.shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900  # 24300 examples / 900 = 27 minibatches per epoch
    colorImg = False
    #binarize = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay= float(n_batch)/train_x.shape[1]
    
  elif dataset == 'svhn':
    # SVHN dataset
    #import anglepy.data.svhn as svhn
    
    size = 32
    train_x, train_y, test_x, test_y = np.load('data/svhn/svhn.npy')
    #extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
    #x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))}
    #ndict.shuffleCols(x)
    x = {'x' : train_x, 'y': train_y}
    
    print 'Performing PCA, can take a few minutes... '
    cutoff = 300
    if os.environ.has_key('cutoff'):
        cutoff = int(os.environ['cutoff'])
        color.printBlue('cutoff: '+str(cutoff))
        
    f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=cutoff, toFloat=True)
    ndict.savez(pca_params, logdir+'pca_params')
    print 'Done.'
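    # PCA keeps 300 components by default; the 'cutoff' environment variable
    # overrides this. The optional 'prior' environment variable (checked below)
    # loads precomputed per-example prior means instead of the default zero prior.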
    n_y = 10
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        train_mean_prior, train_y1, test_mean_prior, test_y1 = np.load('data/svhn/svhn_prior.npy')
        print np.sum((train_y1 == train_y).astype(np.int32))
        print np.sum((test_y1 == test_y).astype(np.int32))
        
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    x = {'x': f_enc(x['x']).astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)}
    x_train = x
    x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior':test_mean_prior.astype(np.float32)}
    x_valid = x_test
    
    print x_train['x'].shape
    print x_test['x'].shape
    print train_y.shape
    print test_y.shape
    print x_train['mean_prior'].shape
    print x_test['mean_prior'].shape
    
    L_valid = 1
    n_x = x['x'].shape[0]
    dim_input = (size,size)
    n_batch = 5000
    n_train = 604388
    n_valid = 26032 
    n_test = 26032 
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    # weight decay for the optimizer below, using the same formula as the other dataset branches
    weight_decay = float(n_batch)/n_train

  else:
    print 'Invalid dataset: ' + str(dataset)
    exit()
  #print '2', n_x
  
  # Construct model
  from anglepy.models import GPUVAE_Z_X
  learning_rate1 = 3e-4
  if os.environ.has_key('stepsize'):
    learning_rate1 = float(os.environ['stepsize'])
    color.printBlue(str(learning_rate1))
  if os.environ.has_key('preoption'):
    pre = int(os.environ['preoption'])
    if pre == 1:
        updates = get_adam_optimizer(learning_rate=3e-4, decay1=0.9, decay2=0.999, weight_decay=0)
    elif pre == 2:
        updates = get_adam_optimizer(learning_rate=3e-4, decay1=0.9, decay2=0.999, weight_decay=weight_decay)
    else:
        raise Exception('Unknown preoption: ' + str(pre))
    with open(logdir+'hook.txt', 'a') as f:
      print >>f, 'preoption ' + str(pre)
  else:
    updates = get_adam_optimizer(learning_rate=learning_rate1, weight_decay=weight_decay)
  #print '1', n_x
  
  model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3)
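  # Assumed reading of the constructor call above (inferred from the arguments,
  # not from the GPUVAE_Z_X source): the recognition network q(z|x) uses the
  # hidden-layer sizes in n_hidden and the generative network p(x|z) uses them
  # reversed (n_hidden[::-1]), both with the chosen nonlinearity; type_px sets
  # the data likelihood, type_qz/type_pz the posterior/prior forms, prior_sd a
  # broad prior scale and init_sd the initial weight scale.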
  
  if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True:
    #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
    #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
    #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
    #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
    #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
    
    if len(n_hidden) == 1:
        color.printBlue('pre-training-1-layer')
        layer_str = '-500'
    elif len(n_hidden) == 2:
        color.printBlue('pre-training-2-layers')
        layer_str = '-(500, 500)'
    else:
        raise Exception()
        
    pre_str = 'models/gpulearn_z_x_'
    if dataset == 'mnist':
      #dir = pre_str + 'mnist_'+str(n_z)+layer_str+'_longrun/'
      dir = 'models/mnist_z_x_50-500-500_longrun/'
    elif dataset == 'mnist_rot':
      dir = pre_str + 'mnist_rot_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_rand':
      dir = pre_str + 'mnist_back_rand_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_image':
      dir = pre_str + 'mnist_back_image_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_image_rot':
      dir = pre_str + 'mnist_back_image_rot_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'rectangle':
      dir = pre_str + 'rectangle_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'rectangle_image':
      dir = pre_str + 'rectangle_image_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'convex':
      dir = pre_str + 'convex_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_basic':
      dir = pre_str + 'mnist_basic_'+str(n_z)+layer_str+'_longrun/'

    
    if dataset == 'svhn':
        if (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True):
            print 'prior-------------------'
            pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_prior_'+str(cutoff)+'_longrun/'
        else:
            pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_'+str(cutoff)+'_longrun/'
            
        color.printBlue(pre_dir)    
        w = ndict.loadz(pre_dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(pre_dir+'v_best.ndict.tar.gz')
            
    elif n_z == 50:
        print 'n_z = 50', dir
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
    else:
        print 'n_z != 50'
        # 'pre_dir' is only defined in the SVHN case above, so reuse the
        # dataset-specific directory 'dir' built earlier.
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
    ndict.set_value2(model.w, w)
    ndict.set_value2(model.v, v)
  
  # Some statistics for optimization
  ll_valid_stats = [-1e99, 0]
  
  # Progress hook
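  # The hook below runs every 10 epochs: it estimates the validation (and, except
  # for SVHN, test) log-likelihood, checkpoints the parameters, saves inferred
  # latents whenever the validation score improves, stops after 1000 evaluations
  # without improvement, and optionally renders weight and sample images.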
  def hook(epoch, t, ll):
    
    if epoch%10 != 0: return
    
    n_batch_n = n_batch
    if n_batch_n > n_valid:
        n_batch_n = n_valid
    ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch_n, byteToFloat=byteToFloat)
    ll_test = ll_valid
    #if not dataset == 'mnist_binarized':
    if not dataset == 'svhn':
        ll_test, _ = model.est_loglik(x_test, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
    
    # Log
    ndict.savez(ndict.get_value(model.v), logdir+'v')
    ndict.savez(ndict.get_value(model.w), logdir+'w')
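    # The helper below pushes data through the recognition network in minibatches
    # and returns (stacked hidden activations, posterior means, last hidden layer,
    # posterior log-variances), each with one column per example.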
    
    def infer(data, n_batch=1000):
        #print '--', n_batch
        size = data['x'].shape[1]
        res = np.zeros((sum(n_hidden), size))
        res1 = np.zeros((n_z,size))
        res2 = np.zeros((n_hidden[-1],size))
        res3 = np.zeros((n_z,size))
        for i in range(0, size, n_batch):
          idx_to = min(size, i+n_batch)
          x_batch = ndict.getCols(data, i, idx_to)
          
          # may have bugs
          nn_batch = idx_to - i
          
          _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch)
          x_samples = _z_confab['x']
          for (hi, hidden) in enumerate(_z_confab['hidden']):
            res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]),i:i+nn_batch] = hidden
          res1[:,i:i+nn_batch] = _z_confab['mean']
          res2[:,i:i+nn_batch] = _z_confab['hidden'][-1]
          res3[:,i:i+nn_batch] = _z_confab['logvar']
        #print '--'             
        return res, res1, res2, res3
          
    #print '..', n_batch
    #if not dataset == 'mnist_binarized':
    if not dataset == 'svhn':
        z_test, z_test1, z_test2, vv_test = infer(x_test)
        z_train, z_train1, z_train2, vv_train = infer(x_train)
    
    
    if ll_valid > ll_valid_stats[0]:
      ll_valid_stats[0] = ll_valid
      ll_valid_stats[1] = 0
      ndict.savez(ndict.get_value(model.v), logdir+'v_best')
      ndict.savez(ndict.get_value(model.w), logdir+'w_best')
      #if not dataset == 'mnist_binarized':
      if dataset == 'svhn':
        pass
        #np.save(logdir+'full_latent', ('z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train))
        #np.save(logdir+'last_latent', ('z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2))
      else:
        sio.savemat(logdir+'full_latent.mat', {'z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train})
        sio.savemat(logdir+'mean_latent.mat', {'z_test': z_test1, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train1})
        sio.savemat(logdir+'last_latent.mat', {'z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2})
        
    else:
      ll_valid_stats[1] += 1
      # Stop when the validation log-likelihood has not improved for 1000 consecutive evaluations
      if ll_valid_stats[1] > 1000:
        print "Finished"
        with open(logdir+'hook.txt', 'a') as f:
          print >>f, "Finished"
        exit()
    
    print epoch, t, ll, ll_valid, ll_test, ll_valid_stats
    with open(logdir+'hook.txt', 'a') as f:
      print >>f, epoch, t, ll, ll_valid, ll_test, ll_valid_stats
    
    '''
    if dataset != 'svhn':
        l_t, px_t, pz_t, qz_t = model.test(x_train, n_samples=1, n_batch=n_batch, byteToFloat=byteToFloat)
        print 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
        #sigma_square = float(os.environ['sigma_square'])
        print 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior))
        with open(logdir+'hook.txt', 'a') as f:
          print >>f, 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
          print >>f, 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior)) 
    '''      
      
    # Graphics
    if gfx and epoch%gfx_freq == 0:
      
      #tail = '.png'
      tail = '-'+str(epoch)+'.png'
      
      v = {i: model.v[i].get_value() for i in model.v}
      w = {i: model.w[i].get_value() for i in model.w}
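      # Datasets kept in raw pixel space are visualized directly in this branch;
      # for PCA/ZCA/normalized datasets the else-branch further down decodes
      # weights and samples back to pixel space (via f_dec and, where needed,
      # zca_dec) before rendering.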
        
      if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset and 'zca' not in dataset:
        
        
        if 'w0' in v:
          
          image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
          image.save(logdir+'q_w0'+tail, 'PNG')
        
        image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
        image.save(logdir+'out_w'+tail, 'PNG')
        
        if 'out_unif' in w:
          image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg)
          image.save(logdir+'out_unif'+tail, 'PNG')
        
        if n_z == 2:
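          # For a 2-D latent space, map a regular grid of quantiles through the
          # inverse standard-normal CDF so the z values cover the prior, then
          # decode each z to an image (the '2dmanifold' plot).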
          n_width = 10
          import scipy.stats
          z = {'z':np.zeros((2,n_width**2))}
          for i in range(0,n_width):
            for j in range(0,n_width):
              z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
              z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
          
          x, _, _z = model.gen_xz({}, z, n_width**2)
          x_show = _z['x']
          if dataset == 'mnist':
            # invert intensities for MNIST so the digits render dark-on-light
            x_show = 1 - x_show
          image = paramgraphics.mat_to_img(f_dec(x_show), dim_input)
          image.save(logdir+'2dmanifold'+tail, 'PNG')
        else:
          if 'norb' in dataset or dataset=='svhn':
            nn_batch_nn = 64
          else:
            nn_batch_nn = 144
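          # If a non-zero mean prior is in use (environment variable 'prior') and
          # this is not a residual-training run, sample around randomly chosen
          # training prior means (optionally scaled by 'sigma_square'); otherwise
          # the else-branch samples from the standard prior.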
          if not(os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True) and (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True):
            
            
            mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn)
            m_p = x_train['mean_prior'][:,mp_in]
            s_s = 1
            if os.environ.has_key('sigma_square'):
                s_s = float(os.environ['sigma_square'])
            x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            x_samples = x_samples['x']
            m_p1 = (np.ones((n_z, nn_batch_nn)).T * np.mean(x_train['mean_prior'], axis = 1)).T
            x_samples1 = model.gen_xz_prior({}, {}, m_p1.astype(np.float32), s_s, n_batch=nn_batch_nn)
            image = paramgraphics.mat_to_img(f_dec(x_samples1['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'mean_samples-prior'+tail, 'PNG')
            x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            image = paramgraphics.mat_to_img(f_dec(x_samples11['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'prior-image'+tail, 'PNG')
          else:
            _x, _, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn)
            x_samples = _z_confab['x']
          image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
          image.save(logdir+'samples-prior'+tail, 'PNG')
          
          #x_samples = _x['x']
          #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
          #image.save(logdir+'samples2'+tail, 'PNG')
          
      else:
        # Model with preprocessing
        
        if 'w0' in v:
          tmp = f_dec(v['w0'][:].T)
          
          #print dim_input
          #print tmp.shape
          
          if 'zca' in dataset or dataset=='svhn':
            tmp = zca_dec(zca_mean, zca_winv, tmp)
          image = paramgraphics.mat_to_img(tmp, dim_input, True, colorImg=colorImg)
          image.save(logdir+'q_w0'+tail, 'PNG')
        
        tmp = f_dec(w['out_w'][:])
        if 'zca' in dataset:
          tmp = zca_dec(zca_mean, zca_winv, tmp)
            
        image = paramgraphics.mat_to_img(tmp, dim_input, True, colorImg=colorImg)
        image.save(logdir+'out_w'+tail, 'PNG')
        
        if dataset == 'svhn':
            nn_batch_nn = 64
        else:
            nn_batch_nn = 144
        
        if not(os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True) and (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True):
            
            mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn)
            m_p = x_train['mean_prior'][:,mp_in]
            s_s = 1
            if os.environ.has_key('sigma_square'):
                s_s = float(os.environ['sigma_square'])
            x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            x_samples = zca_dec(zca_mean, zca_winv,x_samples['x'])
            x_samples = np.minimum(np.maximum(x_samples, 0), 1)
            
            x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            x_samples11 = zca_dec(zca_mean,zca_winv,x_samples11['x'])
            x_samples11 = np.minimum(np.maximum(x_samples11, 0), 1)
            
            image = paramgraphics.mat_to_img(x_samples11, dim_input, colorImg=colorImg)
            image.save(logdir+'prior-image'+tail, 'PNG')
        else:
          _x, _z, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn)
          x_samples = f_dec(_z_confab['x'])
          x_samples = np.minimum(np.maximum(x_samples, 0), 1)
        
        image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
        image.save(logdir+'samples'+tail, 'PNG')
        
        '''
        def infer(data, n_batch=1000):
            #print '--', n_batch
            size = data['x'].shape[1]
            res = np.zeros((sum(n_hidden), size))
            res1 = np.zeros((n_z,size))
            res2 = np.zeros((n_hidden[-1],size))
            res3 = np.zeros((n_z,size))
            for i in range(0, size, n_batch):
              idx_to = min(size, i+n_batch)
              x_batch = ndict.getCols(data, i, idx_to)
              
              # may have bugs
              nn_batch = idx_to - i
              
              _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch)
              x_samples = _z_confab['x']
              for (hi, hidden) in enumerate(_z_confab['hidden']):
                res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]),i:i+nn_batch] = hidden
              res1[:,i:i+nn_batch] = _z_confab['mean']
              res2[:,i:i+nn_batch] = _z_confab['hidden'][-1]
              res3[:,i:i+nn_batch] = _z_confab['logvar']
            #
            return res, res1, res2, res3
        
        #print n_batch
        #if not dataset == 'mnist_binarized':
        z_test, z_test1, z_test2, vv_test = infer(x_test)
        z_train, z_train1, z_train2, vv_train = infer(x_train)
          
        l_t, px_t, pz_t, qz_t = model.test(x_train, n_samples=1, n_batch=n_batch, byteToFloat=byteToFloat)
        print 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
        #sigma_square = float(os.environ['sigma_square'])
        print 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior))
        with open(logdir+'hook.txt', 'a') as f:
          print >>f, 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
          print >>f, 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior))
          
        #if not dataset == 'mnist_binarized':  
        sio.savemat(logdir+'full_latent.mat', {'z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train})
        sio.savemat(logdir+'mean_latent.mat', {'z_test': z_test1, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train1})
        sio.savemat(logdir+'last_latent.mat', {'z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2})
        '''
        
        
  # Optimize
  #SFO
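  # epoch_vae_adam presumably returns a closure that performs one full epoch of
  # minibatch Adam updates on the variational bound, and loop_va calls it
  # repeatedly, invoking hook(epoch, t, ll) after each epoch; both helpers are
  # assumed to be defined earlier in this file.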
  dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
  loop_va(dostep, hook)
  
  pass