Example #1
def other_levels(input_dir, hex_digits, binary_digits):
    pickles = os.listdir(input_dir)
    pickles.sort()
    spectrum = {"left": {}, "right": {}}
    hex_num = None
    for pic in pickles:
        m = re.search(r"[0-9a-fA-F]{" + str(hex_digits) + "}_[0-1]{" + str(binary_digits) + "}", pic)
        if m:
            hex_num = re.search(r"[0-9a-fA-F]{" + str(hex_digits) + "}", pic).group()
            binary_num = re.search(r"[0-1]{" + str(binary_digits) + "}", pic).group()
            if "left" in pic:
                if binary_num not in spectrum["left"].keys():
                    with open(input_dir + pic, 'rb') as f:
                        u = pickle._Unpickler(f)
                        u.encoding = 'latin1'
                        spec, freq, time = u.load()
                        # data = pickle.load(f)
                    spectrum["left"][binary_num] = (spec, freq, time)
            else:
                if binary_num not in spectrum["right"].keys():
                    with open(input_dir + pic, 'rb') as f:
                        u = pickle._Unpickler(f)
                        u.encoding = 'latin1'
                        spec, freq, time = u.load()
                        # data = pickle.load(f)
                    spectrum["right"][binary_num] = (spec, freq, time)
    if hex_num is not None:
        save_combined_pickles_other_levels(spectrum, hex_num, input_dir)
Example #2
def lowest_level(input_dir, hex_digits):
    pickles = os.listdir(input_dir)
    pickles.sort()
    spectrum = {"left": {}, "right": {}}
    for pic in pickles:
        spec = freq = None
        m = re.search(r"[0-9a-fA-F]{" + str(hex_digits) + "}", pic)
        if m:
            hex_num = re.search(r"[0-9a-fA-F]{" + str(hex_digits) + "}", pic).group()
            if "left" in pic:
                if hex_num not in spectrum["left"].keys():
                    try:
                        with open(input_dir + pic, 'rb') as f:
                            u = pickle._Unpickler(f)
                            u.encoding = 'latin1'
                            spec, freq, time = u.load()
                            # data = pickle.load(f)
                        spectrum["left"][hex_num] = (spec, freq, None)
                    except EOFError:
                        spectrum["left"][hex_num] = (spec, freq, None)
            else:
                if hex_num not in spectrum["right"].keys():
                    try:
                        with open(input_dir + pic, 'rb') as f: 
                            u = pickle._Unpickler(f)
                            u.encoding = 'latin1'
                            spec, freq, time = u.load()
                            # data = pickle.load(f)
                        spectrum["right"][hex_num] = (spec, freq, None)
                    except EOFError:
                        spectrum["left"][hex_num] = (spec, freq, None)
    save_combined_pickles_lowest_level(spectrum, input_dir)
Example #3
def load_mnist(dataset = 'data/mnist.pkl.gz'):
    """ Loads the dataset

    :type dataset: string
    :param dataset: the path to the dataset (here MNIST)
    """
    data_dir, data_file = os.path.split(dataset)

    if not os.path.exists(data_dir):
        os.makedirs(data_dir)

    if not os.path.isfile(dataset):
        import urllib.request as url
        origin = 'http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz'
        print('Downloading data from ', origin)
        url.urlretrieve(origin, dataset)

    print('Loading data')
    f = gzip.open(dataset, 'rb')
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    train_set, valid_set, test_set = u.load()
    f.close()

    return train_set, valid_set, test_set
Example #4
def loadPerson(person, classFunc, featureFunc, plots=False, pad='../dataset'):
    fname = str(pad) + '/s'
    if person < 10:
        fname += '0'
    fname += str(person) + '.dat'
    with open(fname,'rb') as f:
        p = pickle._Unpickler(f)
        p.encoding = 'latin1'
        data = p.load()

        if plots:
            plotClass(data, person)

        #structure of data element:
        #data['labels'][video] = [valence, arousal, dominance, liking]
        #data['data'][video][channel] = [samples * 8064]

        X = featureFunc(data)
        y = classFunc(data)

        #split train / test 
        #n_iter = 1 => abuse the shuffle split to obtain a single static split instead of cross-validation
        sss = StratifiedShuffleSplit(y, n_iter=1, test_size=0.25, random_state=19)
        for train_set_index, test_set_index in sss:
            X_train, y_train = X[train_set_index], y[train_set_index]
            X_test , y_test  = X[test_set_index] , y[test_set_index]
        
        #fit normalizer to train set & normalize both train and testset
        #normer = Normalizer(copy=False)
        #normer.fit(X_train, y_train)
        #X_train = normer.transform(X_train, y_train, copy=False)
        #X_test  = normer.transform(X_test, copy=False)

        return X_train, y_train, X_test, y_test
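Note: this example (like Example #6 below) uses the legacy scikit-learn cross_validation API, where StratifiedShuffleSplit is constructed with the labels and n_iter and then iterated directly. In current scikit-learn the class lives in sklearn.model_selection, takes n_splits, and yields index pairs from .split(X, y). A minimal sketch of the equivalent single split, assuming X and y as produced by featureFunc and classFunc above:

from sklearn.model_selection import StratifiedShuffleSplit

# one stratified 75/25 split, mirroring the n_iter=1 trick above
sss = StratifiedShuffleSplit(n_splits=1, test_size=0.25, random_state=19)
for train_index, test_index in sss.split(X, y):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]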
Example #5
def load_data():
    """Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data.

    The ``training_data`` is returned as a tuple with two entries.
    The first entry contains the actual training images.  This is a
    numpy ndarray with 50,000 entries.  Each entry is, in turn, a
    numpy ndarray with 784 values, representing the 28 * 28 = 784
    pixels in a single MNIST image.

    The second entry in the ``training_data`` tuple is a numpy ndarray
    containing 50,000 entries.  Those entries are just the digit
    values (0...9) for the corresponding images contained in the first
    entry of the tuple.

    The ``validation_data`` and ``test_data`` are similar, except
    each contains only 10,000 images.

    This is a nice data format, but for use in neural networks it's
    helpful to modify the format of the ``training_data`` a little.
    That's done in the wrapper function ``load_data_wrapper()``, see
    below.
    """
    with gzip.open('data/mnist.pkl.gz', 'rb') as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        training_data, validation_data, test_data = u.load()
    return training_data, validation_data, test_data
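The docstring above mentions a load_data_wrapper() defined elsewhere. As a rough, hypothetical sketch of the reshaping it describes (not the original wrapper), each training image can be turned into a (784, 1) column vector and each training label into a one-hot (10, 1) vector:

import numpy as np

def vectorized_result(j):
    # one-hot encode a digit label 0..9 as a (10, 1) column vector
    e = np.zeros((10, 1))
    e[j] = 1.0
    return e

def load_data_wrapper_sketch():
    # relies on load_data() from the example above
    training_data, validation_data, test_data = load_data()
    training_inputs = [x.reshape(784, 1) for x in training_data[0]]
    training_results = [vectorized_result(y) for y in training_data[1]]
    return list(zip(training_inputs, training_results))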
Example #6
    def load(self, person):
        fname = str(self.path) + "/s"
        if person < 10:
            fname += "0"
        fname += str(person) + ".dat"
        with open(fname, "rb") as f:
            p = pickle._Unpickler(f)
            p.encoding = "latin1"
            data = p.load()

            # structure of data element:
            # data['labels'][video] = [valence, arousal, dominance, liking]
            # data['data'][video][channel] = [samples * 8064]

            X = self.featureExtractor.extract(data["data"])
            y = self.classificator.classify(data["labels"])

            # split train / test
            # n_iter = 1 => abuse the shuffle split to obtain a single static split instead of cross-validation
            sss = StratifiedShuffleSplit(y, n_iter=1, test_size=0.25, random_state=19)
            for train_set_index, test_set_index in sss:
                X_train, y_train = X[train_set_index], y[train_set_index]
                X_test, y_test = X[test_set_index], y[test_set_index]

            # fit normalizer to train set & normalize both train and testset
            # normer = Normalizer(copy=False)
            # normer.fit(X_train, y_train)
            # X_train = normer.transform(X_train, y_train, copy=False)
            # X_test  = normer.transform(X_test, copy=False)

            return np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)
Example #7
def _load_mnist():
    data_dir = os.path.dirname(os.path.abspath(__file__))
    data_file = os.path.join(data_dir, "mnist.pkl.gz")

    print("Looking for data file: ", data_file)

    if not os.path.isfile(data_file):
        import urllib.request as url

        origin = "http://www.iro.umontreal.ca/~lisa/deep/data/mnist/mnist.pkl.gz"
        print("Downloading data from: ", origin)
        url.urlretrieve(origin, data_file)

    print("Loading MNIST data")
    f = gzip.open(data_file, "rb")
    u = pickle._Unpickler(f)
    u.encoding = "latin1"
    train_set, valid_set, test_set = u.load()
    f.close()

    train_x, train_y = train_set
    valid_x, valid_y = valid_set
    testing_x, testing_y = test_set

    training_x = np.vstack((train_x, valid_x))
    training_y = np.concatenate((train_y, valid_y))

    training_x = training_x.reshape((training_x.shape[0], 1, 28, 28))
    testing_x = testing_x.reshape((testing_x.shape[0], 1, 28, 28))

    return training_x, training_y, testing_x, testing_y
Example #8
    def run(self):

        self.processing_model.state.set(self.SETUP)
        self.processing_model.progress.set("1%")

        request = self.processing_model.processing_request_model

        ds_removal_transform = get_affected_samples(request.trajectory_location, request.story_location)

        self.ds = remove_samples(request.bold_location, request.mask_location, ds_removal_transform)


        with open(request.trajectory_location, 'rb') as f:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            original_trajectory = u.load()

        self.processing_model.progress.set("10%")
        self.processing_model.state.set(self.PROCESSING)

        self.resampled_trajectory = resample(original_trajectory, len(self.ds.samples))
        self.ds.sa['targets'] = self.resampled_trajectory
        self.processing_model.original = self.resampled_trajectory

        #make the ds smaller
        #old_ds = self.ds
        #self.ds = old_ds[:100]
        #self.resampled_trajectory = resample(original_trajectory, len(self.ds.samples))

        #compute the result
        self.compute_result()

        self.processing_model.progress.set("100%")
        self.processing_model.state.set(self.FINISHED)
Example #9
def unpickle_from_path(path):
    # Oh... the joys of Py2 vs Py3
    with open(path, 'rb') as f:
        if sys.version_info[0] == 2:
            return pickle.load(f)
        else:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            return u.load()
Example #10
    def load_data(self, file_name):
        with open(file_name, 'rb') as file:
            unpickler = pickle._Unpickler(file)
            unpickler.encoding = 'latin1'
            contents = unpickler.load()
            X, Y = np.asarray(contents['data'], dtype=np.float32), np.asarray(contents['labels'])
            one_hot = np.zeros((Y.size, Y.max() + 1))
            one_hot[np.arange(Y.size), Y] = 1
            return X, one_hot
Example #11
def tester(tests, cmp_method, opts):
	failed=[]
	sgtolerance = opts.sgtolerance
	tolerance = opts.tolerance
	failures = opts.failures
	missing = opts.missing

	for t in tests:
		try:
			mod, mod_name = get_test_mod(t)
			n=len(mod.parameter_list)
		except TypeError:
			continue
		except Exception as e:
			print("%-60s ERROR (%s)" % (t,e))
			failed.append(t)
			continue
		fname = ""

		for i in range(n):
			fname = get_fname(mod_name, i)
			setting_str = "%s setting %d/%d" % (t,i+1,n)
			try:
				a = run_test(mod, mod_name, i)

				try:
					b = pickle.load(open(fname))
				except:

					try:
						b = pickle.load(open(fname, 'rb'))

					except:
						with open(fname, 'rb') as f:
							p = pickle._Unpickler(f)
							p.encoding = 'latin1'
							b = p.load()


				try:
					if cmp_method(a,b,tolerance,sgtolerance):
						if not failures and not missing:
							print("%-60s OK" % setting_str)
					else:
						if not missing:
							failed.append((setting_str, get_fail_string(a), get_fail_string(b)))
							print("%-60s ERROR" % setting_str)
				except Exception as e:
					print(setting_str, e)
			except IOError as e:
				if not failures:
					print("%-60s NO TEST (%s)" % (setting_str, e))
			except Exception as e:
				failed.append(setting_str)
				if not missing:
					print("%-60s EXCEPTION %s" % (setting_str,e))
	return failed
Example #12
def load_data():
    """ Return the MNIST data as a tuple containing the training data,
    the validation data, and the test data."""
    with gzip.open("data/mnist.pkl.gz", "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = "latin1"
        training_data, validation_data, test_data = u.load()
    return (training_data, validation_data, test_data)
Example #13
def load(name, dtype="float32"):
    with open(name, "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        data = u.load()
    x = data["data"] / 255
    n = x.shape[0]
    x = x.reshape((n, 3, 32, 32))
    return x.astype(dtype), np.array(data["fine_labels"])
Example #14
def pickle3_load(bin_file):
    """
    There is a bug with unpacking binary values from pickle objects in
    Python 3 - this is my temporary fix.
    """
    with open(bin_file, "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = "latin1"
        return u.load()
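The "bug" this docstring works around is the usual Python 2 / Python 3 pickle incompatibility: pickles written by Python 2 contain byte strings (including NumPy buffers) that Python 3 cannot decode with its default ASCII encoding. The private pickle._Unpickler class is not strictly required for this; the public pickle.load accepts the same encoding keyword. A minimal equivalent sketch:

import pickle

def pickle3_load_public(bin_file):
    # same effect as pickle3_load above, via the public API
    with open(bin_file, "rb") as f:
        return pickle.load(f, encoding="latin1")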
Example #15
def load_CIFAR_batch(filename):
  """ load single batch of cifar """
  with open(filename, 'rb') as f:
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    datadict = u.load()
    X = datadict['data']
    Y = datadict['labels']
    X = X.reshape(10000, 3, 32, 32).transpose(0,2,3,1).astype("float")
    Y = np.array(Y)
    return X, Y
Example #16
def unpickle(file):
    fo = open(file, 'rb')
    # dict = cPickle.load(fo)

    # Solution: http://stackoverflow.com/questions/11305790/pickle-incompatability-of-numpy-arrays-between-python-2-and-3
    u = pickle._Unpickler(fo)
    u.encoding = 'latin1'
    dict = u.load()
    # .................................
    
    fo.close()
    return dict
Example #17
def load(name, path='../dumpedData'):
	fname = path + '/' + name

	data = None
	with open(fname,'rb') as f:
		p = pickle._Unpickler(f)
		p.encoding = 'latin1'
		data = p.load()
	if data is None:
		print('data loading failed for file:', fname)
		exit(-1)

	return data['X_train'], data['y_train'], data['X_test'], data['y_test']
Example #18
def load(name, path="../../dumpedData"):
    fname = path + "/" + name
    # print('loading from')
    # print(os.path.abspath(fname))

    data = None
    if os.path.isfile(fname):
        with open(fname, "rb") as f:
            p = pickle._Unpickler(f)
            p.encoding = "latin1"
            data = p.load()

    return data
Example #19
    def openTrajectory(self):
        file = self.onOpen([('PICKLE files', '*.pkl'), ('All files', '*')])

        self.trajectory_location = file

        with open(file, 'rb') as f:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            self.trajectory = u.load()

        self.trajectory_location_label['text'] = file[-20:]
        self.plot_trajectory_button['state'] = 'normal'

        self.checkIfProcessingIsPossible()
Example #20
def parseMNIST(source):
    import pickle
    import gzip
    f = gzip.open(source, mode="rb")
    # This hack is needed because I'm a lazy ....
    with f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        tr = u.load()
    f.close()
    headers = None
    data = np.concatenate(((tr[0][0]), (tr[1][0]), (tr[2][0])), axis=0).reshape((70000, 1, 28, 28))
    deps = np.concatenate((tr[0][1], tr[1][1], tr[2][1]))

    return headers, data, deps
Example #21
    def load(self, person):
        fname = str(self.path) + "/s"
        if person < 10:
            fname += "0"
        fname += str(person) + ".dat"
        with open(fname, "rb") as f:
            p = pickle._Unpickler(f)
            p.encoding = "latin1"
            data = p.load()

            # structure of data element:
            # data['labels'][video] = [valence, arousal, dominance, liking]
            # data['data'][video][channel] = [samples * 8064]

            X = self.featureExtractor.extract(data["data"])
            y = self.classificator.classify(data["labels"])

            return np.array(X), np.array(y)
Example #22
    def computeCorrelation(self, trajectory_location, story_data_location, anatomy, bold_location, mask_location):

        original_dataset = fmri_dataset(bold_location, mask=mask_location)

        #get the trajectory
        with open(trajectory_location, 'rb') as f:
            u = pickle._Unpickler(f)
            u.encoding = 'latin1'
            original_trajectory = u.load()

        #downsample the trajectory to match the number of fMRI samples
        resampled_trajectory = resample(original_trajectory, len(original_dataset.samples))

        #convert both datasets to a 1D array
        array1 = original_dataset[:, 0].samples.ravel()
        array2 = resampled_trajectory.ravel()

        correlation_results = []

        range_max = original_dataset.nfeatures

        #convert the story trajectory to a list
        trajectory_list = resampled_trajectory.ravel()

        for num in range(0, range_max):
            # self.processing_progress = "{}/{}".format(num, range_max)

            print("{}/{}".format(num, range_max))

            # if(num > 1000):
            #     correlation_results.append(0)
            #     continue

            sample_list = original_dataset[:, num].samples.ravel()
            correlation_result = numpy.corrcoef(trajectory_list, sample_list)[1,0]
            correlation_results.append(correlation_result)

        #replace NaN with 0s
        correlation_results = np.nan_to_num(correlation_results)

        #use numpy to convert the array to a 2d array - enabling us to create a Dataset
        correlation_matrix = np.reshape(correlation_results,(-1,len(correlation_results)))

        return correlation_matrix
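The loop above computes the Pearson correlation of the resampled trajectory against each fMRI feature one column at a time with numpy.corrcoef. As a hedged sketch only (not part of the original class), the same numbers can be obtained in a single pass, assuming samples is the (n_timepoints, n_features) array from original_dataset.samples and traj is the resampled trajectory as a 1-D array:

import numpy as np

def correlate_all_features(samples, traj):
    # z-score the feature columns and the trajectory (population std),
    # then one matrix product yields the Pearson correlation per feature
    s = (samples - samples.mean(axis=0)) / samples.std(axis=0)
    t = (traj - traj.mean()) / traj.std()
    corr = s.T @ t / len(t)
    # zero-variance columns produce NaN; replace with 0 as in the loop above
    return np.nan_to_num(corr)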
Example #23
def run():

    f = gzip.open("mnist.pkl.gz", "rb")

    # two methods to load
    # method 1
    u = pickle._Unpickler(f)
    u.encoding = "latin1"
    train_set, valid_set, test_set = u.load()

    # method 2
    # train_set, valid_set, test_set = pickle.load(f)

    f.close()

    train_x, train_y = train_set

    plt.imshow(train_x[2].reshape((28, 28)), cmap=cm.Greys_r)
    plt.show()
Example #24
def loadMultiplePersonsData(personCount=32, test_size=8, pad='../dataset', happyThres=0.5):
	#loads data for all persons; for each person, put part of the videos in the test set
	#create two classes: happy and not happy
	
	X_train, y_train = [], []
	X_test , y_test  = [], []

	for person in range(1,personCount+1):
		fname = str(pad) + '/s'
		if person < 10:
			fname += '0'
		fname += str(person) + '.dat'
		with open(fname,'rb') as f:
			p = pickle._Unpickler(f)
			p.encoding = 'latin1'
			data = p.load()
			#structure of data element:
			#data['labels'][video] = [valence, arousal, dominance, liking]
			#data['data'][video][channel] = [samples * 8064]

			y = np.array( data['labels'][:,0] ) #ATM only valence needed
			y = (y - 1) / 8 #1->9 to 0->1
			y[ y <= happyThres ] = 0
			y[ y > happyThres ] = 1

			#extract features
			X = []
			for j in range(len(data['data'])): #for each video
				X.append( extract(data['data'][j]) )

		#split into train and test set, while shuffling
		X_tr, X_te, y_tr, y_te = train_test_split(X,y,test_size=test_size, random_state=42)

		#add to list
		X_train.extend(X_tr)
		X_test.extend(X_te)
		
		y_train.extend(y_tr)
		y_test.extend(y_te)


	return [np.array(X_train), np.array(y_train), np.array(X_test), np.array(y_test)]
Example #25
def parseMNIST(source, flatten=True):
    import pickle
    import gzip
    f = gzip.open(source, mode="rb")
    # This hack is needed because I'm a lazy ....
    # TODO: use MNIST data from MNIST website THANK YOU!
    with f:
        # noinspection PyProtectedMember
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        tr = u.load()
    f.close()
    headers = None
    trData = np.array([x.flatten() for x in tr[0][0]]) if flatten else tr[0][0]
    teData = np.array([x.flatten() for x in tr[1][0]]) if flatten else tr[1][0]
    vaData = np.array([x.flatten() for x in tr[2][0]]) if flatten else tr[2][0]
    data = np.concatenate((trData, teData, vaData), axis=0)
    deps = np.concatenate((tr[0][1], tr[1][1], tr[2][1]))

    return headers, data, deps
Example #26
def load_embeddings(fn, format="prepared"):
    if format == "prepared":
        import io
        import pickle

        content = io.open(fn, 'rb')
        u = pickle._Unpickler(content)
        u.encoding = 'latin1'
        state = u.load()
        voc, vec = state
        if len(voc) == 2:
            words, counts = voc
            word_count = dict(zip(words, counts))
            vocab = web.embedding.CountedVocabulary(word_count=word_count)
        else:
            vocab = web.embedding.OrderedVocabulary(voc)
        return web.embedding.Embedding(vocabulary=vocab, vectors=vec)
    else:
        return web.embeddings.load_embedding(fn, format=format, normalize=False,
                                             clean_words=False)
Example #27
def download_embedding():
    """
    Download files from web
    It seems the files cannot be downloaded programmatically.
    Download from: https://sites.google.com/site/rmyeid/projects/polyglot

    Returns:
        A tuple (word, embedding). Embeddings shape is (100004, 64).
    """

    assert (tf.gfile.Exists(FLAGS.chr_embedding_dir)), (
        "Embedding pkl don't found, please \
        download the Chinese chr embedding from https://sites.google.com/site/rmyeid/projects/polyglot"
    )

    with open(FLAGS.chr_embedding_dir, 'rb') as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        p = u.load()

    return p
Example #28
    def load(self, personList=range(1, 33)):
        X, y = [], []

        for person in personList:
            print("loading person " + str(person))
            fname = str(self.path) + "/s"
            if person < 10:
                fname += "0"
            fname += str(person) + ".dat"
            with open(fname, "rb") as f:
                p = pickle._Unpickler(f)
                p.encoding = "latin1"
                data = p.load()

                # structure of data element:
                # data['labels'][video] = [valence, arousal, dominance, liking]
                # data['data'][video][channel] = [samples * 8064]

                X.extend(self.featureExtractor.extract(data["data"]))
                y.extend(self.classificator.classify(data["labels"]))

        return np.array(X), np.array(y)
Example #29
    def _pickle_loads(self, value):
        def persistent_load(pid):
            if pid == b'ctx':
                return self._ctx
            else:
                raise pickle.UnpicklingError(
                    'unsupported persistent object: %r'  % pid)

        f = io.BytesIO(value)
        unpickler = pickle._Unpickler(f)
        unpickler.persistent_load = persistent_load
        obj = unpickler.load()

        def unpersist(obj):
            if isinstance(obj, _ObjectID):
                o = object.__new__(obj.cls)
                for key, value in obj.state.items():
                    setattr(o, key, unpersist(value))
                return o
            else:
                return obj

        return unpersist(obj)
Example #30
### This code is used to generate the profile of a user. A user profile is composed of a list of genre preferences ###
### for each user in the dataset ####

import pickle
import numpy as np
import collections

with open('../data/movie_genres.pickle', 'rb') as handle:
    u = pickle._Unpickler(handle)
    u.encoding = 'latin1'
    genres = u.load()
print("Movie genres loaded")

with open('../data/movie_cast.pickle', 'rb') as handle:
    u = pickle._Unpickler(handle)
    u.encoding = 'latin1'
    cast = u.load()
print("Movie casts loaded")

with open('../data/user_rating_full.pickle', 'rb') as handle:
    user_rating = pickle.load(handle)
print("User ratings loaded")

num_users = len(user_rating)
user_genre_profile = [[] for i in range(num_users)]

for uid, ratings in enumerate(user_rating):

    user_genre = [0 for j in range(len(genres))]

    movie_count = 0
Example #31
    def download(self):

        if self._check_exists():
            return

        #create the directories
        os.makedirs(self.processed_folder, exist_ok=True)

        print('Downloading data...')

        # get the dataset from the web
        os.system('wget ftp://m1370728:[email protected]/data.zip')
        os.system('unzip data.zip -d ' + self.raw_folder)
        os.system('rm data.zip')

        #Processing data
        print('Scanning data...')

        #collect all the possible time stamps
        train_localdir = os.path.join(self.raw_folder, 'data', 'train')
        test_localdir = os.path.join(self.raw_folder, 'data', 'test')
        eval_localdir = os.path.join(self.raw_folder, 'data', 'eval')
        first = True
        timeC = 0
        badweather_labels = np.array([0, 1, 2, 3])

        unique_times = np.array([0])
        for filename in (os.listdir(train_localdir)):
            with open(os.path.join(train_localdir, filename), "rb") as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                X, Y, _ = u.load()
                if first:
                    raw_batchsize, maxobs, nfeatures = X.shape
                    _, _, nclasses = Y.shape
                    first = False
                unique_times = np.unique(
                    np.hstack([X[:, :, 0].ravel(), unique_times]))

        unique_times = np.array([0])
        for filename in (os.listdir(test_localdir)):
            with open(os.path.join(test_localdir, filename), "rb") as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                X, _, _ = u.load()
                unique_times = np.unique(
                    np.hstack([X[:, :, 0].ravel(), unique_times]))

        unique_times = np.array([0])
        for filename in (os.listdir(eval_localdir)):
            with open(os.path.join(eval_localdir, filename), "rb") as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                X, _, _ = u.load()
                unique_times = np.unique(
                    np.hstack([X[:, :, timeC].ravel(), unique_times]))

        #write timestamps file
        timestamps_hdf5 = h5py.File(
            os.path.join(self.processed_folder, self.time_file), 'w')
        timestamps_hdf5.create_dataset('tt', data=unique_times)

        trainbatchsizes = []

        #HDF5 style dataset
        #adjust the numbers! Or just resize in the end...

        print('Scanning Training data...')
        for filename in tqdm(os.listdir(train_localdir)):

            #starting a new batch
            X_mod = np.zeros((raw_batchsize, maxobs, nfeatures))
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(train_localdir, filename), "rb") as f:

                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind0, :] = X[sampleind, timeind, :]
                        X_mod[sampleind, ind0, timeC] = 0  #set to zero
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind, :] = X[sampleind, timeind, :]
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #"destroy" data, that is corrputed by bad weather. We will never use it!
                X_mod[~mask] = 0

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mod = np.delete(X_mod, (timeC), axis=2)
                X_mask_mod = np.delete(mask, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                # we also need more than one point in time, because we integrate
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mod = np.delete(X_mod, (samples_to_delete), axis=0)
                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                if self.noskip:
                    X_mask_mod = (X_mod != 0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                #make assumptions about the label, harden
                Y_mod = np.sum(Y_mod, axis=1) / np.repeat(
                    np.sum(Y_mod, axis=(1, 2))[:, None],
                    repeats=nclasses - badweather_labels.size,
                    axis=1)

                trainbatchsizes.append(Y_mod.shape[0])
        ntrainsamples = sum(trainbatchsizes)
        testbatchsizes = []

        print('Scanning Testing data...')
        for filename in tqdm(os.listdir(test_localdir)):

            #starting a new batch
            X_mod = np.zeros((raw_batchsize, maxobs, nfeatures))
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(test_localdir, filename), "rb") as f:
                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mask_mod = np.delete(mask, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                testbatchsizes.append(Y_mod.shape[0])
        ntestsamples = sum(testbatchsizes)
        evalbatchsizes = []

        print('Scanning Evaluation data...')
        for filename in tqdm(os.listdir(eval_localdir)):

            #starting a new batch
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(eval_localdir, filename), "rb") as f:
                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mask_mod = np.delete(mask, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                evalbatchsizes.append(Y_mod.shape[0])
        nevalsamples = sum(evalbatchsizes)
        batchsizes = []

        ntargetclasses = nclasses - badweather_labels.size

        # Open a hdf5 files and create arrays
        hdf5_file_train = h5py.File(os.path.join(self.processed_folder,
                                                 self.train_file),
                                    mode='w')
        hdf5_file_train.create_dataset(
            "data", (ntrainsamples, len(unique_times), nfeatures - 1),
            np.float64)
        hdf5_file_train.create_dataset(
            "mask", (ntrainsamples, len(unique_times), nfeatures - 1), np.bool_)
        hdf5_file_train.create_dataset("labels",
                                       (ntrainsamples, ntargetclasses),
                                       np.float64)

        hdf5_file_test = h5py.File(os.path.join(self.processed_folder,
                                                self.test_file),
                                   mode='w')
        hdf5_file_test.create_dataset(
            "data", (ntestsamples, len(unique_times), nfeatures - 1), np.float64)
        hdf5_file_test.create_dataset(
            "mask", (ntestsamples, len(unique_times), nfeatures - 1), np.bool_)
        hdf5_file_test.create_dataset("labels", (ntestsamples, ntargetclasses),
                                      np.float64)

        hdf5_file_eval = h5py.File(os.path.join(self.processed_folder,
                                                self.eval_file),
                                   mode='w')
        hdf5_file_eval.create_dataset(
            "data", (nevalsamples, len(unique_times), nfeatures - 1), np.float64)
        hdf5_file_eval.create_dataset(
            "mask", (nevalsamples, len(unique_times), nfeatures - 1), np.bool_)
        hdf5_file_eval.create_dataset("labels", (nevalsamples, ntargetclasses),
                                      np.float64)

        observed = 0
        missing = 0

        # prepare shuffling of samples
        indices = np.arange(ntrainsamples)

        if self.shuffle:
            np.random.shuffle(indices)

        #Training data
        print("Building training dataset...")
        first_batch = True
        for fid, filename in enumerate(tqdm(
                os.listdir(train_localdir))):  #tqdm

            #starting a new batch
            X_mod = np.zeros((raw_batchsize, maxobs, nfeatures))
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(train_localdir, filename), "rb") as f:

                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind0, :] = X[sampleind, timeind, :]
                        X_mod[sampleind, ind0, timeC] = 0  #set to zero
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind, :] = X[sampleind, timeind, :]
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #"destroy" data, that is corrputed by bad weather. We will never use it!
                if self.noskip:
                    mask_2 = (X_mod != 0)
                    X_mod[~mask_2] = 0
                else:
                    X_mod[~mask] = 0

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mod = np.delete(X_mod, (timeC), axis=2)
                X_mask_mod = np.delete(mask, (timeC), axis=2)
                #X_mask_2_mod = np.delete(mask_2, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mod = np.delete(X_mod, (samples_to_delete), axis=0)
                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                if self.noskip:
                    X_mask_mod = (X_mod != 0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                #make assumptions about the label, harden
                Y_mod = np.sum(Y_mod, axis=1) / np.repeat(
                    np.sum(Y_mod, axis=(1, 2))[:, None],
                    repeats=nclasses - badweather_labels.size,
                    axis=1)

                #for statistics
                missing += np.sum(mask == 0.)
                observed += np.sum(mask == 1.)

                valid_batchsize = X_mod.shape[0]

                #get the time stamps
                tt = unique_times

                if first_batch:
                    start_ix = 0
                    stop_ix = valid_batchsize
                    first_batch = False
                else:
                    start_ix = stop_ix
                    stop_ix += valid_batchsize

                #fill in data to hdf5 file
                sorted_indices = np.sort(indices[start_ix:stop_ix])

                hdf5_file_train["data"][sorted_indices, ...] = X_mod
                hdf5_file_train["mask"][sorted_indices, ...] = X_mask_mod
                hdf5_file_train["labels"][sorted_indices, ...] = Y_mod

                #hdf5_file_train["data"][start_ix:stop_ix, ...] = X_mod
                #hdf5_file_train["mask"][start_ix:stop_ix, ...] = X_mask_mod
                #hdf5_file_train["labels"][start_ix:stop_ix, ...] = Y_mod

        #Testing data
        print("Building testing dataset...")
        first_batch = True
        for fid, filename in enumerate(tqdm(os.listdir(test_localdir))):  #tqdm

            #starting a new batch
            X_mod = np.zeros((raw_batchsize, maxobs, nfeatures))
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(test_localdir, filename), "rb") as f:

                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind0, :] = X[sampleind, timeind, :]
                        X_mod[
                            sampleind, ind0,
                            timeC] = 0  #set to zero, last day of previous year
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind, :] = X[sampleind, timeind, :]
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #"destroy" data, that is corrputed by bad weather. We will never use it!
                # "all masked out elements should be zeros"
                if self.noskip:
                    X_mask_mod = (X_mod != 0)

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mod = np.delete(X_mod, (timeC), axis=2)
                X_mask_mod = np.delete(mask, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mod = np.delete(X_mod, (samples_to_delete), axis=0)
                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                if self.noskip:
                    X_mask_mod = (X_mod != 0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                #make assumptions about the label
                Y_mod = np.sum(Y_mod, axis=1) / np.repeat(
                    np.sum(Y_mod, axis=(1, 2))[:, None],
                    repeats=nclasses - badweather_labels.size,
                    axis=1)

                #for statistics
                missing += np.sum(mask == 0.)
                observed += np.sum(mask == 1.)

                valid_batchsize = X_mod.shape[0]

                #get the time stamps
                tt = unique_times

                if first_batch:
                    start_ix = 0
                    stop_ix = valid_batchsize
                    first_batch = False
                else:
                    start_ix = stop_ix
                    stop_ix += valid_batchsize

                #fill in data to hdf5 file
                hdf5_file_test["data"][start_ix:stop_ix, ...] = X_mod
                hdf5_file_test["mask"][start_ix:stop_ix, ...] = X_mask_mod
                hdf5_file_test["labels"][start_ix:stop_ix, ...] = Y_mod

        start_ix = 0
        stop_ix = 0

        #Evaluation data
        print("Building evaluation dataset...")
        first_batch = True
        for fid, filename in enumerate(tqdm(os.listdir(eval_localdir))):  #tqdm

            #starting a new batch
            X_mod = np.zeros((raw_batchsize, maxobs, nfeatures))
            Y_mod = np.zeros((raw_batchsize, maxobs, nclasses))
            mask = np.zeros((raw_batchsize, maxobs, nfeatures), dtype=bool)

            with open(os.path.join(eval_localdir, filename), "rb") as f:

                #Unpacking procedure with pickles
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                X, Y, obslen = data

                raw_batchsize, maxobs, nfeatures = X.shape
                _, _, nclasses = Y.shape
                times = X[:, :, timeC]  #(500,26)

                #get the time ordering of time
                for ind, t in enumerate(unique_times):
                    ind = ind
                    if abs(t - 1) < 0.0001:
                        #correct for the offset thing, where the first measurement is in the last year
                        ind0 = 0

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind0, :] = X[sampleind, timeind, :]
                        X_mod[sampleind, ind0, timeC] = 0  #set to zero
                        Y_mod[sampleind, ind0, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, 0, :] = True

                    elif abs(t) < 0.0001:  #no data => do nothing
                        #print("was a 0")
                        pass
                    else:

                        #Indices of corresponding times
                        sampleind = np.nonzero(times == t)[0]
                        timeind = np.nonzero(times == t)[1]

                        #place at correct position
                        X_mod[sampleind, ind, :] = X[sampleind, timeind, :]
                        Y_mod[sampleind, ind, :] = Y[sampleind, timeind, :]

                        #mark as observed in mask
                        mask[sampleind, ind, :] = True

                # cloud/weather mask
                # 1 is observed, 0 is not observed, due to clouds/ice/snow and stuff
                # we mark the bad weather observations in the mask as unobserved
                badweather_obs = np.nonzero(
                    np.sum(Y_mod[:, :, badweather_labels], axis=2) != 0)
                mask[badweather_obs[0], badweather_obs[1], :] = 0

                #"destroy" data, that is corrputed by bad weather. We will never use it!
                # "all masked out elements should be zeros"
                if self.noskip:
                    mask_2 = (X_mod != 0)
                    X_mod[~mask_2] = 0
                else:
                    X_mod[~mask] = 0

                #Truncate the timestamp-column (timeC) from the features and mask
                X_mod = np.delete(X_mod, (timeC), axis=2)
                X_mask_mod = np.delete(mask, (timeC), axis=2)

                #truncate and renormalize the labels
                Y_mod = np.delete(Y_mod, badweather_labels, axis=2)
                tot_weight = np.repeat(np.sum(Y_mod, axis=2)[:, :, None],
                                       repeats=nclasses -
                                       badweather_labels.size,
                                       axis=2)
                Y_mod = np.divide(Y_mod,
                                  tot_weight,
                                  out=np.zeros_like(Y_mod),
                                  where=tot_weight != 0)

                #delete datapoints without any labels
                #check that "mask" argument indeed contains a mask for data
                unobserved_datapt = np.where(
                    (np.sum(X_mask_mod == 1., axis=(1, 2)) == 0.))  #no data
                no_labels = np.where((np.sum(Y_mod,
                                             axis=(1, 2)) == 0.))  #no labels
                too_few_obs_tp = np.where(
                    np.sum(np.sum(X_mask_mod == 1., 2) != 0, 1) < 2)

                samples_to_delete = np.unique(
                    np.hstack([unobserved_datapt, no_labels, too_few_obs_tp]))

                X_mod = np.delete(X_mod, (samples_to_delete), axis=0)
                X_mask_mod = np.delete(X_mask_mod, (samples_to_delete), axis=0)
                if self.noskip:
                    X_mask_mod = (X_mod != 0)
                Y_mod = np.delete(Y_mod, (samples_to_delete), axis=0)

                #aggregate the labels over time and renormalize to one label distribution per sample
                Y_mod = np.sum(Y_mod, axis=1) / np.repeat(
                    np.sum(Y_mod, axis=(1, 2))[:, None],
                    repeats=nclasses - badweather_labels.size,
                    axis=1)

                #for statistics
                missing += np.sum(mask == 0.)
                observed += np.sum(mask == 1.)

                valid_batchsize = X_mod.shape[0]

                #get the time stamps
                tt = unique_times

                if first_batch:
                    start_ix = 0
                    stop_ix = valid_batchsize
                    first_batch = False
                else:
                    start_ix = stop_ix
                    stop_ix += valid_batchsize

                #fill in data to hdf5 file
                hdf5_file_eval["data"][start_ix:stop_ix, ...] = X_mod
                hdf5_file_eval["mask"][start_ix:stop_ix, ...] = X_mask_mod
                hdf5_file_eval["labels"][start_ix:stop_ix, ...] = Y_mod

        if self.normalize:

            print(
                "Calculating mean and standard deviation of training dataset..."
            )
            training_mean2 = np.ma.array(
                hdf5_file_train["data"][:],
                mask=~hdf5_file_train["mask"][:]).mean(axis=(0, 1))
            training_std2 = np.ma.array(hdf5_file_train["data"][:],
                                        mask=~hdf5_file_train["mask"][:]).std(
                                            axis=(0, 1), ddof=1)

            print("Normalizing data. This may take some time ...")
            #sorry for this large one-liner, but it's just normalization of the observed values
            hdf5_file_train["data"][:] = np.divide(
                np.subtract(hdf5_file_train["data"],
                            training_mean2,
                            out=np.zeros_like(hdf5_file_train["data"][:]),
                            where=hdf5_file_train["mask"][:]),
                training_std2,
                out=np.zeros_like(hdf5_file_train["data"][:]),
                where=hdf5_file_train["mask"][:])
            hdf5_file_test["data"][:] = np.divide(
                np.subtract(hdf5_file_test["data"],
                            training_mean2,
                            out=np.zeros_like(hdf5_file_test["data"][:]),
                            where=hdf5_file_test["mask"][:]),
                training_std2,
                out=np.zeros_like(hdf5_file_test["data"][:]),
                where=hdf5_file_test["mask"][:])
            hdf5_file_eval["data"][:] = np.divide(
                np.subtract(hdf5_file_eval["data"],
                            training_mean2,
                            out=np.zeros_like(hdf5_file_eval["data"][:]),
                            where=hdf5_file_eval["mask"][:]),
                training_std2,
                out=np.zeros_like(hdf5_file_eval["data"][:]),
                where=hdf5_file_eval["mask"][:])

        print("Preprocessing finished")

        hdf5_file_train.close()
        hdf5_file_test.close()
        hdf5_file_eval.close()

        missing_rate = missing / (observed + missing)
        print("Missingness rate:", str(missing_rate * 100), " %")
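
A minimal standalone sketch of the masked-normalization pattern used above (toy array names; statistics are computed only over observed entries, and masked-out zeros stay zero):

import numpy as np

# Toy data: (samples, timesteps, features); mask marks observed entries.
data = np.random.randn(4, 5, 3)
mask = np.random.rand(4, 5, 3) > 0.3
data[~mask] = 0.0                      # unobserved entries are stored as zeros

# Statistics over observed entries only.  np.ma treats True as "masked",
# hence the inversion of the observation mask.
mean = np.ma.array(data, mask=~mask).mean(axis=(0, 1)).filled(0.0)
std = np.ma.array(data, mask=~mask).std(axis=(0, 1), ddof=1).filled(1.0)

# Normalize only where observed; masked-out entries remain exactly zero.
normalized = np.divide(
    np.subtract(data, mean, out=np.zeros_like(data), where=mask),
    std,
    out=np.zeros_like(data),
    where=mask)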
Beispiel #32
0
    def __init__(self,
                 bm_path,
                 params=None,
                 num_betas=10,
                 batch_size=1,
                 v_template=None,
                 num_dmpls=None,
                 path_dmpl=None,
                 num_expressions=10,
                 use_posedirs=True,
                 dtype=torch.float32):

        super(BodyModel, self).__init__()
        '''
        :param bm_path: path to a SMPL model as pkl file
        :param num_betas: number of shape parameters to include.
                if betas are provided in params, num_betas is overridden by the number of those betas
        :param batch_size: number of body instances in the batch
        :param device: default on gpu
        :param dtype: float precision of the computations
        :return: verts, trans, pose, betas 
        '''
        # TODO: if params is given, the batch size should be read from one of the params

        self.dtype = dtype

        if params is None: params = {}

        # -- Load SMPL params --
        if '.npz' in bm_path:
            smpl_dict = np.load(bm_path, encoding='latin1')
        elif '.pkl' in bm_path:
            import pickle
            import gzip
            with open(bm_path, 'rb') as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                smpl_dict = u.load()
                #smpl_dict = np.load(bm_path, allow_pickle = True)
        else:
            raise ValueError('bm_path should be either a .pkl or a .npz file')

        njoints = smpl_dict['posedirs'].shape[2] // 3
        self.model_type = {
            69: 'smpl',
            153: 'smplh',
            162: 'smplx',
            45: 'mano'
        }[njoints]

        assert self.model_type in [
            'smpl', 'smplh', 'smplx', 'mano'
        ], ValueError('model_type should be in smpl/smplh/smplx/mano.')

        self.use_dmpl = False
        if num_dmpls is not None:
            if path_dmpl is not None:
                self.use_dmpl = True
            else:
                raise (ValueError(
                    'path_dmpl should be provided when using dmpls!'))

        if self.use_dmpl and self.model_type in ['smplx', 'mano']:
            raise (NotImplementedError(
                'DMPLs only work with SMPL/SMPLH models for now.'))

        # Mean template vertices
        if v_template is None:
            v_template = np.repeat(smpl_dict['v_template'][np.newaxis],
                                   batch_size,
                                   axis=0)
        else:
            v_template = np.repeat(v_template[np.newaxis], batch_size, axis=0)

        self.register_buffer('v_template', torch.tensor(v_template,
                                                        dtype=dtype))

        self.register_buffer(
            'f',
            torch.tensor(smpl_dict['f'].astype(np.int32), dtype=torch.int32))

        if len(params):
            if 'betas' in params.keys():
                num_betas = params['betas'].shape[1]
            if 'dmpls' in params.keys():
                num_dmpls = params['dmpls'].shape[1]

        num_total_betas = smpl_dict['shapedirs'].shape[-1]
        if num_betas < 1:
            num_betas = num_total_betas

        shapedirs = smpl_dict['shapedirs'][:, :, :num_betas]
        self.register_buffer('shapedirs', torch.tensor(shapedirs, dtype=dtype))

        if self.model_type == 'smplx':
            begin_shape_id = 300 if smpl_dict['shapedirs'].shape[
                -1] > 300 else 10
            exprdirs = smpl_dict['shapedirs'][:, :,
                                              begin_shape_id:(begin_shape_id +
                                                              num_expressions)]
            self.register_buffer('exprdirs', torch.tensor(exprdirs,
                                                          dtype=dtype))

            expression = torch.tensor(np.zeros((batch_size, num_expressions)),
                                      dtype=dtype,
                                      requires_grad=True)
            self.register_parameter(
                'expression', nn.Parameter(expression, requires_grad=True))

        if self.use_dmpl:
            dmpldirs = np.load(path_dmpl)['eigvec']

            dmpldirs = dmpldirs[:, :, :num_dmpls]
            self.register_buffer('dmpldirs', torch.tensor(dmpldirs,
                                                          dtype=dtype))

        # Regressor for joint locations given shape - 6890 x 24
        if '.pkl' in bm_path:
            self.register_buffer(
                'J_regressor',
                torch.tensor(smpl_dict['J_regressor'].todense(), dtype=dtype))
        else:
            self.register_buffer(
                'J_regressor',
                torch.tensor(smpl_dict['J_regressor'], dtype=dtype))

        # Pose blend shape basis: 6890 x 3 x 207, reshaped to 6890*3 x 207 and transposed
        if use_posedirs:
            posedirs = smpl_dict['posedirs']
            posedirs = posedirs.reshape([posedirs.shape[0] * 3, -1]).T
            self.register_buffer('posedirs', torch.tensor(posedirs,
                                                          dtype=dtype))
        else:
            self.posedirs = None

        # indices of parents for each joint
        kintree_table = smpl_dict['kintree_table'].astype(np.int32)
        self.register_buffer('kintree_table',
                             torch.tensor(kintree_table, dtype=torch.int32))

        # LBS weights
        # weights = np.repeat(smpl_dict['weights'][np.newaxis], batch_size, axis=0)
        weights = smpl_dict['weights']
        self.register_buffer('weights', torch.tensor(weights, dtype=dtype))

        if 'trans' in params.keys():
            trans = params['trans']
        else:
            trans = torch.tensor(np.zeros((batch_size, 3)),
                                 dtype=dtype,
                                 requires_grad=True)
        self.register_parameter('trans', nn.Parameter(trans,
                                                      requires_grad=True))

        # root_orient
        # if self.model_type in ['smpl', 'smplh']:
        root_orient = torch.tensor(np.zeros((batch_size, 3)),
                                   dtype=dtype,
                                   requires_grad=True)
        self.register_parameter('root_orient',
                                nn.Parameter(root_orient, requires_grad=True))

        # pose_body
        if self.model_type in ['smpl', 'smplh', 'smplx']:
            pose_body = torch.tensor(np.zeros((batch_size, 63)),
                                     dtype=dtype,
                                     requires_grad=True)
            self.register_parameter(
                'pose_body', nn.Parameter(pose_body, requires_grad=True))

        # pose_hand
        if 'pose_hand' in params.keys():
            pose_hand = params['pose_hand']
        else:
            if self.model_type in ['smpl']:
                pose_hand = torch.tensor(np.zeros((batch_size, 1 * 3 * 2)),
                                         dtype=dtype,
                                         requires_grad=True)
            elif self.model_type in ['smplh', 'smplx']:
                pose_hand = torch.tensor(np.zeros((batch_size, 15 * 3 * 2)),
                                         dtype=dtype,
                                         requires_grad=True)
            elif self.model_type in ['mano']:
                pose_hand = torch.tensor(np.zeros((batch_size, 15 * 3)),
                                         dtype=dtype,
                                         requires_grad=True)
        self.register_parameter('pose_hand',
                                nn.Parameter(pose_hand, requires_grad=True))

        # face poses
        if self.model_type == 'smplx':
            pose_jaw = torch.tensor(np.zeros((batch_size, 1 * 3)),
                                    dtype=dtype,
                                    requires_grad=True)
            self.register_parameter('pose_jaw',
                                    nn.Parameter(pose_jaw, requires_grad=True))
            pose_eye = torch.tensor(np.zeros((batch_size, 2 * 3)),
                                    dtype=dtype,
                                    requires_grad=True)
            self.register_parameter('pose_eye',
                                    nn.Parameter(pose_eye, requires_grad=True))

        if 'betas' in params.keys():
            betas = params['betas']
        else:
            betas = torch.tensor(np.zeros((batch_size, num_betas)),
                                 dtype=dtype,
                                 requires_grad=True)
        self.register_parameter('betas', nn.Parameter(betas,
                                                      requires_grad=True))

        if self.use_dmpl:
            if 'dmpls' in params.keys():
                dmpls = params['dmpls']
            else:
                dmpls = torch.tensor(np.zeros((batch_size, num_dmpls)),
                                     dtype=dtype,
                                     requires_grad=True)
            self.register_parameter('dmpls',
                                    nn.Parameter(dmpls, requires_grad=True))
        self.batch_size = batch_size
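
A hedged usage sketch for the constructor above (the model path is a placeholder; only arguments and attributes that appear in the code above are assumed):

# Hypothetical model path; .npz and .pkl files are both handled by __init__.
bm = BodyModel(bm_path='models/smplh_male.npz', num_betas=10, batch_size=2)

print(bm.model_type)        # inferred from posedirs, e.g. 'smplh'
print(bm.v_template.shape)  # (batch_size, num_vertices, 3)
print(bm.betas.shape)       # (batch_size, num_betas)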
Beispiel #33
0
    def __init__(self, custom_db=None):
        self.batch_size = 4
        dir_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                'vggface2_resnet')
        filename = 'labels.npy'
        filepath = os.path.join(dir_path, filename)

        if not os.path.exists(filepath):
            raise FileNotFoundError('Label file not found, path=%s' % filepath)

        self.class_names = np.load(filepath)
        self.input_node = tf.placeholder(tf.float32,
                                         shape=(None, 224, 224, 3),
                                         name='image')
        current = self.input_node
        network = {}

        with tf.variable_scope('vgg2', reuse=tf.AUTO_REUSE) as scope:
            # Building the cnn architecture:
            # First block:
            l = tf.layers.conv2d(current,
                                 64, (7, 7),
                                 strides=(2, 2),
                                 padding='SAME',
                                 use_bias=False,
                                 name='conv1/7x7_s2')
            l = tf.layers.batch_normalization(l,
                                              axis=3,
                                              name='conv1/7x7_s2/bn')
            l = tf.nn.relu(l)
            l = tf.layers.max_pooling2d(l, 3, 2)

            # Second block:
            l = conv_block(l, [64, 64, 256], stage=2, block=1, strides=(1, 1))
            l = identity_block(l, [64, 64, 256], stage=2, block=2)
            l = identity_block(l, [64, 64, 256], stage=2, block=3)

            # Third block:
            l = conv_block(l, [128, 128, 512], stage=3, block=1)
            l = identity_block(l, [128, 128, 512], stage=3, block=2)
            l = identity_block(l, [128, 128, 512], stage=3, block=3)
            l = identity_block(l, [128, 128, 512], stage=3, block=4)

            # Fourth block:
            l = conv_block(l, [256, 256, 1024], stage=4, block=1)
            l = identity_block(l, [256, 256, 1024], stage=4, block=2)
            l = identity_block(l, [256, 256, 1024], stage=4, block=3)
            l = identity_block(l, [256, 256, 1024], stage=4, block=4)
            l = identity_block(l, [256, 256, 1024], stage=4, block=5)
            l = identity_block(l, [256, 256, 1024], stage=4, block=6)

            # Fifth block:
            l = conv_block(l, [512, 512, 2048], stage=5, block=1)
            l = identity_block(l, [512, 512, 2048], stage=5, block=2)
            l = identity_block(l, [512, 512, 2048],
                               stage=5,
                               block=3,
                               last_relu=False)

            # Final stage:
            l = tf.layers.average_pooling2d(l, 7, 1)
            l = tf.layers.flatten(l)
            network['feat'] = l
            l = tf.nn.relu(l)
            output = tf.layers.dense(l,
                                     8631,
                                     activation=tf.nn.softmax,
                                     name='classifier')  # 8631 classes
            network['out'] = output

        # Load weights:
        filename = 'weight.h5'
        filepath = os.path.join(dir_path, filename)

        if not os.path.exists(filepath):
            raise FileNotFoundError('Weight file not found, path=%s' %
                                    filepath)

        # Assign weights:
        assign_list = []
        with h5py.File(filepath, mode='r') as f:
            layers = f.attrs['layer_names']
            for layer in layers:
                g = f[layer]

                if isinstance(layer, bytes):
                    layer = layer.decode('utf-8')
                layer_type = get_layer_type(layer)
                if layer_type == 'Conv2D':
                    with tf.variable_scope('vgg2',
                                           reuse=tf.AUTO_REUSE) as scope:
                        conv = tf.get_variable(layer + '/kernel')
                        w = np.asarray(g[layer + '/kernel:0'])
                        assign_op = conv.assign(tf.constant(w))
                        assign_list.append(assign_op)

                elif layer_type == 'BatchNormalization':
                    with tf.variable_scope('vgg2',
                                           reuse=tf.AUTO_REUSE) as scope:
                        beta = tf.get_variable(layer + '/beta')
                        gamma = tf.get_variable(layer + '/gamma')
                        mean = tf.get_variable(layer + '/moving_mean')
                        var = tf.get_variable(layer + '/moving_variance')
                        w = np.asarray(g[layer + '/beta:0'])
                        assign_op = beta.assign(tf.constant(w))
                        assign_list.append(assign_op)
                        w = np.asarray(g[layer + '/gamma:0'])
                        assign_op = gamma.assign(tf.constant(w))
                        assign_list.append(assign_op)
                        w = np.asarray(g[layer + '/moving_mean:0'])
                        assign_op = mean.assign(tf.constant(w))
                        assign_list.append(assign_op)
                        w = np.asarray(g[layer + '/moving_variance:0'])
                        assign_op = var.assign(tf.constant(w))
                        assign_list.append(assign_op)

                elif layer_type == 'Classifier':
                    with tf.variable_scope('vgg2', reuse=tf.AUTO_REUSE):
                        bias = tf.get_variable(layer + '/bias')
                        kernel = tf.get_variable(layer + '/kernel')
                        w = np.asarray(g[layer + '/bias:0'])
                        assign_op = bias.assign(tf.constant(w))
                        assign_list.append(assign_op)
                        w = np.asarray(g[layer + '/kernel:0'])
                        assign_op = kernel.assign(tf.constant(w))
                        assign_list.append(assign_op)

        # Create session:
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        init = tf.global_variables_initializer()

        config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
        self.persistent_sess = tf.Session(config=config)

        # Warm-up:
        self.persistent_sess.run(init,
                                 feed_dict={
                                     self.input_node:
                                     np.zeros((self.batch_size, 224, 224, 3),
                                              dtype=np.uint8)
                                 })
        self.persistent_sess.run(
            [assign_list, update_ops],
            feed_dict={
                self.input_node:
                np.zeros((self.batch_size, 224, 224, 3), dtype=np.uint8)
            })

        self.network = network
        self.db = None

        if custom_db:
            db_path = custom_db
        else:
            db_path = DeepFaceConfs.get()['recognizer']['resnet'].get('db', '')
            db_path = os.path.join(dir_path, db_path)
        try:
            with open(db_path, 'rb') as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                self.db = u.load()
        except Exception as e:
            logging.warning('db file not loaded, %s, err=%s' %
                            (db_path, str(e)))
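
The weight-assignment loop above follows the Keras HDF5 layout: a 'layer_names' attribute lists the layer groups, and each group holds datasets such as '<layer>/kernel:0'. A small self-contained sketch of just that traversal, using a throwaway file (all names below are made up):

import h5py
import numpy as np

# Throwaway file with the layout the loop above expects.
with h5py.File('toy_weights.h5', 'w') as f:
    f.attrs['layer_names'] = [b'conv1/7x7_s2']
    f.create_dataset('conv1/7x7_s2/conv1/7x7_s2/kernel:0',
                     data=np.zeros((7, 7, 3, 64), dtype=np.float32))

with h5py.File('toy_weights.h5', 'r') as f:
    for layer in f.attrs['layer_names']:
        g = f[layer]
        if isinstance(layer, bytes):
            layer = layer.decode('utf-8')
        w = np.asarray(g[layer + '/kernel:0'])
        print(layer, w.shape)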
Beispiel #34
0
model = tf.estimator.LinearClassifier(
    feature_columns=FeatCols,
    model_dir='/cs/cbio/orzuk/projects/ContactMaps/data/Jinbo',
    config=run_config)
count = 1
for protein in data:
    print(count)

    model.train(input_fn=input_fn_fen(protein))
    count += 1
'''Testing'''

with open('/cs/cbio/orzuk/projects/ContactMaps/data/PfamTestData.p',
          'rb') as f:
    PfamTestData = pickle._Unpickler(f)
    PfamTestData.encoding = 'latin1'
    PfamTestData = PfamTestData.load()
with open('/cs/cbio/orzuk/projects/ContactMaps/data/Jinbo/LogRegModelNew2',
          'rb') as f:
    LogRegModel50 = pickle._Unpickler(f)
    LogRegModel50.encoding = 'latin1'
    LogRegModel50 = LogRegModel50.load()

with open('LogRegModelNew2', 'rb') as f:
    LogRegModel50 = pickle._Unpickler(f)
    LogRegModel50.encoding = 'latin1'
    LogRegModel50 = LogRegModel50.load()

with open('PfamTestData.p', 'rb') as f:
    PfamTestData = pickle._Unpickler(f)
    PfamTestData.encoding = 'latin1'
    PfamTestData = PfamTestData.load()
Beispiel #35
0
 def load_model(self, path):
     u = pickle._Unpickler(open(path, 'rb'))
     u.encoding = 'latin1'
     self.vec = u.load()
Beispiel #36
0
def load_parameters(file_name):
    with open(file_name + ".dat", "rb") as f:
        u = pickle._Unpickler(f)
        net_params = u.load()
        return net_params
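
Unlike most snippets in this collection, the function above never sets u.encoding, so it may fail on Python 2 pickles that contain non-ASCII byte strings. A hedged variant (hypothetical helper name) that follows the same latin1 pattern:

import pickle

def load_parameters_latin1(file_name):
    # Same as load_parameters above, but tolerant of Python 2 pickles.
    with open(file_name + ".dat", "rb") as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        return u.load()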
Beispiel #37
0
#             ll_param_MAPlist[n].append()
        #produce command and run
            pythonpath=pythonpath+" -p "+n+" "+str(v)+" "+l+" "+u+" "
#             pythonpath=pythonpath+" -p "+n+" "+str(initvalue)+" "+l+" "+u+" "
        if randomstr!=None:
            os.system("rm "+namestr+options.tag+options.model+randomstr+".parameter")
        randomstr=Util.random_str()
        print(pythonpath+" -b "+randomstr+" "+str(int(options.bootstrap[1])))
        sys.stdout.flush()
        a=call_system(pythonpath+" -b "+randomstr+" "+str(int(options.bootstrap[1])))
        if a!=0:
            print("cycle",i,a,"wrong")
            continue
        #collect the results
        print(options.fsfile+namestr+options.tag+options.model+"array.pickle")
        u=pickle._Unpickler(open(options.fsfile+namestr+options.tag+options.model+"array.pickle","rb"))
        u.encoding='latin1'
        residualarray=u.load() #pickle.load(open(options.fsfile+namestr+options.tag+options.model+"array.pickle","rb"))
        u=pickle._Unpickler(open(options.fsfile+namestr+options.tag+options.model+"hist.pickle","rb"))
        u.encoding='latin1'
        residualhis=u.load()#pickle.load(open(options.fsfile+namestr+options.tag+options.model+"hist.pickle","rb"))
        bif=open(options.fsfile+namestr+options.tag+options.model+randomstr+"btstrap.temp",'r')
        btstrap=[]
        for e in bif:
            elist=re.split(r"\s+",e.strip())
            for ee in elist:
                btstrap.append(float(ee))
        bif.close()
        os.system("rm "+options.fsfile+namestr+options.tag+options.model+randomstr+"btstrap.temp")
            
#         u=pickle._Unpickler(open(options.fsfile+namestr+options.tag+options.model+randomstr+"btstrap.pickle","rb"))
Beispiel #38
0
def BayesLogisticRegression():
    # Load the MNIST dataset
    print("************ Loading MNIST Data ************")
    with gzip.open('mnist.pkl.gz', 'rb') as f:
        try:
            train_set, valid_set, test_set = pickle.load(f, encoding='latin1')
        except:
            train_set, valid_set, test_set = pickle.load(f)
    f.close()

    #train_mnist=load_mnist(dataset="training", path="/Users/sarahmullin/Box Sync/3/Sarah")

    #load the dataset
    with gzip.open('mnist.pkl.gz','rb') as ff :
        u = pickle._Unpickler( ff )
        u.encoding = 'latin1'
        train, val, test = u.load()


    ###Relabelling MNIST data
    x_train=train[0]
    y_train=train[1]
    x_validate=val[0]
    y_validate=val[1]
    x_test=test[0]
    y_test=test[1]

    ###loading USPS data
    print("************ Loading USPS Data ************")
    usps_data = []
    usps_label = []
    path_to_data = "./proj3_images/Numerals/"
    img_list = os.listdir(path_to_data)
    sz = (28,28)
    for i in range(10):
        label_data = path_to_data + str(i) + '/'
        img_list = os.listdir(label_data)
        for name in img_list:
            if '.png' in name:
                img = cv2.imread(label_data+name)
                img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                resized_img = resize_and_scale(img, sz, 255)
                usps_data.append(resized_img.flatten())
                usps_label.append(i)   
    usps_data = np.array(usps_data)
    #print("USPS Data ",usps_data.shape)
    usps_label= np.array(usps_label)


    posterior_fit, pr_bayes, w_bayes, b_bayes=SGD_bayesiansoftmax(X=x_train, t=y_train, sigma=2, minibatches=1, random_seed=1, epochs=100, learning_rate=0.001)


    ####plot cross entropy by iterations
    # evenly sampled time at 200ms intervals
    iterations = np.arange(start=1, stop=101, step=1)
    #plt.plot(iterations, costfunction_cross)
    #plt.ylabel("Cost Function")
    #plt.xlabel("Epoch")
    #plt.show()

    plt.plot(iterations, posterior_fit)
    plt.ylabel("NegLogLikelihood of Posterior Distribution")
    plt.xlabel("Epoch")
    plt.show()
    
    print("W_map:",w_bayes, "Bias_map", b_bayes)
Beispiel #39
0
def unpickle(file):
    with open(file, 'rb') as fo:
        u = pickle._Unpickler(fo)
        u.encoding = 'latin1'
        dict = u.load()
    return dict
Beispiel #40
0
def pickle_load_compat(f):
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    return u.load()
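
On Python 3 the same compatibility shim can be written without the private _Unpickler class, because pickle.load accepts an encoding argument directly:

import pickle

def pickle_load_compat_simple(f):
    # Equivalent shortcut on Python 3: latin1 maps Python 2 byte strings to str
    # without raising UnicodeDecodeError.
    return pickle.load(f, encoding='latin1')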
Beispiel #41
0
One should take these coherence features and append the right demographic and diagnostic features
"""

import pandas as pd
import csv
import os, time
import numpy as np
import pickle
from numpy import genfromtxt
from sklearn import preprocessing

# Load data from a hard-coded path
os.chdir(r"C:\Users\piete\OneDrive\Documenten\BSc\scriptie\code")
# Raw coherence measures
with open("4april", 'rb') as f:
    u = pickle._Unpickler(f)
    u.encoding = 'latin1'
    p = u.load()
    data = p

demo_data = []
with open("demographic_data_29apr.csv") as csvfile:
    reader = csv.reader(csvfile)  # each row is read as a list of strings
    for row in reader:  # each row is a list
        demo_data.append(row)
medicine = []
with open("lijst_medicijnen.csv") as csvfile:
    reader = csv.reader(csvfile, delimiter=";")  # each row is read as a list of strings
    next(reader, None)  # skip header
    for row in reader:  # each row is a list
        medicine.append(row)
Beispiel #42
0
def read_eeg_signal_from_file(filename):
    print(filename)
    x = pickle._Unpickler(open(filename, 'rb'))
    x.encoding = 'latin1'
    p = x.load()
    return p
Beispiel #43
0
 def _read_conds(self, file_path):
     with open(file_path, 'rb') as f:
         u = pickle._Unpickler(f)
         u.encoding = 'latin1'
         p = u.load()
         return p
Beispiel #44
0
def read_pickle(pkl_path):
    with open(pkl_path, 'rb') as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        return u.load()
Beispiel #45
0
def _loads_with_check(s, *args, **kwargs):
    unpickler = pickle._Unpickler(io.BytesIO(s), *args, **kwargs)
    ret = unpickler.load()
    assert unpickler.proto == TEST_PICKLE_PROTOCOL, \
        f'Pickle protocol {unpickler.proto} not agree with {TEST_PICKLE_PROTOCOL}'
    return ret
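
A brief usage sketch for the checker above; TEST_PICKLE_PROTOCOL is assumed to be defined in the surrounding test module, and the value below is only for illustration:

import pickle

TEST_PICKLE_PROTOCOL = 2  # assumed value, for illustration only

payload = pickle.dumps({'a': 1, 'b': [2, 3]}, protocol=TEST_PICKLE_PROTOCOL)
obj = _loads_with_check(payload)   # unpickler.proto is 2 for a protocol-2 stream
assert obj == {'a': 1, 'b': [2, 3]}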
Beispiel #46
0
def get_iemocap_raw(classes):
    if sys.version_info[0] == 2:
        f = open("dataset/iemocap/raw/IEMOCAP_features_raw.pkl", "rb")
        videoIDs, videoSpeakers, videoLabels, videoText, videoAudio, videoVisual, videoSentence, trainVid, testVid = pickle.load(
            f)
        '''
        label index mapping = {'hap':0, 'sad':1, 'neu':2, 'ang':3, 'exc':4, 'fru':5}
        '''
    else:
        f = open("dataset/iemocap/raw/IEMOCAP_features_raw.pkl", "rb")
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        videoIDs, videoSpeakers, videoLabels, videoText, videoAudio, videoVisual, videoSentence, trainVid, testVid = u.load(
        )
        '''
        label index mapping = {'hap':0, 'sad':1, 'neu':2, 'ang':3, 'exc':4, 'fru':5}
        '''

    # print(len(trainVid))
    # print(len(testVid))

    train_audio = []
    train_text = []
    train_visual = []
    train_seq_len = []
    train_label = []

    test_audio = []
    test_text = []
    test_visual = []
    test_seq_len = []
    test_label = []
    for vid in trainVid:
        train_seq_len.append(len(videoIDs[vid]))
    for vid in testVid:
        test_seq_len.append(len(videoIDs[vid]))

    max_len = max(max(train_seq_len), max(test_seq_len))
    print('max_len', max_len)
    for vid in trainVid:
        train_label.append(videoLabels[vid] + [0] *
                           (max_len - len(videoIDs[vid])))
        pad = [np.zeros(videoText[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        text = np.stack(videoText[vid] + pad, axis=0)
        train_text.append(text)

        pad = [np.zeros(videoAudio[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        audio = np.stack(videoAudio[vid] + pad, axis=0)
        train_audio.append(audio)

        pad = [np.zeros(videoVisual[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        video = np.stack(videoVisual[vid] + pad, axis=0)
        train_visual.append(video)

    for vid in testVid:
        test_label.append(videoLabels[vid] + [0] *
                          (max_len - len(videoIDs[vid])))
        pad = [np.zeros(videoText[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        text = np.stack(videoText[vid] + pad, axis=0)
        test_text.append(text)

        pad = [np.zeros(videoAudio[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        audio = np.stack(videoAudio[vid] + pad, axis=0)
        test_audio.append(audio)

        pad = [np.zeros(videoVisual[vid][0].shape)
               ] * (max_len - len(videoIDs[vid]))
        video = np.stack(videoVisual[vid] + pad, axis=0)
        test_visual.append(video)

    train_text = np.stack(train_text, axis=0)
    train_audio = np.stack(train_audio, axis=0)
    train_visual = np.stack(train_visual, axis=0)
    # print(train_text.shape)
    # print(train_audio.shape)
    # print(train_visual.shape)

    # print()
    test_text = np.stack(test_text, axis=0)
    test_audio = np.stack(test_audio, axis=0)
    test_visual = np.stack(test_visual, axis=0)
    # print(test_text.shape)
    # print(test_audio.shape)
    # print(test_visual.shape)
    train_label = np.array(train_label)
    test_label = np.array(test_label)
    train_seq_len = np.array(train_seq_len)
    test_seq_len = np.array(test_seq_len)
    # print(train_label.shape)
    # print(test_label.shape)
    # print(train_seq_len.shape)
    # print(test_seq_len.shape)

    train_mask = np.zeros((train_text.shape[0], train_text.shape[1]),
                          dtype='float')
    for i in range(len(train_seq_len)):
        train_mask[i, :train_seq_len[i]] = 1.0

    test_mask = np.zeros((test_text.shape[0], test_text.shape[1]),
                         dtype='float')
    for i in range(len(test_seq_len)):
        test_mask[i, :test_seq_len[i]] = 1.0

    train_label, test_label = createOneHot(train_label, test_label)

    train_data = np.concatenate((train_audio, train_visual, train_text),
                                axis=-1)
    test_data = np.concatenate((test_audio, test_visual, test_text), axis=-1)

    return train_data, test_data, train_audio, test_audio, train_text, test_text, train_visual, test_visual, train_label, test_label, train_seq_len, test_seq_len, train_mask, test_mask
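
The zero-padding and masking idiom used above (pad each variable-length sequence to max_len with zero frames and record real frames in a mask), sketched in isolation with made-up lengths and feature size:

import numpy as np

feat_dim = 4
seqs = [np.random.randn(n, feat_dim) for n in (3, 5, 2)]   # variable-length sequences
max_len = max(s.shape[0] for s in seqs)

# Pad every sequence with zero frames up to max_len, as done for text/audio/visual above.
padded = np.stack(
    [np.concatenate([s, np.zeros((max_len - s.shape[0], feat_dim))], axis=0)
     for s in seqs], axis=0)                               # (num_seqs, max_len, feat_dim)

# Mask: 1.0 for real frames, 0.0 for padding.
mask = np.zeros((len(seqs), max_len), dtype='float')
for i, s in enumerate(seqs):
    mask[i, :s.shape[0]] = 1.0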
Beispiel #47
0
def unpickle(file):
    with open(os.path.join(DATA_PATH, file), 'rb') as fo:
        u = pickle._Unpickler(fo)
        u.encoding = 'latin1'
        dict = u.load()
    return dict
Beispiel #48
0
def get_raw_data(data, classes):
    if data == 'iemocap':
        return get_iemocap_raw(classes)
    mode = 'audio'
    with open(
            './dataset/{0}/raw/{1}_{2}way.pickle'.format(data, mode, classes),
            'rb') as handle:
        u = pickle._Unpickler(handle)
        u.encoding = 'latin1'
        if data == 'mosi':
            (audio_train, train_label, audio_test, test_label, _, train_length,
             test_length) = u.load()
        elif data == 'mosei':
            (audio_train, train_label, _, _, audio_test, test_label, _,
             train_length, _, test_length, _, _, _) = u.load()
            print(test_label.shape)

    mode = 'text'
    with open(
            './dataset/{0}/raw/{1}_{2}way.pickle'.format(data, mode, classes),
            'rb') as handle:
        u = pickle._Unpickler(handle)
        u.encoding = 'latin1'
        if data == 'mosi':
            (text_train, train_label, text_test, test_label, _, train_length,
             test_length) = u.load()
        elif data == 'mosei':
            (text_train, train_label, _, _, text_test, test_label, _,
             train_length, _, test_length, _, _, _) = u.load()
            print(test_label.shape)

    mode = 'video'
    with open(
            './dataset/{0}/raw/{1}_{2}way.pickle'.format(data, mode, classes),
            'rb') as handle:
        u = pickle._Unpickler(handle)
        u.encoding = 'latin1'
        if data == 'mosi':
            (video_train, train_label, video_test, test_label, _, train_length,
             test_length) = u.load()
        elif data == 'mosei':
            (video_train, train_label, _, _, video_test, test_label, _,
             train_length, _, test_length, _, _, _) = u.load()
            print(test_label.shape)

    print('audio_train', audio_train.shape)
    print('audio_test', audio_test.shape)
    print('train_label', train_label.shape)
    train_data = np.concatenate((audio_train, video_train, text_train),
                                axis=-1)
    test_data = np.concatenate((audio_test, video_test, text_test), axis=-1)

    train_label = train_label.astype('int')
    test_label = test_label.astype('int')
    print(train_data.shape)
    print(test_data.shape)
    train_mask = np.zeros((train_data.shape[0], train_data.shape[1]),
                          dtype='float')
    for i in range(len(train_length)):
        train_mask[i, :train_length[i]] = 1.0

    test_mask = np.zeros((test_data.shape[0], test_data.shape[1]),
                         dtype='float')
    for i in range(len(test_length)):
        test_mask[i, :test_length[i]] = 1.0

    train_label, test_label = createOneHot(train_label, test_label)

    print('train_mask', train_mask.shape)

    seqlen_train = train_length
    seqlen_test = test_length

    return train_data, test_data, audio_train, audio_test, text_train, text_test, video_train, video_test, train_label, test_label, seqlen_train, seqlen_test, train_mask, test_mask
Beispiel #49
0
def get(batch=0):
    # key_id = 58 #
    # model_id = "00025"

    _file = open(os.path.join(root, 'txt', "front_rt.pkl"), "rb")
    data = pickle._Unpickler(_file)
    data.encoding = 'latin1'

    data = data.load()
    _file.close()
    flage = False
    print(len(data))
    for k, item in enumerate(data[4000 * batch:4000 * (batch + 1)]):

        key_id = item[-1]
        # if  k == 5689:
        #     flage = True

        # if flage == False:
        #     continue
        print('++++++++++++++++++++++++++++++++%d' % (k + 4000 * batch))
        video_path = os.path.join(root, 'unzip', item[0] + '.mp4')

        reference_img_path = video_path[:-4] + '_%05d.png' % key_id

        reference_prnet_lmark_path = video_path[:-4] + '_prnet.npy'

        original_obj_path = video_path[:-4] + '_original.obj'

        rt_path = video_path[:-4] + '_sRT.npy'
        lmark_path = video_path[:-4] + '_front.npy'

        if os.path.exists(video_path[:-4] + '_ani.mp4'):
            print('=====')
            continue

        if not os.path.exists(original_obj_path) or not os.path.exists(
                reference_prnet_lmark_path) or not os.path.exists(
                    lmark_path) or not os.path.exists(rt_path):

            print(original_obj_path)
            print('++++')
            continue
        try:
            # extract the frontal facial landmarks for key frame
            lmk3d_all = np.load(lmark_path)
            lmk3d_target = lmk3d_all[key_id]

            # load the 3D facial landmarks on the PRNet 3D reconstructed face
            lmk3d_origin = np.load(reference_prnet_lmark_path)
            # lmk3d_origin[:,1] = res - lmk3d_origin[:,1]

            # load RTs
            rots, trans = recover(np.load(rt_path))

            # calculate the affine transformation between PRNet 3D face and the frotal face landmarks
            lmk3d_origin_homo = np.hstack(
                (lmk3d_origin, np.ones([lmk3d_origin.shape[0], 1])))  # 68x4
            p_affine = np.linalg.lstsq(lmk3d_origin_homo,
                                       lmk3d_target,
                                       rcond=1)[0].T  # Affine matrix. 3 x 4
            pr = p_affine[:, :3]  # 3x3
            pt = p_affine[:, 3:]  # 3x1

            # load the original 3D face mesh then transform it to align frontal face landmarks
            vertices_org, triangles, colors = load_obj(
                original_obj_path)  # get unfrontalized vertices position
            vertices_origin_affine = (pr @ (vertices_org.T) +
                                      pt).T  # aligned vertices

            # set up the renderer
            renderer = setup_renderer()
            # generate animation

            temp_path = './tempp_%05d' % batch

            # generate animation
            if os.path.exists(temp_path):
                shutil.rmtree(temp_path)
            os.mkdir(temp_path)
            # writer = imageio.get_writer('rotation.gif', mode='I')
            for i in range(rots.shape[0]):
                # get rendered frame
                vertices = (
                    rots[i].T @ (vertices_origin_affine.T - trans[i])).T
                face_mesh = sr.Mesh(vertices,
                                    triangles,
                                    colors,
                                    texture_type="vertex")
                image_render = get_np_uint8_image(
                    face_mesh, renderer)  # RGBA, (224,224,3), np.uint8

                #save rgba image as bgr in cv2
                rgb_frame = (image_render).astype(int)[:, :, :-1][..., ::-1]
                cv2.imwrite(temp_path + "/%05d.png" % i, rgb_frame)
            command = ('ffmpeg -framerate 25 -i ' + temp_path + '/%05d.png '
                       '-c:v libx264 -y -vf format=yuv420p ' + video_path[:-4] + '_ani.mp4')
            os.system(command)
            # break
        except:
            print('===++++')
            continue
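
The landmark-alignment step above, estimating a 3 x 4 affine transform with np.linalg.lstsq on homogeneous coordinates and applying it to all mesh vertices, sketched on synthetic data:

import numpy as np

# Synthetic "ground truth" affine transform (rotation-like part plus translation).
R_true = np.array([[0.9, -0.1, 0.0],
                   [0.1,  0.9, 0.0],
                   [0.0,  0.0, 1.0]])
t_true = np.array([[1.0], [2.0], [3.0]])

lmk_origin = np.random.randn(68, 3)              # landmarks on the source mesh
lmk_target = (R_true @ lmk_origin.T + t_true).T  # the same landmarks after the transform

# Estimate the 3 x 4 affine matrix from homogeneous source landmarks (68 x 4).
origin_homo = np.hstack([lmk_origin, np.ones((lmk_origin.shape[0], 1))])
p_affine = np.linalg.lstsq(origin_homo, lmk_target, rcond=None)[0].T
pr, pt = p_affine[:, :3], p_affine[:, 3:]

# Apply the estimated transform to arbitrary vertices of the mesh.
vertices = np.random.randn(1000, 3)
vertices_aligned = (pr @ vertices.T + pt).T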
Beispiel #50
0
def main(opt):
    if opt.save_json_file != "None":
        dict_opts = vars(opt)
        with open(opt.save_json_file, 'w') as f:
            json.dump(dict_opts, f, sort_keys=True, indent=4)

    # Initialize joints used
    if not(opt.init_pose_joints == "None"):
        init_joints_list = [int(item)
                            for item in opt.init_pose_joints.split(',')]
    else:
        init_joints_list = None

    if not(opt.ref_joint_list_coup == "None"):
        ref_joints_list = [int(item)
                           for item in opt.ref_joint_list_coup.split(',')]
    else:
        ref_joints_list = None

    # GET FILES
    TWO_UP = up(up(os.path.abspath(__file__)))

    opt.init_pose_path = os.path.join(TWO_UP, 'assets/apose.pkl')
    opt.fmap_path = os.path.join(
        TWO_UP, 'assets/fmaps/{}.npy'.format(opt.gar_type))
    opt.cam_file = os.path.join(TWO_UP, 'assets/cam_file.pkl')
    opt.template_mesh_path = os.path.join(
        TWO_UP, 'assets/init_meshes/{}.obj'.format(opt.gar_type))
    opt.template_mesh_pkl_path = os.path.join(
        TWO_UP, 'assets/init_meshes/{}.pkl'.format(opt.gar_type))
    opt.gar_file_path = os.path.join(TWO_UP, 'assets/gar_file.pkl')

    # Get camera params
    cam_file = open(opt.cam_file, 'rb')
    u = pkl._Unpickler(cam_file)
    u.encoding = 'latin1'
    cam_data = u.load()
    opt.cam_z, opt.cam_y = cam_data[opt.gar_type]['cam_z'], cam_data[opt.gar_type]['cam_y']

    # Get vertex and face ids
    gar_file = open(opt.gar_file_path, 'rb')
    u = pkl._Unpickler(gar_file)
    u.encoding = 'latin1'
    gar = u.load()
    v_ids_template = gar[opt.gar_type]['vert_indices']
    faces_template = gar[opt.gar_type]['f']

    # Get vertex ids and faces for the template
    vertices_template, faces_side, v_ids_side = get_part(
        opt.front, opt.fmap_path, opt.template_mesh_path)

    # Initialize the SMPL template
    template_smpl = init_smpl(gender=opt.gender, init_pose_path=opt.init_pose_path, gar_file_path=opt.gar_file_path,
                              template_file_pkl_path=opt.template_mesh_pkl_path, gar_type=opt.gar_type)

    # Get masks and distance transforms
    mask = get_mask(opt.mask_file)
    dist_i, dist_o, dif_mask = get_dist_tsfs(mask)
    # ==============================================
    #               FIRST STAGE
    # ==============================================

    # Initialize camera and renderer
    # Initialize debug camera and renderer

    debug_cam_init, debug_rend_init = get_cam_rend(
        verts=template_smpl[v_ids_template][v_ids_side], faces=faces_side, cam_y=opt.cam_y, cam_z=opt.cam_z)

    opt_cam_init, opt_rend_init = get_cam_rend(
        verts=template_smpl[v_ids_template][v_ids_side], faces=faces_side, cam_y=opt.cam_y, cam_z=opt.cam_z)

    part_mesh, temp_params = init_fit(opt=opt,
                                      dist_o=dist_o, dist_i=dist_i, dif_mask=dif_mask, rn_m=opt_rend_init, smpl_h=template_smpl, v_ids_template=v_ids_template,
                                      faces_template=faces_template, debug_rn=debug_rend_init,  v_ids_side=v_ids_side, faces_side=faces_side,
                                      joints_list=init_joints_list
                                      )

    # ==============================================
    #               REFINEMENT STAGE
    # ==============================================

    v = np.array(part_mesh.v)
    v_offset = ch.zeros(v.shape)

    dp = SmplPaths(gender=opt.gender)
    smpl_h_refine = Smpl(dp.get_hres_smpl_model_data())

    data = temp_params
    smpl_h_refine.pose[:] = data["pose"]
    smpl_h_refine.trans[:] = data["trans"]
    smpl_h_refine.betas[:] = data["betas"]
    smpl_h_refine.v_personal[:] = data["v_personal"]

    # Initialize second camera and renderer
    # Initialize second debug camera and renderer

    debug_cam_ref, debug_rend_ref = get_cam_rend(
        verts=v[v_ids_side] + v_offset[v_ids_side], faces=faces_side, cam_y=opt.cam_y, cam_z=opt.cam_z)
    opt_cam_ref, opt_rend_ref = get_cam_rend(
        verts=v[v_ids_side] + v_offset[v_ids_side], faces=faces_side, cam_y=opt.cam_y, cam_z=opt.cam_z)

    # Rings and camera for the projection error
    gar_rings = compute_boundaries(v + v_offset, faces_template)
    position_largest_ring = get_verts_rings(
        gar_rings=gar_rings, verts=v + v_offset, v_ids_side=v_ids_side)
    proj_cam_ref, _ = get_cam_rend(
        verts=position_largest_ring, faces=faces_side, cam_y=opt.cam_y, cam_z=opt.cam_z)
    max_y, min_y = get_max_min_mask(mask)

    final_verts, final_iou = final_fit(
        opt=opt, part_mesh=part_mesh, v=v, v_offset=v_offset, dist_o=dist_o, dist_i=dist_i,
        smpl_h_ref=smpl_h_refine, rn_m=opt_rend_ref, debug_rn=debug_rend_ref, dif_mask=dif_mask,
        v_ids_template=v_ids_template, faces_template=faces_template, v_ids_side=v_ids_side,
        faces_side=faces_side, max_y=max_y, proj_cam=proj_cam_ref, ref_joint_list_coup=ref_joints_list
    )

    mesh_sv = Mesh(v=final_verts, f=faces_template)
    mesh_sv.write_obj(opt.save_file)

    if opt.save_iou_file != "None":
        with open(opt.save_iou_file, 'a+') as fp:
            fp.write('{} , {} \n'.format(opt.save_file, str(final_iou)))

        fp.close()
Beispiel #51
0
def dill_load(name, folder=False, sim=False, quiet=True):
    """This script loads a dill-file. It automatically checks known folders if no folder is specified.
    Args:
        name:        Name of the dill-file (<name>.dill)
        folder:        Folder containing the dill file
        sim:        Simulation object whose folders are checked automatically

    Example:
       to read ".pc/sim.dill" use: dill_load('sim', '.pc')
       or simply dill_load('sim'), since dill_load automatically checks the following folders: '.pc', 'data/.pc'
    """

    import dill
    from os.path import join, exists

    if folder=='pc' and name.startswith('pc/'): name=name[3:]
    if (not name.endswith('.dill')): name = name+'.dill' # add .dill to the name if it is not already there

    # if folder is not defined try to find the dill-file at typical places
    sim_path = '.'
    if sim: sim_path = sim.path
    if not folder:
        if exists(join(sim_path, 'pc', name)):
            folder = join(sim_path, 'pc')
            if not quiet: print('~ Found '+name+' in '+folder)
        elif exists(join(sim_path, 'data/pc', name)):
            folder = join(sim_path, 'data/pc')
            if not quiet: print('~ Found '+name+' in '+folder)
        elif exists(join(sim_path, '.', name)):
            folder = join(sim_path, '.')
            if not quiet: print('~ Found '+name+' in '+folder)
        else:
            print('!! ERROR: Could not find file '+name); return False

    # open file
    filepath = join(folder, name)
    if not quiet: print(filepath)
    # from pencilnew.io import debug_breakpoint; debug_breakpoint()
    try:                                                   # check on existance
        if not exists(filepath) or not exists(join(sim_path, filepath)):
            print('!! ERROR: dill_load could not load '+filepath); return False
        # try:                                               # open file and return it
        with open(filepath, 'rb') as f:
            obj = dill.load(f)
        return obj
        # except:
            # with open(join(sim_path, filepath), 'rb') as f:
                # obj = dill.load(f)
            # return obj

    except: # if anything goes wrong, try dry importing, i.e. if python2 and python3 usage was mixed
        print('? Something went wrong with the dill importer, trying backup solution..')
        try:
            import pickle
            with open(filepath, 'rb') as f:
                u = pickle._Unpickler(f)
                u.encoding = 'latin1'
                data = u.load()
                print('? Success!')
                return data
        except:
            print('!! ERROR: Something went wrong while importing dill-file: '+filepath); return False
Beispiel #52
0
# Output sizes of the layers (we need this for batchnorm initialization)
S = (96, 96, 96, 96, 192, 192, 192, 192, 192, 192, 10, 10)  # TODO Should be automatically calculated at some point

# Factor by which to multiply the reconstruction cost for each layer
# - all 0.0 for supervised-only; last 1.0 and all others 0.0 for Gamma model
# - we have L+1 entries since entry 0 is for denoising input
denoising_costs = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]

# ----------------------------------------------------------------------------------------------------------------------
"""
Load data.
"""

# Downloaded MNIST from http://deeplearning.net/tutorial/gettingstarted.html
with open('./data/50k_labels.pkl', 'rb') as f:
    unpickler = pickle._Unpickler(f)
    unpickler.encoding = 'latin1'  # needed to read Python 2 pickles under Python 3
    d = unpickler.load()
    data_tr = d['train_data']
    labels_tr = d['train_labels']
    data_te = d['test_data']
    labels_te = d['test_labels']
    del d


# Function for shuffling data (and possibly labels in unison)
def shuffle_data(data, labels=None):
    perm = np.random.permutation(data.shape[0])
    shuffled_data = data[perm]
    shuffled_labels = None if labels is None else labels[perm]
    return shuffled_data, shuffled_labels
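
A short usage note for the helper above: shuffle the labelled training arrays in unison once per epoch, and shuffle unlabelled data on its own (the second return value is then None):

data_tr, labels_tr = shuffle_data(data_tr, labels_tr)
data_te_shuffled, _ = shuffle_data(data_te)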
Beispiel #53
0
def unpickle(filename):
    with open(filename, 'rb') as f:
        u = cPickle._Unpickler(f)
        u.encoding = 'latin1'
        p = u.load()
        return p
Beispiel #54
0
def column(setdegree, LGN):
    """
    Visual Cortex (V1) Oriented Column -

    This function creates a cortical column trained with a specific orientation (pre-trained 'soma_exc' IE values)

    The column is composed by:
        - Pyramidal (Pyr)               layer 2/3
        - Inhibitory interneurons(inh)  layer 2/3
        - Spiny Stellate Cells(SS)      layer 4
        - Inhibitory interneurons       layer 4
        - Pyramidal                     layer 5
        - Inhibitory interneurons       layer 5
        - Pyramidal                     layer 6
        - Inhibitory interneurons       layer 6

    """
    import matplotlib.pyplot as plt
    import matplotlib
    import pickle
    import gzip
    import numpy
    import nest
    import nest.raster_plot
    import numpy as np

    import scipy.io
    import pickle
    if not 'lifl_psc_exp_ie' in nest.Models():
        nest.Install('LIFL_IEmodule')


    SS4 = nest.Create('lifl_psc_exp_ie', 324, {'I_e': 0.0,  # 122.1
                           'V_m': -70.0,
                           'E_L': -65.0,
                           'V_th': -50.0,
                           'V_reset': -65.0,
                           'C_m': 250.0,
                           'tau_m': 10.0,
                           'tau_syn_ex': 2.0,
                           'tau_syn_in': 2.0,
                           't_ref': 2.0,
                           'std_mod': False,
                           'lambda': 0.0005,
                           'tau': 12.5, })


    lgn2v1_delay = 1.0 # Delay from LGN to Cortex

    nest.Connect(LGN, SS4, {'rule': 'one_to_one'}, {
        "weight": 15000.0,
        "delay": lgn2v1_delay})

    # Target neuron. Connections are set in order to produce a target spike only in pattern detection.
    Pyr23 = nest.Create('aeif_psc_exp_peak', 324, {
                            'I_e': 0.0,
                            'V_m': -70.0,
                            'E_L': -70.0,
                            'V_th': -50.0,
                            'V_reset': -55.0,
                            'C_m': 250.0,
                            'tau_syn_ex': 2.0,
                            'tau_syn_in': 2.0,
                            't_ref': 2.0,
                            'g_L': 980.0
    })
    for idn in Pyr23:
        nest.SetStatus([idn], {'V_m': (-65.0 + np.random.rand()*10.0)})

    Pyr5 = nest.Create('aeif_psc_exp_peak', 81,
                       {   'I_e': 0.0,
                           'V_m': -70.0,
                           'E_L': -70.0,
                           'V_th': -50.0,
                           'V_reset': -55.0,
                           'C_m': 250.0,
                           'tau_syn_ex': 2.0,
                           'tau_syn_in': 2.0,
                           't_ref': 2.0,
                           'g_L': 980.0
                       })
    for idn in Pyr5:
        nest.SetStatus([idn], {'V_m': (-65.0 + np.random.rand()*10.0)})
    Pyr6 = nest.Create('aeif_psc_exp_peak', 243,
                       {  'I_e': 0.0,
                           'V_m': -70.0,
                           'E_L': -70.0,
                           'V_th': -50.0,
                           'V_reset': -55.0,
                           'C_m': 250.0,
                           'tau_syn_ex': 2.0,
                           'tau_syn_in': 2.0,
                           't_ref': 2.0,
                           'g_L': 980.0
                       })
    for idn in Pyr6:
        nest.SetStatus([idn], {'V_m': (-65.0 + np.random.rand()*10.0)})

    # Poisson Noise Generators
    poisson_activityL23 = nest.Create('poisson_generator', 1)
    nest.SetStatus(poisson_activityL23, {'rate': 1721500.0})
    poisson_activityL5 = nest.Create('poisson_generator', 1)
    nest.SetStatus(poisson_activityL5, {'rate': 1740000.0})
    poisson_activityL6 = nest.Create('poisson_generator', 1)
    nest.SetStatus(poisson_activityL6, {'rate': 1700000.0})
    poisson_activityInh = nest.Create('poisson_generator', 1)
    nest.SetStatus(poisson_activityInh, {'rate': 1750000.0})
    nest.Connect(poisson_activityL23, Pyr23, {'rule': 'all_to_all'}, {"weight": 5.0})
    nest.Connect(poisson_activityL5, Pyr5, {'rule': 'all_to_all'}, {"weight": 5.0})
    nest.Connect(poisson_activityL6, Pyr6, {'rule': 'all_to_all'}, {"weight": 5.0})


    # FeedForward
    nest.Connect(Pyr23, Pyr5, {'rule': 'fixed_indegree', 'indegree': 15}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(Pyr5, Pyr6, {'rule': 'fixed_indegree', 'indegree': 20}, {"weight": 100.0, "delay": 1.0})

    ## Connections between layers
    nest.Connect(Pyr23, Pyr23, {'rule': 'fixed_indegree', 'indegree': 36}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(Pyr5, Pyr5, {'rule': 'fixed_indegree', 'indegree': 10}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(Pyr6, Pyr6, {'rule': 'fixed_indegree', 'indegree': 20}, {"weight": 100.0, "delay": 1.0})

    In4 = nest.Create('aeif_psc_exp_peak', 65,
                      {
                          'I_e': 0.0,
                          'V_m': -70.0,
                          'E_L': -70.0,
                          'V_th': -50.0,
                          'V_reset': -55.0,
                          'C_m': 250.0,
                          'tau_syn_ex': 2.0,
                          'tau_syn_in': 2.0,
                          't_ref': 1.0,
                          'g_L': 980.0
                      })
    nest.Connect(poisson_activityInh, In4, {'rule': 'all_to_all'}, {"weight": 4.9})
    nest.Connect(SS4, In4, {'rule': 'fixed_indegree', 'indegree': 32}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(In4, SS4, {'rule': 'fixed_indegree', 'indegree': 6}, {"weight": -100.0, "delay": 1.0})
    nest.Connect(In4, In4, {'rule': 'fixed_indegree', 'indegree': 6}, {"weight": -100.0, "delay": 1.0})

    poisson_activity_inh = nest.Create('poisson_generator', 1)
    nest.SetStatus(poisson_activity_inh, {'rate': 340000.0})
    In23 = nest.Create('aeif_psc_exp_peak', 65,
                       {
                           'I_e': 0.0,
                           'V_m': -70.0,
                           'E_L': -70.0,
                           'V_th': -50.0,
                           'V_reset': -55.0,
                           'C_m': 250.0,
                           'tau_syn_ex': 2.0,
                           'tau_syn_in': 2.0,
                           't_ref': 1.0,
                           'g_L': 980.0
                       })

    nest.Connect(poisson_activityInh, In23, {'rule': 'all_to_all'}, {"weight": 5.0});
    nest.Connect(Pyr23, In23, {'rule': 'fixed_indegree', 'indegree': 35}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(In23, Pyr23, {'rule': 'fixed_indegree', 'indegree': 8}, {"weight": -100.0, "delay": 1.0})
    nest.Connect(In23, In23, {'rule': 'fixed_indegree', 'indegree': 8}, {"weight": -100.0, "delay": 1.0})

    In5 = nest.Create('aeif_psc_exp_peak', 16,
                      {
                          'I_e': 0.0,
                          'V_m': -70.0,
                          'E_L': -70.0,
                          'V_th': -50.0,
                          'V_reset': -55.0,
                          'C_m': 250.0,
                          'tau_syn_ex': 2.0,
                          'tau_syn_in': 2.0,
                          't_ref': 1.0,
                          'g_L': 980.0
                      })

    nest.Connect(poisson_activityInh, In5, {'rule': 'all_to_all'}, {"weight": 5.0})
    nest.Connect(Pyr5, In5, {'rule': 'fixed_indegree', 'indegree': 30}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(In5, Pyr5, {'rule': 'fixed_indegree', 'indegree': 8}, {"weight": -100.0, "delay": 1.0})
    nest.Connect(In5, In5, {'rule': 'fixed_indegree', 'indegree': 8}, {"weight": -100.0, "delay": 1.0})

    In6 = nest.Create('aeif_psc_exp_peak', 49,
                      {
                          'I_e': 0.0,
                          'V_m': -70.0,
                          'E_L': -70.0,
                          'V_th': -50.0,
                          'V_reset': -55.0,
                          'C_m': 250.0,
                          'tau_syn_ex': 2.0,
                          'tau_syn_in': 2.0,
                          't_ref': 1.0,
                          'g_L': 980.0
                      })
    nest.Connect(poisson_activityInh, In6, {'rule': 'all_to_all'}, {"weight": 5.0})
    nest.Connect(Pyr6, In6, {'rule': 'fixed_indegree', 'indegree': 32}, {"weight": 100.0, "delay": 1.0})
    nest.Connect(In6, Pyr6, {'rule': 'fixed_indegree', 'indegree': 6}, {"weight": -100.0, "delay": 1.0})
    nest.Connect(In6, In6, {'rule': 'fixed_indegree', 'indegree': 6}, {"weight": -100.0, "delay": 1.0})

    # Load the soma_exc (IE) values trained previously for each preferred angle.
    file = "./files/soma_exc_15_" + str(setdegree) + ".pckl"
    with open(file, 'rb') as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        SS4_soma_exc_raw = u.load()
        SS4_soma_exc = SS4_soma_exc_raw[:]
        del SS4_soma_exc_raw, setdegree

    for i in range(0, 324):
        nest.SetStatus([SS4[i]], {'soma_exc': SS4_soma_exc[i]})

    k = 0
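    # SS4 is treated as an 18x18 grid; each loop iteration below handles one 2x2 block
    # of SS cells (81 blocks in total), sets the four cells of a block as each other's
    # stimulators (neuromodulators) and couples them with plastic (STDP) synapses.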
    for j in range(0, 324, 36):
        for i in range(0, 18, 2):

            # Set stimulator (i.e. Neuromodulator) of each SS cell
            nest.SetStatus([SS4[i + j]],
                           {'stimulator': [SS4[i + j + 1], SS4[i + j + 18]]})
            nest.SetStatus([SS4[i + j + 1]],
                           {'stimulator': [SS4[i + j], SS4[i + j + 19]]})
            nest.SetStatus([SS4[i + j + 18]],
                           {'stimulator': [SS4[i + j], SS4[i + j + 19]]})
            nest.SetStatus([SS4[i + j + 19]],
                           {'stimulator': [SS4[i + j + 18], SS4[i + j + 1]]})

            # Connect between the neuromodulators of the SS cells (groups of 4 SS)
            nest.Connect([SS4[i + j]], [SS4[i + j + 1]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j]], [SS4[i + j + 18]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 1]], [SS4[i + j]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 1]], [SS4[i + j + 19]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 18]], [SS4[i + j]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 18]], [SS4[i + j + 19]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 19]], [SS4[i + j + 1]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})
            nest.Connect([SS4[i + j + 19]], [SS4[i + j + 18]], {"rule": "one_to_one"},
                         {"model": "stdp_synapse", 'delay': 0.1})

            # Connect each group of 4 SS cells to 4 layer 2/3 pyramidal cells so that each
            # pyramidal cell fires only on polychronous arrival of the SS spikes
            nest.Connect([SS4[i + j], SS4[i + j + 1], SS4[i + j + 18], SS4[i + j + 19]],
                         [Pyr23[i+j], Pyr23[i+j+1], Pyr23[i+j+2], Pyr23[i+j+3]], {"rule": "all_to_all"},
                         {"weight": 100.0, "delay": 1.0})


            k += 1

    # Create a spike detector so that we can extract spike times and build a raster plot
    Detector = nest.Create('spike_detector')
    nest.Connect(Pyr5, Detector)
    nest.Connect(Pyr6, Detector)
    nest.Connect(SS4, Detector)
    nest.Connect(Pyr23, Detector)
    nest.Connect(In23, Detector)
    nest.Connect(In4, Detector)
    nest.Connect(In5, Detector)
    nest.Connect(In6, Detector)

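    # A second spike detector that records only the pyramidal populations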
    Spikes = nest.Create('spike_detector')
    nest.Connect(Pyr23, Spikes)
    nest.Connect(Pyr5, Spikes)
    nest.Connect(Pyr6, Spikes)

    Multimeter = nest.Create('multimeter',
                             params={'withtime': True, 'record_from': ['V_m', 'I_syn_ex'], 'interval': 0.1})
    nest.Connect(Multimeter, Pyr23)
    nest.Connect(Multimeter, Pyr5)
    nest.Connect(Multimeter, SS4)
    nest.Connect(Multimeter, Pyr6)

    SomaMultimeter = nest.Create('multimeter', params={'withtime': True, 'record_from': ['soma_exc'], 'interval': 0.1})
    nest.Connect(SomaMultimeter, SS4)

    return Detector, Spikes, Multimeter, SomaMultimeter, Pyr23, SS4, Pyr5, Pyr6, In23, In4, In5, In6
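
# How the recorders returned above might be used (a sketch only: the builder name
# `build_network`, the simulation time and the raster-plot call are assumptions, not
# part of the original listing; nest.raster_plot needs `import nest.raster_plot`):
#
#   Detector, Spikes, Multimeter, SomaMultimeter, *pops = build_network(setdegree)
#   nest.Simulate(1000.0)
#   events = nest.GetStatus(Detector, 'events')[0]   # dict with 'senders' and 'times'
#   nest.raster_plot.from_device(Spikes, hist=True)  # raster of the pyramidal spikes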
Beispiel #55
0
import pickle as pkl

import networkx as nx
import numpy as np
import scipy.sparse as sp
import torch


def load_data(dataset):
    # load the data: x, y, tx, ty, allx, ally, graph
    names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph']
    objects = []
    if dataset == 'LUAD':
        print('Loading LUAD data')
        data = torch.load(
            r'/media/administrator/INTERNAL3_6TB/TCGA_data/pyt_datasets/NSCLC/raw/numerical_data_308_2021-03-18.pt'
        )
        adj = sp.csr_matrix(data.adj_self.values)
        true_labels = []
        features = data.x
        idx_train = idx_test = idx_val = []
        # adj: csr_matrix, features: tensor, true_labels: list of ints,
        # idx_train: range(0, 140), idx_val: range(140, 640), idx_test: list of 1000 ints
        return adj, features, true_labels, idx_train, idx_val, idx_test

    if dataset == 'wiki':
        adj, features, label = load_wiki()
        return adj, features, label, 0, 0, 0

    for i in range(len(names)):
        '''
        fix Pickle incompatibility of numpy arrays between Python 2 and 3
        https://stackoverflow.com/questions/11305790/pickle-incompatibility-of-numpy-arrays-between-python-2-and-3
        '''
        with open("data/ind.{}.{}".format(dataset, names[i]), 'rb') as rf:
            u = pkl._Unpickler(rf)
            u.encoding = 'latin1'
            cur_data = u.load()
            objects.append(cur_data)
        # objects.append(
        #     pkl.load(open("data/ind.{}.{}".format(dataset, names[i]), 'rb')))
    x, y, tx, ty, allx, ally, graph = tuple(objects)
    test_idx_reorder = parse_index_file(
        "data/ind.{}.test.index".format(dataset))
    test_idx_range = np.sort(test_idx_reorder)

    if dataset == 'citeseer':
        # Fix citeseer dataset (there are some isolated nodes in the graph)
        # Find isolated nodes, add them as zero-vecs into the right position
        test_idx_range_full = range(min(test_idx_reorder),
                                    max(test_idx_reorder) + 1)
        tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1]))
        tx_extended[test_idx_range - min(test_idx_range), :] = tx
        tx = tx_extended
        ty_extended = np.zeros((len(test_idx_range_full), y.shape[1]))
        ty_extended[test_idx_range - min(test_idx_range), :] = ty
        ty = ty_extended

    features = sp.vstack((allx, tx)).tolil()
    features[test_idx_reorder, :] = features[test_idx_range, :]
    features = torch.FloatTensor(np.array(features.todense()))
    adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph))

    labels = np.vstack((ally, ty))
    labels[test_idx_reorder, :] = labels[test_idx_range, :]

    idx_test = test_idx_range.tolist()
    idx_train = range(len(y))
    idx_val = range(len(y), len(y) + 500)

    train_mask = sample_mask(idx_train, labels.shape[0])
    val_mask = sample_mask(idx_val, labels.shape[0])
    test_mask = sample_mask(idx_test, labels.shape[0])

    y_train = np.zeros(labels.shape)
    y_val = np.zeros(labels.shape)
    y_test = np.zeros(labels.shape)
    y_train[train_mask, :] = labels[train_mask, :]
    y_val[val_mask, :] = labels[val_mask, :]
    y_test[test_mask, :] = labels[test_mask, :]

    # adj: csr_matrix, features: tensor, true_labels: list of ints,
    # idx_train: range(0, 140), idx_val: range(140, 640), idx_test: list of 1000 ints
    return adj, features, np.argmax(labels, 1), idx_train, idx_val, idx_test
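
# A quick smoke test for load_data (a sketch only: it assumes the Planetoid-style
# files data/ind.cora.* are on disk and that the helpers parse_index_file and
# sample_mask referenced above are defined elsewhere in the same file):
if __name__ == '__main__':
    adj, features, labels, idx_train, idx_val, idx_test = load_data('cora')
    print(adj.shape, features.shape, labels.shape)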
Beispiel #56
0
import tensorflow as tf
import keras
from keras.utils import np_utils
import keras.models as models
from keras.layers.core import Reshape, Dense, Dropout, Activation, Flatten
from keras.layers.noise import GaussianNoise
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.regularizers import *
from keras.optimizers import adam
import matplotlib.pyplot as plt
import numpy as np
import pickle as cPickle  # Python 3 replacement for the cPickle module used below

# There is a pickle incompatibility for numpy arrays between Python 2 and 3,
# which raises an ASCII decoding error; to work around it we use the following
# instead of
# Xd = cPickle.load(open("RML2016.10a_dict.dat", 'rb'))
with open('RML2016.10a_dict.dat', 'rb') as ff:
    u = cPickle._Unpickler(ff)
    u.encoding = 'latin1'
    Xd = u.load()

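# Xd is keyed by (modulation, snr) tuples; extract the sorted set of SNRs (key index 1)
# and of modulation types (key index 0).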
snrs, mods = map(lambda j: sorted(list(set(map(lambda x: x[j], Xd.keys())))),
                 [1, 0])
X = []
lbl = []
for mod in mods:
    for snr in snrs:
        X.append(Xd[(mod, snr)])
        for i in range(Xd[(mod, snr)].shape[0]):
            lbl.append((mod, snr))
X = np.vstack(X)

# Partition the data
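# The excerpt stops before the split itself; a minimal sketch of one common way to
# partition the data (the 50/50 random split, the seed, and the names train_idx,
# test_idx, X_train and X_test below are assumptions, not the original author's code):
np.random.seed(2016)
n_examples = X.shape[0]
train_idx = np.random.choice(n_examples, size=n_examples // 2, replace=False)
test_idx = list(set(range(n_examples)) - set(train_idx))
X_train, X_test = X[train_idx], X[test_idx]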
Beispiel #57
0
"""
Usage: python demo_trainmnist.py -s mnist.npy
"""
import argparse
import gzip
import pickle

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--double", help="Train on hidden layer of previously trained AE - specify params", default=False)

args = parser.parse_args()

print("Loading MNIST data")
# Retrieved from: http://deeplearning.net/data/mnist/mnist.pkl.gz

f = gzip.open('../mnist.pkl.gz', 'rb')
# work-around to deal with incompatibilities between pickle for Python 2.x and Python 3.x
u = pickle._Unpickler(f)
u.encoding = 'latin1'
(x_train, t_train), (x_valid, t_valid), (x_test, t_test) = u.load()
f.close()

data = x_train

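# Model hyperparameters (an interpretation, not stated in the original excerpt):
# dimZ is the latent dimensionality, HU_decoder/HU_encoder the hidden-layer sizes,
# and L the number of samples drawn per datapoint in the variational objective.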
dimZ = 20
HU_decoder = 400
HU_encoder = HU_decoder

batch_size = 100
L = 1
learning_rate = 0.01

if args.double:
Beispiel #58
0
# Unpickle the saved training-history dicts and plot the losses and other curves
import matplotlib.pyplot as plt
import pickle
## Unet
with open('D:/trainHistoryDict_Dense1_all_in_resume_freeze', 'rb') as pi:
    u = pickle._Unpickler(pi)
    u.encoding = 'latin1'
    history = u.load()

with open('D:/trainHistoryDict_unet_all_in_resume', 'rb') as pi:
    u = pickle._Unpickler(pi)
    u.encoding = 'latin1'
    history_D = u.load()
## Dense
with open(
        'D:/Work/mre_t2/results_normal_new/keras_all_seq_0.001/model_1/trainHistoryDict_Dense1_all_in',
        'rb') as pi:
    u = pickle._Unpickler(pi)
    u.encoding = 'latin1'
    history_D = u.load()

loss = history['loss']

# plot the losses
fig = plt.figure(0)
plt.plot(history['loss'], label='MAE (training data Unet)')
plt.plot(history['val_loss'], label='MAE (validation data Unet)')
plt.plot(history_D['loss'], label='MAE (training data Dense)')
plt.plot(history_D['val_loss'], label='MAE (validation data Dense)')

plt.title('MAE for Losses')
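
# The listing ends here; a plausible way to finish the figure (the axis labels,
# legend and plt.show() call are additions, not part of the original excerpt):
plt.xlabel('Epoch')
plt.ylabel('MAE')
plt.legend(loc='upper right')
plt.show()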
Beispiel #59
0
import pickle
import pickle as pkl  # both aliases are used by the helpers below


def read_bin_file(fname):
    with open(fname, 'rb') as f:
        u = pkl._Unpickler(f)
        u.encoding = 'latin1'
        return u.load()


def pload(fname):
    with open(fname, 'rb') as f:
        u = pickle._Unpickler(f)
        u.encoding = 'latin1'
        data = u.load()
        return data
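
# Both helpers read pickles written under Python 2 (e.g. containing numpy arrays)
# from Python 3; the latin1 encoding passes the old byte strings through unchanged.
# Example usage (the file name is only a placeholder):
#   data = pload('legacy_python2_data.pkl')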