Example 1
    def __init__(self, url, timeout=30, utxo_cache=False, debug=False):
        self.sessionmaker = sessionmaker(bind=create_engine(
            url,
            connect_args={'connect_timeout': timeout},
            encoding='utf8',
            echo=debug))

        self.address_cache = LFUCache(maxsize=16384)
        self.txid_cache = RRCache(maxsize=131072)
        self.utxo_cache = RRCache(maxsize=262144) if utxo_cache else None

        super(DatabaseIO, self).__init__(
            self.sessionmaker(),
            address_cache=self.address_cache,
            txid_cache=self.txid_cache,
            utxo_cache=self.utxo_cache)
Example 2
    def __init__(self, raw_slices, pmap_slices, gt_slices, patch_radius,
                 batch_size, cache_size=5, p_from_cache=0.97):

        self.raw_slices = raw_slices
        self.pmap_slices = pmap_slices
        self.gt_slices = gt_slices
        self.patch_radius = patch_radius
        self.batch_size = batch_size
        self.n_slices = raw_slices.shape[0]

        self.cache = RRCache(maxsize=cache_size)
        self.p_from_cache = p_from_cache
Example 3
def get_cache(cache_type, cache_size):
    caches = {
        'lfu': LFUCache(cache_size),
        'lru': LRUCache(cache_size),
        #'rl' : RLCache(cache_size),
        'rr': RRCache(cache_size)
    }

    try:
        return caches[cache_type]
    except KeyError:
        return default()
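
Note that the helper above instantiates every cache type just to return one of them, and falls back to an externally defined default(). A minimal sketch of a lazier variant, assuming the same cachetools classes and using LRUCache as a stand-in for the unknown fallback:

from cachetools import LFUCache, LRUCache, RRCache


def get_cache(cache_type, cache_size):
    # Map names to classes so that only the requested cache is constructed.
    cache_classes = {
        'lfu': LFUCache,
        'lru': LRUCache,
        'rr': RRCache,
    }
    # Fall back to LRU when the type is unknown (stand-in for the original default()).
    return cache_classes.get(cache_type, LRUCache)(cache_size)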
Example 4
class MEGSexDataset(MEGDataset, BaseDatasetSex):
    LOAD_SUFFIX = '.npy'

    cache = RRCache(10000)

    # Do not cache the raw data
    @staticmethod
    @cached(cache)
    def get_features(path_to_file):
        return np.load(path_to_file[0])

    @property
    def modality_folders(self):
        return ['raw/MEG']
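
The pattern above, a class-level RRCache shared through cachetools' cached decorator on a static loader, can be reproduced in isolation. A minimal sketch with hypothetical names feature_cache and load_features:

import numpy as np
from cachetools import RRCache, cached

feature_cache = RRCache(maxsize=10000)


@cached(feature_cache)
def load_features(path_to_file):
    # The argument is used as the cache key, so it must be hashable;
    # the examples here pass a tuple such as tuple([epoch]) and index its first element.
    return np.load(path_to_file[0])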
Example 5
def main():
    cache_size = 5
    data_size = 100
    num_accesses = 10000
    std_dev = 3

    accesses, data = init(data_size, num_accesses, std_dev)

    cache = OPTCache(cache_size, accesses)

    optimal_hits, optimal_misses = run(cache, accesses, data)

    cache = RRCache(cache_size)
    cache_hits, cache_misses = run(cache, accesses, data)

    print(optimal_hits, optimal_misses)
    print(cache_hits, cache_misses)
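
The example above relies on init, run and OPTCache helpers defined elsewhere in that project. A self-contained sketch of just the hit/miss counting part, using only RRCache and hypothetical run, accesses and data names:

import random
from cachetools import RRCache


def run(cache, accesses, data):
    # Replay an access trace against the cache, counting hits and misses.
    hits = misses = 0
    for key in accesses:
        if key in cache:
            hits += 1
        else:
            misses += 1
            cache[key] = data[key]
    return hits, misses


accesses = [random.randint(0, 99) for _ in range(10000)]
data = {k: str(k) for k in range(100)}
print(run(RRCache(maxsize=5), accesses, data))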
Example 6
class MEGRawRanges(MEGAgeRangesDataset):
    LOAD_SUFFIX = '.npy'

    cache = RRCache(10000)

    # Do not cache the raw data
    @staticmethod
    @cached(cache)
    def get_features(path_to_file):
        return np.load(path_to_file[0])

    @property
    def modality_folders(self):
        return ['raw/MEG']

    def inputshape(self):
        # FIXME should not have magic number, comes from assumed sample rate of 200
        return 700, self.slice_length
Example 7
    def test_rr(self):
        cache = RRCache(maxsize=2, choice=min)
        self.assertEqual(min, cache.choice)

        cache[1] = 1
        cache[2] = 2
        cache[3] = 3

        self.assertEqual(2, len(cache))
        self.assertEqual(2, cache[2])
        self.assertEqual(3, cache[3])
        self.assertNotIn(1, cache)

        cache[0] = 0
        self.assertEqual(2, len(cache))
        self.assertEqual(0, cache[0])
        self.assertEqual(3, cache[3])
        self.assertNotIn(2, cache)

        cache[4] = 4
        self.assertEqual(2, len(cache))
        self.assertEqual(3, cache[3])
        self.assertEqual(4, cache[4])
        self.assertNotIn(0, cache)
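
The test above passes choice=min, which makes the normally random eviction deterministic and therefore testable. With the default choice (random.choice), the evicted key is not predictable; a minimal sketch:

from cachetools import RRCache

cache = RRCache(maxsize=2)          # default choice is random.choice
cache['a'] = 1
cache['b'] = 2
cache['c'] = 3                      # randomly evicts either 'a' or 'b'
print(len(cache), sorted(cache))    # 2, and 'c' plus one random survivor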
Example 8
import random
import numpy as np
import synth
from target import GENE_LABELS, GENE_VALUES
from deap import creator, base, tools
import librosa
from cachetools import cached, RRCache
from cachetools.keys import hashkey

# Define experiment settings
sr = 44100
cache = RRCache(maxsize=100)


def individual_to_params(individual):
    """
        Converts an individual to a dictionary of parameter values
    """
    return dict(zip(GENE_LABELS, individual))


def extract_features(sound_array):
    """
        Extracts MFCC and spectral bandwidth, centroid, flatness, and roll-off
        It seems that MFCC features alone already perform quite well
    """
    return librosa.feature.mfcc(sound_array, sr).flatten()


@cached(cache,
        key=lambda individual, target_features: hashkey(
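
The decorator in the example above is cut off here, but the pattern it uses, cachetools' cached with a custom key built from cachetools.keys.hashkey so that only the hashable part of the arguments forms the cache key, can be sketched on its own. The fitness function below is a hypothetical stand-in:

from cachetools import RRCache, cached
from cachetools.keys import hashkey

cache = RRCache(maxsize=100)


@cached(cache, key=lambda individual, target_features: hashkey(tuple(individual)))
def fitness(individual, target_features):
    # target_features may be a large or unhashable array, so only the
    # individual's genes form the cache key.
    return sum(individual)  # placeholder for the real fitness computation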
Example 9
    def cache(self, maxsize, choice=choice, missing=None, getsizeof=None):
        return RRCache(maxsize, choice=choice, missing=missing,
                       getsizeof=getsizeof)
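
This factory mirrors RRCache's constructor; note that the missing keyword appears only in older cachetools releases. A minimal sketch of constructing an RRCache with a custom choice function and a getsizeof:

from cachetools import RRCache

# Evict the smallest key instead of a random one, and size entries by value length.
cache = RRCache(maxsize=10, choice=min, getsizeof=len)
cache[1] = 'hello'   # size 5
cache[2] = 'world'   # size 5, the cache is now full
cache[3] = '!'       # exceeds maxsize, so key 1 (the minimum) is evicted
print(sorted(cache), cache.currsize)   # [2, 3] 6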
Example 10
class BaseDataset:

    DATASET_TARGETS = [HEADER_AGE]
    NUM_BUCKETS = 5
    LOAD_SUFFIX = '.npy'

    # Not sure I like this...
    GENERATOR = SubjectFileLoader

    cache = RRCache(2 * 16384)

    @staticmethod
    # @lru_cache(maxsize=8192)
    @cached(cache)
    def get_features(path_to_file):
        """
        Loads an array from file; cached to save some time.
        :param path_to_file: tuple whose first element is the path of the file to load
        :return: numpy array
        """
        # return loadmat(path_to_file, squeeze_me=True)['features'].ravel()
        l = np.load(path_to_file[0])
        #
        # if self.megind is not None:
        #     l = l[:, self.megind].squeeze()

        # l = zscore(l)
        return l

    @staticmethod
    def print_folds(buckets):
        for i, b in enumerate(buckets):
            b = np.array(b)
            print('Fold {0}, total datapoints: {1}'.format(i + 1, b.shape[0]))
            subjects, counts = np.unique(b[:, 0], return_counts=True)
            print('{0} Subjects used: {1}\nPoints per subject: {2}\n\n'.format(
                len(subjects), subjects, counts))

    def __init__(self,
                 toplevel,
                 PDK=True,
                 PA=True,
                 VG=True,
                 MO=False,
                 batchsize=2):
        self.toplevel = Path(toplevel)
        # some basic checking to make sure we have the right directory
        if not self.toplevel.exists() or not self.toplevel.is_dir():
            raise NotADirectoryError(
                "Provided top level directory is not a directory")
        self.subject_hash = parsesubjects(self.toplevel / SUBJECT_STRUCT)

        self.batchsize = batchsize
        self.traindata = None

        tests = []
        # Assemble which experiments we are going to be using
        if PDK:
            tests.append(TEST_PDK)
        if PA:
            tests.append(TEST_PA)
        if VG:
            tests.append(TEST_VG)
        if MO:
            tests.append(TEST_MO)

        if self.preprocessed_file in [
                x.name for x in self.toplevel.iterdir() if not x.is_dir()
        ]:
            with (self.toplevel / self.preprocessed_file).open('rb') as f:
                print('Loaded previous preprocessing!')
                self.buckets, self.longest_vector, self.slice_length,\
                self.testpoints, self.training_subjects = pickle.load(f)
                # list of subjects that we will use for the cross validation
                # self.leaveoutsubjects = np.unique(self.datapoints[:, 0])
                # Todo: warn/update pickled file if new subjects exist
                self.print_folds(self.buckets)
        else:
            print('Preprocessing data...')

            self.training_subjects, self.longest_vector, self.slice_length = self.files_to_load(
                tests)

            if TEST_SUBJECTS:
                print('Forcing test subjects...')
                testsubjects = TEST_SUBJECTS
            else:
                testsubjects = np.random.choice(
                    list(self.training_subjects.keys()),
                    int(len(self.training_subjects) / 10),
                    replace=False)
            self.testpoints = np.array([
                item for x in testsubjects
                for item in self.training_subjects[x]
            ])
            for subject in testsubjects:
                self.training_subjects.pop(subject)
            print('Subjects used for testing:', testsubjects)

            datapoint_ordering = sorted(
                self.training_subjects,
                key=lambda x: -len(self.training_subjects[x]))
            self.buckets = [[] for x in range(self.NUM_BUCKETS)]
            # Fill the buckets up and down
            for i in range(len(datapoint_ordering)):
                if int(i / self.NUM_BUCKETS) % 2:
                    index = self.NUM_BUCKETS - (i % self.NUM_BUCKETS) - 1
                    self.buckets[int(index)].extend(
                        self.training_subjects[datapoint_ordering[i]])
                else:
                    self.buckets[int(i % self.NUM_BUCKETS)].extend(
                        self.training_subjects[datapoint_ordering[i]])

            with (self.toplevel / self.preprocessed_file).open('wb') as f:
                pickle.dump(
                    (self.buckets, self.longest_vector, self.slice_length,
                     self.testpoints, self.training_subjects), f)
                # numpoints = self.datapoints.size[0]
                # ind = np.arange(numpoints)
                # ind = np.random.choice(ind, replace=False, size=int(0.2*numpoints)
            self.print_folds(self.buckets)

        self.next_leaveout(force=0)

    @property
    @abstractmethod
    def modality_folders(self) -> list:
        """
        Subclasses must implement this to report the name(s) of the folder(s), inside each subject folder, in which
        to find experiments.
        :return: list of folder names
        """
        pass

    def files_to_load(self, tests):
        """
        This should be implemented by subclasses to specify which files to load.
        :param tests: The types of tests that should make up the dataset
        :return: A tuple of (dictionary of loaded subjects, longest vector length, slice length)
        :rtype: tuple
        """
        longest_vector = -1
        slice_length = -1
        loaded_subjects = {}
        for subject in tqdm([
                x for x in self.toplevel.iterdir()
                if x.is_dir() and x.name in self.subject_hash.keys()
        ]):
            tqdm.write('Loading subject ' + subject.stem + '...')
            loaded_subjects[subject.stem] = []
            for experiment in tqdm([
                    t for e in self.modality_folders if (subject / e).exists()
                    for t in (subject / e).iterdir() if t.name in tests
            ]):

                for epoch in tqdm([
                        l for l in experiment.iterdir()
                        if l.suffix == self.LOAD_SUFFIX
                ]):
                    try:
                        # f = loadmat(str(epoch), squeeze_me=True)
                        f = self.get_features(tuple([epoch]))
                        if np.isnan(f).any():
                            tqdm.write('NaNs found in ' + str(epoch))
                            time.sleep(1)
                        # slice_length = max(slice_length, len(f['header']))
                        # longest_vector = max(longest_vector,
                        #                           len(f['features'].reshape(-1)))
                        slice_length = max(slice_length, f.shape[1])
                        longest_vector = max(longest_vector,
                                             f.shape[0] * f.shape[1])
                        loaded_subjects[subject.stem].append(
                            (subject.stem, epoch))
                    except Exception as e:
                        tqdm.write(
                            'Warning: skipping file, error occurred loading '
                            + str(epoch) + ': ' + str(e))

        return loaded_subjects, longest_vector, slice_length

    @property
    @abstractmethod
    def preprocessed_file(self):
        pass

    def next_leaveout(self, force=None):
        """
        Moves on to the next group to leave out.
        :return: Number of the current leave-out fold, `None` if complete
        """
        if force is not None:
            self.leaveout = force

        if self.leaveout == self.NUM_BUCKETS:
            print('Have completed cross-validation')
            self.leaveout = None
            # raise CrossValidationComplete
            return self.leaveout

        # Select next bucket to leave out as evaluation
        self.eval_points = np.array(self.buckets[self.leaveout])

        # Convert the remaining buckets into one list
        self.traindata = np.array([
            item for sublist in self.buckets for item in sublist
            if self.buckets.index(sublist) != self.leaveout
        ])

        self.leaveout += 1

        return self.leaveout

    def current_leaveout(self):
        return self.leaveout

    def sanityset(self, fold=3, batchsize=None, flatten=True):
        """
        Provides a generator for a small subset of the data, to check that the model can fit it
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(np.array(
            self.buckets[fold]
            [int(0 * len(self.buckets[fold])):int(1 *
                                                  len(self.buckets[fold]))]),
                              self.toplevel,
                              self.longest_vector,
                              self.subject_hash,
                              self.DATASET_TARGETS,
                              self.slice_length,
                              self.get_features,
                              batchsize=batchsize,
                              flatten=flatten)

    def trainingset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the current training set
        :param flatten: Whether to flatten the loaded feature arrays
        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        if self.traindata is None:
            raise AttributeError(
                'No fold initialized... Try calling next_leaveout')

        return self.GENERATOR(self.traindata,
                              self.toplevel,
                              self.longest_vector,
                              self.subject_hash,
                              self.DATASET_TARGETS,
                              self.slice_length,
                              self.get_features,
                              batchsize=batchsize,
                              flatten=flatten)

    def evaluationset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the current evaluation set
        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(self.eval_points,
                              self.toplevel,
                              self.longest_vector,
                              self.subject_hash,
                              self.DATASET_TARGETS,
                              self.slice_length,
                              self.get_features,
                              batchsize=batchsize,
                              flatten=flatten,
                              evaluate=True)

    def testset(self, batchsize=None, flatten=True):
        """
        Provides a generator object with the test set
        :param batchsize:
        :return: Generator of type :class:`.SubjectFileLoader`
        """
        if batchsize is None:
            batchsize = self.batchsize

        return self.GENERATOR(self.testpoints,
                              self.toplevel,
                              self.longest_vector,
                              self.subject_hash,
                              self.DATASET_TARGETS,
                              self.slice_length,
                              self.get_features,
                              batchsize=batchsize,
                              flatten=flatten,
                              evaluate=True)

    def inputshape(self):
        return int(self.longest_vector // self.slice_length), self.slice_length

    def outputshape(self):
        return len(self.DATASET_TARGETS)
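
The "fill the buckets up and down" loop in the constructor above distributes subjects over folds in a serpentine order, largest subjects first, so that fold sizes stay roughly balanced. A standalone sketch of the same assignment, with a hypothetical serpentine_folds name:

def serpentine_folds(items_per_subject, num_buckets=5):
    # Walk the folds forward, then backward, assigning the largest subjects first,
    # so the number of datapoints per fold stays roughly balanced.
    buckets = [[] for _ in range(num_buckets)]
    ordering = sorted(items_per_subject, key=lambda s: -len(items_per_subject[s]))
    for i, subject in enumerate(ordering):
        if (i // num_buckets) % 2:
            index = num_buckets - (i % num_buckets) - 1
        else:
            index = i % num_buckets
        buckets[index].extend(items_per_subject[subject])
    return buckets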
Example 11
class IsbiJ3Feeder(object):
    
    def __init__(self, raw_slices, pmap_slices, gt_slices, patch_radius,
                 batch_size, cache_size=5, p_from_cache=0.97):

        self.raw_slices = raw_slices
        self.pmap_slices = pmap_slices
        self.gt_slices = gt_slices
        self.patch_radius = patch_radius
        self.batch_size = batch_size
        self.n_slices = raw_slices.shape[0]

        self.cache = RRCache(maxsize=cache_size)
        self.p_from_cache = p_from_cache
    
    def __call__(self):
        
        batch_images   = [None] * self.batch_size
        batch_gt       = [None] * self.batch_size
        batch_gt_quali = [None] * self.batch_size

        for i in range(self.batch_size):

            # get a patch extractor
            patch_extractor = self.__get_random_slice_data()
            

            # get a random 0-cell index,
            # but we only consider 0-cells with
            # a size of 3
            n_patches = len(patch_extractor)
            

            j3_labels = patch_extractor.j3_labels
            assert len(j3_labels) >=1

            done = False
            while not done:
                rand_index = random.randint(0, len(j3_labels) - 1)
                cell_0_label = j3_labels[rand_index]
                assert cell_0_label >= 1
                cell_0_index = cell_0_label - 1
                try:
                    done = True
                    img, gt, gt_quali = patch_extractor[cell_0_index]
                except Exception:
                    print("oops, patch extraction failed, retrying...")
                    done = False

            # img shape atm : x,y,c
            # => desired 1,c,x,y
            img = numpy.rollaxis(img, 2,0)[None,...]
            batch_images[i] = img

          
            batch_gt[i] = gt
            batch_gt_quali[i] = gt_quali


        batch_images = numpy.concatenate(batch_images,axis=0)
        #print("batch_gt",batch_gt)
        batch_gt = numpy.array(batch_gt)
        batch_gt_quali = numpy.array(batch_gt_quali)
        # batch_images: (batch_size, c, x,y)
        # batch_gt:     (batch_size, 3)
        return batch_images, batch_gt, batch_gt_quali
    
    def __get_random_slice_data(self):

        take_from_cache = random.random() >= (1.0 - self.p_from_cache)
        if take_from_cache and len(self.cache) > 0:
            # get a random item from the cache via pop
            # (since this is a random cache, this
            # will lead to a random item)
            per_slice_data = self.__get_random_from_cache()
            return per_slice_data

        else:
            # (maybe) compute new
            # random slice
            slice_index = random.randint(0, self.n_slices-1)

            # get the per_slice_data from the cache if it is already there;
            # if it is not, compute per_slice_data and put it in the cache
            per_slice_data = self.__force_to_cache(slice_index=slice_index)
            return per_slice_data
    
    def __get_random_from_cache(self):
        assert len(self.cache) > 0
        slice_index, per_slice_data = self.cache.popitem()
        self.cache[slice_index] = per_slice_data
        return per_slice_data

    def __force_to_cache(self, slice_index):
        if slice_index in self.cache:
            per_slice_data = self.cache[slice_index]
            return per_slice_data
        else:
            per_slice_data = self.__compute_per_slice_data(slice_index)
            self.cache[slice_index] = per_slice_data
            return per_slice_data

    def __edge_gt_to_node_gt(self, edge_gt):
        # the edge_gt is on membrane level:
        # 0 at membrane pixels
        # 1 at non-membrane pixels

        seeds = nifty.segmentation.localMaximaSeeds(edge_gt)
        growMap = nifty.filters.gaussianSmoothing(1.0-edge_gt, 1.0)
        growMap += 0.1*nifty.filters.gaussianSmoothing(1.0-edge_gt, 6.0)
        gt = nifty.segmentation.seededWatersheds(growMap, seeds=seeds)

        return gt

    def __compute_per_slice_data(self, slice_index):

        raw_slice   = self.raw_slices[slice_index,:,:]
        gt_slice    = self.gt_slices[slice_index,:,:]
        pmap_slice  = self.pmap_slices[slice_index,:,:]
        edge_gt     = self.gt_slices[slice_index,:,:]
        node_gt     = self.__edge_gt_to_node_gt(edge_gt)

        # randomized overseg 
        threshold = random.uniform(0.275, 0.55)
        overseg = nifty.segmentation.distanceTransformWatersheds(pmap_slice.copy(), 
            threshold=threshold)
        hl_cgp = HlCgp(overseg)
        cell_0_patch_extrator = Cell0PatchExtrator(hl_cgp, image=raw_slice, 
            node_gt=node_gt,
            radius=self.patch_radius)

        return cell_0_patch_extrator
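
__get_random_from_cache above exploits the fact that RRCache.popitem evicts a randomly chosen entry: popping an item and putting it straight back yields a random cached value. A minimal sketch of that pattern together with the force-to-cache step, using a hypothetical get_slice_data helper:

import random
from cachetools import RRCache


def get_slice_data(cache, compute, n_slices, p_from_cache=0.97):
    # Mostly reuse a random cached slice; otherwise (or when empty) pick a random index.
    if len(cache) > 0 and random.random() < p_from_cache:
        key, value = cache.popitem()   # popitem() on an RRCache removes a random entry
        cache[key] = value             # put it straight back
        return value
    slice_index = random.randint(0, n_slices - 1)
    if slice_index not in cache:
        cache[slice_index] = compute(slice_index)
    return cache[slice_index]


data = get_slice_data(RRCache(maxsize=5), compute=lambda i: i * 10, n_slices=20)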
Example 12
    def __init__(self, max_dim):
        """
        :param max_dim: maximum number of entries held by the cache
        """
        self.cache = RRCache(maxsize=max_dim)
Example 13
class CoapRRCache(CoapCache):
    def __init__(self, max_dim):
        """
        :param max_dim: maximum number of entries held by the cache
        """
        self.cache = RRCache(maxsize=max_dim)

    def update(self, key, element):
        """
        Store an element in the cache under the given key.
        :param key:
        :param element:
        :return:
        """
        print("updating cache")
        print("key:", key.hashkey)
        print("element:", element)
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """
        Look up a cached response for the given key.
        :param key:
        :return: CacheElement
        """
        try:
            print("getting cached response")
            response = self.cache[key.hashkey]
        except KeyError:
            print("cache miss")
            response = None
        return response

    def is_full(self):
        """
        :return: True if the cache has reached its maximum size
        """
        return self.cache.currsize == self.cache.maxsize

    def is_empty(self):
        """
        :return: True if the cache is empty
        """
        return self.cache.currsize == 0

    def debug_print(self):
        """
        Print the current cache contents for debugging.
        :return:
        """
        print("size =", self.cache.currsize)
        for key, element in self.cache.items():
            print("element.max_age", element.max_age)
            print("element.uri", element.uri)
            print("element.freshness", element.freshness)
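
The is_full and is_empty checks above rely on the currsize and maxsize attributes that cachetools caches expose directly; a minimal check:

from cachetools import RRCache

c = RRCache(maxsize=2)
print(c.currsize == 0)             # True: empty, as in is_empty()
c['a'] = 1
c['b'] = 2
print(c.currsize == c.maxsize)     # True: full, as in is_full()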