Example #1
    def load_state(self):
        logger.info("Checking for saved state")

        state = self.persistence_handler.load_state()
        if state is None:
            logger.info("Saved state not found")
            return

        if self.config.snapshot_compress_state:
            state = zlib.decompress(base64.b64decode(state))

        loaded_drain: Drain = jsonpickle.loads(state, keys=True)

        # json-pickle encoded keys as string by default, so we have to convert those back to int
        # this is only relevant for backwards compatibility when loading a snapshot of drain <= v0.9.1
        # which did not use json-pickle's keys=true
        if len(loaded_drain.id_to_cluster) > 0 and isinstance(
                next(iter(loaded_drain.id_to_cluster.keys())), str):
            loaded_drain.id_to_cluster = {
                int(k): v
                for k, v in list(loaded_drain.id_to_cluster.items())
            }
            if self.config.drain_max_clusters:
                cache = LRUCache(maxsize=self.config.drain_max_clusters)
                cache.update(loaded_drain.id_to_cluster)
                loaded_drain.id_to_cluster = cache

        self.drain.id_to_cluster = loaded_drain.id_to_cluster
        self.drain.clusters_counter = loaded_drain.clusters_counter
        self.drain.root_node = loaded_drain.root_node

        logger.info("Restored {0} clusters built from {1} messages".format(
            len(loaded_drain.clusters), loaded_drain.get_total_cluster_size()))
Example #2
def initialize(password):
    # get salt from config
    salt = get_salt()
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(),
                     length=32,
                     salt=salt,
                     iterations=100000,
                     backend=default_backend())
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    f = Fernet(key)
    # read encrypted data from file
    data = read_data()
    try:
        # decrypt data into dict
        data = ast.literal_eval(f.decrypt(data.encode()).decode())
        try:
            # simple check if decryption was correct
            if data['check'] == True:
                cache_data = []
                # initialize cache to size 100 TODO: dynamic sizing
                cache = LRUCache(maxsize=100)
                for key, value in data.items():
                    cache_data.append((key, value))
                # populate cache
                cache.update(cache_data)
                print("Initializing cache..")
                return cache
            else:
                print('An error has occurred!')
        except Exception as e:
            print('An error has occurred!')
            print(str(e))
    except Exception as e:
        print('An error has occurred!')
        print(str(e))
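The decryption above only works if the data file was written with the same salt, iteration count, and key-derivation scheme. Below is a hedged sketch of the matching encryption step; get_salt and write_data are hypothetical helpers standing in for the config lookup and file I/O used in the example.

import base64

from cryptography.fernet import Fernet
from cryptography.hazmat.backends import default_backend
from cryptography.hazmat.primitives import hashes
from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC


def encrypt_and_store(password, payload):
    # Derive the Fernet key exactly as initialize() does, with the same salt and iterations.
    kdf = PBKDF2HMAC(algorithm=hashes.SHA256(),
                     length=32,
                     salt=get_salt(),  # hypothetical helper, mirrors the example above
                     iterations=100000,
                     backend=default_backend())
    key = base64.urlsafe_b64encode(kdf.derive(password.encode()))
    f = Fernet(key)

    # Include the sentinel key that initialize() checks after decryption.
    payload = dict(payload, check=True)
    token = f.encrypt(repr(payload).encode())

    write_data(token.decode())  # hypothetical helper for the file write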
Example #3
class Cache():
    def __init__(self, size=10):
        self.data = LRUCache(size)
        self.cache_lock = Lock()

    """
    Get value from cache with key=key
    Return the cached value if it exists, otherwise return the _default value
    """

    def get(self, key, _default=False):
        try:
            with self.cache_lock:
                return self.data.get(key, default=_default)
        except Exception as e:
            logging.exception(e)
            return _default

    """
    Insert or update data with key=key
    Return True if successful, otherwise return False
    """

    def set(self, key, value):
        try:
            with self.cache_lock:
                self.data.update([(key, value)])
            return True
        except Exception as e:
            logging.exception(e)
            return False
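A short usage sketch for this wrapper, assuming it lives in a module that already imports logging, threading.Lock, and cachetools.LRUCache as the class requires:

cache = Cache(size=100)                        # wraps an LRUCache of at most 100 entries
cache.set("session:42", {"user": "alice"})     # returns True on success

print(cache.get("session:42"))                 # {'user': 'alice'}
print(cache.get("missing", _default=None))     # None (falls back to _default)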
Example #4
class CoapLRUCache(CoapCache):
    def __init__(self, max_dim):
        """

        :param max_dim:
        """
        self.cache = LRUCache(maxsize=max_dim)

    def update(self, key, element):
        """

        :param key:
        :param element:
        :return:
        """
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """

        :param key:
        :return: CacheElement
        """
        try:
            response = self.cache[key.hashkey]
        except KeyError:
            print "problem here"
            response = None
        return response

    def is_full(self):
        """

        :return:
        """
        if self.cache.currsize == self.cache.maxsize:
            return True
        return False

    def is_empty(self):
        """

        :return:
        """

        if self.cache.currsize == 0:
            return True
        return False

    def debug_print(self):
        """

        :return:
        """
        print "size = ", self.cache.currsize
Example #5
        if len(missing_remote_ids) > 0:
            (hr_remote_ids,
             csgo_remote_ids) = split_into_hr_csgo_ids(missing_remote_ids)
            if len(csgo_remote_ids) > 0:
                for ingestion_event in ingestion_event_coll.find(
                    {"identifier": {
                        "$in": list(csgo_remote_ids)
                    }}):
                    identifier = ingestion_event['identifier']
                    all_remote_ids[identifier] = ingestion_event['metaData']

        smart_picks_for_entries = {}

        lineup_by_pool_id = lineups_by_pool_ids(lineups_coll, missing_pool_ids)
        all_lineups_by_pool_id.update(lineup_by_pool_id)

        #find_start_time = time.time()
        find_transactions = list(
            transactions_coll.find({
                "metaData.entryId": {
                    "$in": list(entry_ids)
                },
                "txnType":
                "Credit",
                "$or": [{
                    "metaData.code": "1100"
                }, {
                    "desc": {
                        "$regex": prize_regex
                    }
Example #6
class CoapLRUCache(CoapCache):
    def __init__(self, max_dim):
        """

        :param max_dim:
        """
        self.cache = LRUCache(maxsize=max_dim)

    def update(self, key, element):
        """

        :param key:
        :param element:
        :return:
        """
        logger.debug("updating cache, key: %s, element: %s", \
                key.hashkey, element)
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """

        :param key:
        :return: CacheElement
        """
        try:
            response = self.cache[key.hashkey]
        except KeyError:
            logger.debug("problem here", exc_info=1)
            response = None
        return response

    def is_full(self):
        """
        :return:
        """
        if self.cache.currsize == self.cache.maxsize:
            return True
        return False

    def is_empty(self):
        """

        :return:
        """

        if self.cache.currsize == 0:
            return True
        return False

    def __str__(self):
        msg = []
        for e in self.cache.values():
            msg.append(str(e))
        return "Cache Size: {sz}\n" + "\n".join(msg)

    def debug_print(self):
        """

        :return:
        """
        return ("size = %s\n%s" % (
            self.cache.currsize,
            '\n'.join([
                (   "element.max age %s\n"\
                    "element.uri %s\n"\
                    "element.freshness %s"  ) % (
                        element.max_age,
                        element.uri,
                        element.freshness )
                for key, element
                in list(self.cache.items())
            ])))
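A minimal, hedged sketch of driving this cache outside CoAPthon; FakeKey and FakeElement below are hypothetical stand-ins for the library's real cache-key and cache-element types, which expose hashkey, max_age, uri, and freshness:

from collections import namedtuple

# Hypothetical stand-ins for CoAPthon's cache key and element objects.
FakeKey = namedtuple("FakeKey", ["hashkey"])
FakeElement = namedtuple("FakeElement", ["max_age", "uri", "freshness"])

cache = CoapLRUCache(max_dim=2)
key = FakeKey(hashkey="GET:/sensors/temp")
cache.update(key, FakeElement(max_age=60, uri="/sensors/temp", freshness=True))

print(cache.is_empty())        # False
print(cache.get(key))          # the stored FakeElement
print(cache.debug_print())     # size plus one block per cached element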
Example #7
class RigidPuzzle(rlenv.IExperienceReplayEnvironment):

    def __init__(self, args, tid, agent_id=-1):
        dumpdir = None
        if args.exploredir is not None:
            assert agent_id >= 0, '--exploredir requires RigidPuzzle to be constructed with a non-negative agent_id'
            dumpdir = '{}/agent-{}/'.format(args.exploredir, agent_id)
            try:
                os.makedirs(dumpdir)
            except OSError as e:
                print("Exc {}".format(e))
                if errno.EEXIST != e.errno:
                    raise
        super(RigidPuzzle, self).__init__(tmax=args.batch, erep_cap=args.ereplayratio, dumpdir=dumpdir)
        self.args = args
        self.fb_cache = None
        self.fb_dirty = True
        r = self.r = rlutil.create_renderer(args, creating_ctx=False)
        self.istateraw = args.istateraw
        self.istate = np.array(r.translate_to_unit_state(args.istateraw), dtype=np.float32)
        r.state = self.istate
        self.rgb_shape = (len(r.views), r.pbufferWidth, r.pbufferHeight, 3)
        self.dep_shape = (len(r.views), r.pbufferWidth, r.pbufferHeight, 1)
        self.action_magnitude = args.amag
        self.verify_magnitude = args.vmag
        self.collision_cache = LRUCache(maxsize = 128)
        self.egreedy = args.egreedy[0] # e-greedy is an agent-specific variable
        if len(args.egreedy) != 1:
            self.egreedy = args.egreedy[tid % len(args.egreedy)]
        self.permutemag = args.permutemag
        self.perturbation = False
        self.dump_id = 0
        self.steps_since_reset = 0
        self.PEN = args.PEN
        self.REW = args.REW
        if args.msi_file:
            dic = np.load(args.msi_file)
            self.traj_s = dic['TRAJ_S']
            assert np.array_equal(self.traj_s[0], self.istate), "--msi_file does not match istate"
            self.traj_a = dic['TRAJ_A']
            self.msi_index = 0
        else:
            self.traj_a = None
            self.msi_index = None
        # lstm_barn: a cookie property to track the lstm states
        self.lstm_barn = None

    def enable_perturbation(self, manual_p=None):
        self.perturbation = True
        self.manual_p = manual_p
        self.reset()

    def disable_perturbation(self):
        self.perturbation = False
        self.reset()

    def get_perturbation(self):
        return self.r.perturbation

    def qstate_setter(self, state):
        # print('old {}'.format(self.r.state))
        # print('new {}'.format(state))
        if np.array_equal(self.r.state, state):
            return
        self.r.state = state
        self.fb_dirty = True
        self.steps_since_reset += 1

    def qstate_getter(self):
        return self.r.state

    qstate = property(qstate_getter, qstate_setter)

    @property
    def vstate(self):
        if self.fb_cache is not None and not self.fb_dirty:
            return self.fb_cache
        self.fb_dirty = False
        r = self.r
        r.render_mvrgbd()
        rgb = np.copy(r.mvrgb.reshape(self.rgb_shape))
        dep = np.copy(r.mvdepth.reshape(self.dep_shape))
        self.fb_cache = [rgb, dep]
        return self.fb_cache

    @property
    def vstatedim(self):
        return self.rgb_shape[0:3]

    def peek_act(self, action, pprefix="", start_state=None):
        r = self.r
        start_state = r.state if start_state is None else start_state
        colkey = tuple(start_state.tolist() + [action])
        if colkey in self.collision_cache:
            print("Cache Hit {}".format(colkey))
            nstate, done, ratio = self.collision_cache[colkey]
        else:
            nstate, done, ratio = r.transit_state(start_state,
                    action,
                    self.action_magnitude,
                    self.verify_magnitude)
            if ratio < 1e-4:
                '''
                Disable moving forward if ratio is too small.
                '''
                ratio = 0
                nstate = np.copy(start_state)
        sa = (colkey, (nstate, done, ratio))
        reaching_terminal = r.is_disentangled(nstate)
        reward = 0.0
        # reward += pyosr.distance(start_state, nstate) # Reward by translation
        reward += self.REW if reaching_terminal is True else 0.0 # Large Reward for solution
        if not done:
            '''
            Special handling of collision
            '''
            if ratio == 0.0:
                reward += self.PEN
                if 'die' in self.args.gameconf:
                    reaching_terminal = True
            self.collision_cache.update([sa])
        rgb_1, dep_1 = self.vstate
        self.state = nstate
        rgb_2, dep_2 = self.vstate
        print("{}New state {} ratio {} terminal {} reward {}".format(pprefix, nstate, ratio, reaching_terminal, reward))
        return nstate, reward, reaching_terminal, ratio

    def reset(self):
        super(RigidPuzzle, self).reset()
        if self.perturbation:
            r = self.r
            if self.manual_p is None:
                r.set_perturbation(uw_random.random_state(self.permutemag))
            else:
                r.set_perturbation(self.manual_p)
            '''
            Different perturbation has different istate in unit world.
            '''
            self.istate = np.array(r.translate_to_unit_state(self.istateraw), dtype=np.float32)
        self.qstate = self.istate
        self.steps_since_reset = 0
        if self.msi_index is not None:
            self.msi_index = 0
Example #8
class GeneticDetMinimizer(object):
    def __init__(self, N=30, popsize=500, cachesize=None, seed=0):

        # an 'individual' consists of an (N^2,) flat numpy array of 0s and 1s
        self.N = N
        self.indiv_size = N * N

        if cachesize is None:
            cachesize = int(np.ceil(104 * MAX_MEM_BYTES / 10))

        self._gen = np.random.RandomState(seed)

        # we want the creator module to be local to this instance, since
        # creator.create() directly adds new classes to the module's globals()
        # (yuck!)
        cr = imp.load_module('cr', *imp.find_module('creator', deap.__path__))
        self._cr = cr

        self._cr.create("FitnessMin", base.Fitness, weights=(-1.0, ))
        self._cr.create("Individual", np.ndarray, fitness=self._cr.FitnessMin)

        self._tb = base.Toolbox()
        self._tb.register("attr_bool", self.random_bool)
        self._tb.register("individual",
                          tools.initRepeat,
                          self._cr.Individual,
                          self._tb.attr_bool,
                          n=self.indiv_size)

        # the 'population' consists of a list of such individuals
        self._tb.register("population", tools.initRepeat, list,
                          self._tb.individual)
        self._tb.register("evaluate", self.fitness)
        self._tb.register("mate", self.crossover)
        self._tb.register("mutate", self.mutate, rate=0.002)
        self._tb.register("select", tools.selTournament, tournsize=3)

        # create an initial population, and initialize a hall-of-fame to store
        # the best individual
        self.pop = self._tb.population(n=popsize)
        self.hof = tools.HallOfFame(1, similar=np.array_equal)

        # print summary statistics for the population on each iteration
        self.stats = tools.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)

        # keep track of configurations that have already been visited
        self.tabu = LRUCache(cachesize)

    def random_bool(self, *args):
        return self._gen.rand(*args) < 0.5

    def mutate(self, ind, rate=1E-3):
        """
        mutate an individual by bit-flipping one or more randomly chosen
        elements
        """
        # ensure that each mutation always introduces a novel configuration
        while np.packbits(ind.astype(np.uint8)).tostring() in self.tabu:
            n_flip = self._gen.binomial(self.indiv_size, rate)
            if not n_flip:
                continue
            idx = self._gen.random_integers(0, self.indiv_size - 1, n_flip)
            ind[idx] = ~ind[idx]
        return ind,

    def fitness(self, individual):
        """
        assigns a fitness value to each individual, based on the determinant
        """
        print("Individual = {}".format(individual))
        h = np.packbits(individual.astype(np.uint8)).tostring()
        # look up the fitness for this configuration if it has already been
        # encountered
        print("H = {}".format(h))

        if h not in self.tabu:
            fitness = np.linalg.det(individual.reshape(self.N, self.N))
            self.tabu.update({h: fitness})
        else:
            fitness = self.tabu[h]

        print("fitness = {}".format(fitness))
        return fitness,

    def crossover(self, ind1, ind2):
        """
        randomly swaps a block of rows or columns between two individuals
        """

        cx1 = self._gen.random_integers(0, self.N - 2)
        cx2 = self._gen.random_integers(cx1, self.N - 1)
        ind1.shape = ind2.shape = self.N, self.N

        if self._gen.rand() < 0.5:
            # row swap
            ind1[cx1:cx2, :], ind2[cx1:cx2, :] = (ind2[cx1:cx2, :].copy(),
                                                  ind1[cx1:cx2, :].copy())
        else:
            # column swap
            ind1[:, cx1:cx2], ind2[:, cx1:cx2] = (ind2[:, cx1:cx2].copy(),
                                                  ind1[:, cx1:cx2].copy())

        ind1.shape = ind2.shape = self.indiv_size,

        return ind1, ind2

    def run(self, ngen=int(5), mutation_rate=0.3, crossover_rate=0.7):

        pop, log = algorithms.eaSimple(self.pop,
                                       self._tb,
                                       cxpb=crossover_rate,
                                       mutpb=mutation_rate,
                                       ngen=ngen,
                                       stats=self.stats,
                                       halloffame=self.hof)
        self.log = log

        return self.hof[0].reshape(self.N, self.N), log
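A brief usage sketch, assuming the surrounding module defines MAX_MEM_BYTES and imports numpy as np, deap (base, tools, algorithms), imp, and cachetools.LRUCache, as the class requires:

# Evolve a 10x10 0/1 matrix whose determinant is as negative as possible
# (FitnessMin with weights=(-1.0,) makes DEAP minimize the determinant).
gd = GeneticDetMinimizer(N=10, popsize=200, seed=0)
best, log = gd.run(ngen=50, mutation_rate=0.3, crossover_rate=0.7)

print(best.shape)            # (10, 10)
print(np.linalg.det(best))   # determinant of the best individual found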
Example #9
class CoapLRUCache(CoapCache):
    def __init__(self, max_dim):
        """

        :param max_dim:
        """
        self.cache = LRUCache(maxsize=max_dim)

    def update(self, key, element):
        """

        :param key:
        :param element:
        :return:
        """
        six.print_("updating cache")
        six.print_("key: ", key.hashkey)
        six.print_("element: ", element)
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """

        :param key:
        :return: CacheElement
        """
        try:
            response = self.cache[key.hashkey]
        except KeyError:
            six.print_("problem here")
            response = None
        return response

    def is_full(self):
        """
        :return:
        """
        if self.cache.currsize == self.cache.maxsize:
            return True
        return False

    def is_empty(self):
        """

        :return:
        """

        if self.cache.currsize == 0:
            return True
        return False

    def debug_print(self):
        """

        :return:
        """
        six.print_("size = ", self.cache.currsize)
        for key, element in self.cache.items():
            six.print_("element.max age ", element.max_age)
            six.print_("element.uri", element.uri)
            six.print_("element.freshness ", element.freshness)
Example #10
class GeneticDetMinimizer(object):
    def __init__(self, popsize=10, cachesize=None, seed=234):

        self._gen = np.random.RandomState(seed)
        #        self._FEVAL = 0;

        if cachesize is None:
            cachesize = 10e6
#            cachesize = int(np.ceil(144 * MAX_MEM_BYTES / 10))

        self._popsize = popsize

        # we want the creator module to be local to this instance, since
        # creator.create() directly adds new classes to the module's globals()
        cr = imp.load_module('cr', *imp.find_module('creator', deap.__path__))
        self._cr = cr

        ## Creator for a maximization fitness, weights = (1.0,)
        self._cr.create("FitnessMax", base.Fitness, weights=(1.0, ))
        ## Creator for an Individual
        self._cr.create("Individual", list, fitness=self._cr.FitnessMax)

        ## Creating the Variables to be tuned
        self._tb = base.Toolbox()
        # layers size
        self._tb.register("layer_size", random.randint, 0, 1)
        # num_units
        self._tb.register("num_units", random.randint, 8, 128)
        # activ_fun
        self._tb.register("activ_fun", random.randint, 0, 3)
        # optimizer
        self._tb.register("optimizer", random.randint, 0, 3)
        # w_dropout
        self._tb.register("w_dropout", self.round_random, 2)

        # Structure initializers
        self._tb.register(
            "individual", tools.initCycle, self._cr.Individual,
            (self._tb.layer_size, self._tb.num_units, self._tb.activ_fun,
             self._tb.optimizer, self._tb.w_dropout))

        # the 'population' consists of a list of such individuals
        self._tb.register("population", tools.initRepeat, list,
                          self._tb.individual)
        self._tb.register("evaluate", self.fitness)
        self._tb.register("mate", tools.cxUniform, indpb=0.6)
        self._tb.register("mutate", self.mutate)
        self._tb.register("select", tools.selTournament, tournsize=3)

        # create an initial population, and initialize a hall-of-fame to store
        # the best individual
        self.pop = self._tb.population(n=self._popsize)
        self.hof = tools.HallOfFame(1)

        # print summary statistics for the population on each iteration
        self.stats = tools.Statistics(lambda ind: ind.fitness.values)
        self.stats.register("avg", np.mean)
        self.stats.register("std", np.std)
        self.stats.register("min", np.min)
        self.stats.register("max", np.max)

        # keep track of configurations that have already been visited
        self.tabu = LRUCache(cachesize)

    def round_random(self, x):
        return round(random.random(), 1)

    def mutate(self, ind1):
        """
        # ensure that each mutation always introduces a novel configuration        
        """
        mut = self._tb.clone(ind1)

        mut[0] = random.randint(0, 1)
        mut[1] = random.randint(8, 128)
        mut[2], mut[3] = [random.randint(0, 3), random.randint(0, 3)]
        mut[4] = random.random()

        ind2 = mut

        del mut.fitness.values

        return ind2,

    def fitness(self, individual):
        """
        assigns a fitness value to each individual, based on the determinant
        """
        h = str(individual)
        # look up the fitness for this configuration if it has already been
        # encountered
        if h not in self.tabu:
            fitness = dqn(individual)
            self.tabu.update({h: fitness})
        else:
            fitness = self.tabu[h]


#        self._FEVAL += 1

#        print("Feval {} and current cache = {} of {} ".format(self._FEVAL,self.tabu.currsize,self.tabu.maxsize))

        return fitness,

    def run(self, ngen, mutation_rate=0.5, crossover_rate=0.8):

        pop, log = algorithms.eaSimple(self.pop,
                                       self._tb,
                                       cxpb=crossover_rate,
                                       mutpb=mutation_rate,
                                       ngen=ngen,
                                       stats=self.stats,
                                       halloffame=self.hof)

        self.log = log

        return self.hof[0], log
Example #11
class UprootSourceMapping(Mapping):
    _debug = False

    def __init__(self, uuid_pfnmap, cache=None):
        self._uuid_pfnmap = uuid_pfnmap
        self._cache = cache
        self.setup()

    def setup(self):
        if self._cache is None:
            self._cache = LRUCache(10)
        self._uproot_options = {
            "array_cache": self._cache,
            "object_cache": self._cache,
        }

    def __getstate__(self):
        return {
            "uuid_pfnmap": self._uuid_pfnmap,
        }

    def __setstate__(self, state):
        self._uuid_pfnmap = state["uuid_pfnmap"]
        self._cache = None
        self.setup()

    def _tree(self, uuid, treepath):
        key = "UprootSourceMapping:" + tuple_to_key((uuid, treepath))
        try:
            return self._cache[key]
        except KeyError:
            pass
        pfn = self._uuid_pfnmap[uuid]
        tree = uproot4.open(pfn + ":" + treepath, **self._uproot_options)
        if str(tree.file.uuid) != uuid:
            raise RuntimeError(
                f"UUID of file {pfn} does not match expected value ({uuid})")
        self._cache[key] = tree
        return tree

    def preload_tree(self, uuid, treepath, tree):
        """To save a double-open when using NanoEventsFactory.from_file"""
        key = "UprootSourceMapping:" + tuple_to_key((uuid, treepath))
        self._cache.update(tree.file.array_cache)
        self._cache.update(tree.file.object_cache)
        tree.file.array_cache = self._cache
        tree.file.object_cache = self._cache
        self._cache[key] = tree

    @classmethod
    def interpret_key(cls, key):
        uuid, treepath, entryrange, form_key, *layoutattr = key_to_tuple(key)
        start, stop = (int(x) for x in entryrange.split("-"))
        nodes = form_key.split(",")
        if len(layoutattr) == 1:
            nodes.append("!" + layoutattr[0])
        elif len(layoutattr) > 1:
            raise RuntimeError(f"Malformed key: {key}")
        return uuid, treepath, start, stop, nodes

    def __getitem__(self, key):
        uuid, treepath, start, stop, nodes = UprootSourceMapping.interpret_key(
            key)
        if UprootSourceMapping._debug:
            print("Gettting:", uuid, treepath, start, stop, nodes)
        stack = []
        skip = False
        for node in nodes:
            if skip:
                skip = False
                continue
            elif node == "!skip":
                skip = True
                continue
            elif node == "!load":
                branch = self._tree(uuid, treepath)[stack.pop()]
                stack.append(branch.array(entry_start=start, entry_stop=stop))
            elif node.startswith("!"):
                tname = node[1:]
                if not hasattr(transforms, tname):
                    raise RuntimeError(
                        f"Syntax error in form_key: no transform named {tname}"
                    )
                getattr(transforms, tname)(stack)
            else:
                stack.append(node)
        if len(stack) != 1:
            raise RuntimeError(f"Syntax error in form key {nodes}")
        out = stack.pop()
        try:
            out = numpy.array(out)
        except ValueError:
            if UprootSourceMapping._debug:
                print(out)
            raise RuntimeError(
                f"Left with non-bare array after evaluating form key {nodes}")
        return out

    def __len__(self):
        raise NotImplementedError

    def __iter__(self):
        raise NotImplementedError
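For context, the _uproot_options built in setup() simply hand one shared LRUCache to uproot as both the object cache and the array cache. A hedged standalone sketch of the same idea follows; the file path and tree name are placeholders:

import uproot4
from cachetools import LRUCache

shared_cache = LRUCache(10)   # one small cache reused for both roles, as in setup()

tree = uproot4.open(
    "nano.root:Events",       # placeholder path:treename, same syntax as _tree() above
    array_cache=shared_cache,
    object_cache=shared_cache,
)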
Example #12
class ImagenetModel:
    ''' A class for featurizing images using pre-trained neural nets '''
    def __init__(self,
                 include_top=False,
                 pooling=None,
                 n_channels=None,
                 cache_size=int(1e4),
                 model='inception_v3',
                 weights='imagenet',
                 cache_dir=None,
                 n_objects=None):

        self.include_top = include_top  # determines if used for classification or featurization, TODO separate into two classes?
        self.n_channels = n_channels
        self.n_objects = n_objects
        self.pooling = pooling

        self.failed_urls = set()

        # NOTE: set cache_dir to None to turn off caching
        if cache_dir:
            # create default cache path in the current file dir w/ filename specifying config
            config = [
                f'objects-{NUM_OBJECTS}' if include_top else 'features',
                str(cache_size), model, pooling if pooling else '',
                str(n_channels) if n_channels else ''
            ]
            config_str = '-'.join([c for c in config if c])  # filter out empty strings and join with '-'
            cache_fname = f'imagenet-cache-{config_str}.pkl'
            self.cache_path = os.path.join(cache_dir, cache_fname)
            # TODO allow larger cache_size to still load from previous smaller caches
        else:
            self.cache_path = None

        if self.cache_path and os.path.isfile(self.cache_path):
            self.load_cache()
        else:
            self.cache = LRUCache(cache_size)

        if model == 'xception':
            self.model = xception.Xception(weights=weights,
                                           include_top=include_top,
                                           pooling=pooling)
            self.preprocess = xception.preprocess_input
            self.target_size = (299, 299)
            if include_top:
                self.decode = xception.decode_predictions
            else:
                self.output_dim = (n_channels if n_channels else
                                   2048) * (1 if pooling else 10**2)
        elif model == 'inception_v3':
            self.model = inception_v3.InceptionV3(weights=weights,
                                                  include_top=include_top,
                                                  pooling=pooling)
            self.preprocess = inception_v3.preprocess_input
            self.target_size = (299, 299)
            if include_top:
                self.decode = inception_v3.decode_predictions
            else:
                self.output_dim = (n_channels if n_channels else
                                   2048) * (1 if pooling else 8**2)
        elif model == 'mobilenet_v2':
            self.model = mobilenetv2.MobileNetV2(weights=weights,
                                                 include_top=include_top,
                                                 pooling=pooling)
            self.preprocess = mobilenetv2.preprocess_input
            self.target_size = (224, 224)
            if include_top:
                self.decode = mobilenetv2.decode_predictions
            else:
                self.output_dim = (n_channels if n_channels else
                                   1280) * (1 if pooling else 7**2)
        else:
            raise Exception('model option not implemented')

        # NOTE: we force the imagenet model to load in the same scope as the functions using it to avoid tensorflow weirdness
        self.model.predict(np.zeros((1, *self.target_size, 3)))
        logging.info('imagenet loaded')

    def save_cache(self, cache_path=None):
        ''' saves cache of image identifier (url or path) to image features at the given cache path '''
        logging.info('saving cache')
        cache_path = cache_path if cache_path else self.cache_path
        with open(cache_path, 'wb') as pkl_file:
            pickle.dump({
                'cache': self.cache,
                'failed_urls': self.failed_urls
            }, pkl_file)

    def load_cache(self, cache_path=None):
        ''' loads cache of image identifier (url or path) to image features '''
        cache_path = cache_path if cache_path else self.cache_path
        logging.info(f'loading cache from {cache_path}')
        if not os.path.isfile(cache_path):
            logging.error(f'cache file not present at: {cache_path}')
        else:
            with open(cache_path, 'rb') as pkl_file:
                pkl_data = pickle.load(pkl_file)
                self.cache = pkl_data['cache']
                self.failed_urls = pkl_data['failed_urls']

            logging.info(
                f'successfully loaded cache with {len(self.cache)} entries '
                f'and failed urls with {len(self.failed_urls)} entries')

    def get_objects_from_url(self, image_url, ignore_failed=True):
        ''' detects objects from image in a url, returns None if url download failed '''
        if image_url not in self.cache:
            # skip if we're ignoring previously failed urls
            if ignore_failed and image_url in self.failed_urls:
                return

            # download image and convert into numpy array
            image_array = image_array_from_url(image_url,
                                               target_size=self.target_size)
            if image_array is None:
                # if url request failed, add to failed set
                self.failed_urls.add(image_url)
                return

            # add a dim if needed
            if image_array.ndim == 3:
                image_array = image_array[None, :, :, :]

            # use the imagenet model to detect the objects in the image and add result to cache
            self.cache[image_url] = self.get_objects(image_array)

        # returned cached result
        return self.cache[image_url]

    def get_objects(self, image_array):
        ''' detects objects in image provided as an array '''
        logging.debug(f'recognizing objects')
        image_array = self.preprocess(image_array)
        objects = self.model.predict(image_array)
        objects = self.decode(objects, top=self.n_objects)[0]
        return {
            obj[1]: obj[2]
            for obj in objects
        }  # objects = [{'object': obj[1], 'score': obj[2]} for obj in objects]

    def get_features_from_paths(self, image_paths):
        ''' takes a list of image filepaths and returns the features resulting from applying the imagenet model to those images '''
        if self.include_top:
            raise Exception(
                'getting features from a classification model with include_top=True is currently not supported'
            )
        # TODO add caching for paths like urls
        images_array = np.array([
            image_array_from_path(fpath, target_size=self.target_size)
            for fpath in image_paths
        ])
        return self.get_features(images_array)

    def get_features_from_url(self, image_url):
        ''' attempt to download the image at the given url, then return the imagenet features if successful, and None if not '''
        if self.include_top:
            raise Exception(
                'getting features from a classification model with include_top=True is currently not supported'
            )

        if image_url not in self.cache:
            image_array = image_array_from_url(image_url,
                                               target_size=self.target_size)
            if image_array is None:
                self.failed_urls.add(image_url)
                return
            else:
                if image_array.ndim == 3:
                    image_array = image_array[None, :, :, :]
                self.cache[image_url] = self.get_features(image_array)

        return self.cache.get(image_url)

    def get_features_from_url_batch(self, image_urls, ignore_failed=True):
        ''' takes a list of image urls and returns the features resulting from applying the imagenet model to
        successfully downloaded images along with the urls that were successful. Cached values are used when available
        '''
        if self.include_top:
            raise Exception(
                'getting features from a classification model with include_top=True is currently not supported'
            )
        # split urls into new ones and ones that have cached results
        new_urls = image_urls
        cached_urls = []
        # new_urls, cached_urls = partition(lambda x: x in self.cache, image_urls, as_list=True)
        logging.info(f'getting image arrays from {len(image_urls)} urls; '
                     f'{len(new_urls)} new urls and {len(cached_urls)} cached urls')
        if cached_urls:
            logging.debug(
                f'loading features for {len(cached_urls)} images from cache')
            if len(cached_urls) == 1:
                cached_image_features = self.cache[cached_urls[0]]
                # print('pre cached dim:', cached_image_features.ndim)
                # if cached_image_features.ndim == 1:
                #     cached_image_features = cached_image_features[None, :]
                # elif cached_image_features.ndim == 3:
                #     assert cached_image_features.shape[:1] == (1, 1)
                #     cached_image_features = cached_image_features[0, :, :]
                # print('post cached dim:', cached_image_features.ndim)
                assert cached_image_features.ndim == 2
            else:
                cached_image_features = np.array(
                    [self.cache[url] for url in cached_urls])
                # print('pre cached dim:', cached_image_features.ndim)
                # if cached_image_features.ndim == 1:
                #     cached_image_features = cached_image_features[None, :]
                # elif cached_image_features.ndim == 3:
                #     assert cached_image_features.shape[:1] == (1, 1)
                #     cached_image_features = cached_image_features[0, :, :]
                # print('cached dim:', cached_image_features.ndim)
                assert cached_image_features.ndim == 2
            # print('cached dim:', cached_image_features.ndim)

        # remove new urls known to fail
        if new_urls and ignore_failed:
            logging.debug(
                f'num new urls before dropping fails: {len(new_urls)}')
            new_urls = list(
                filter(lambda x: x not in self.failed_urls, new_urls))

        if new_urls:
            logging.debug(
                f'computing features for {len(new_urls)} images from urls')
            # attempt to download images and convert to constant-size arrays  # TODO what to do with failed urls, try again, cache failure?
            new_image_arrays = (image_array_from_url(
                url, target_size=self.target_size) for url in new_urls)

            # filter out unsuccessful image urls which output None
            failed_images, downloaded_images = partition(
                lambda x: x[1] is not None,
                zip(new_urls, new_image_arrays),
                as_list=True)

            logging.debug(f'found {len(failed_images)} failed url images')
            logging.info(
                f'successfully downloaded {len(downloaded_images)} url images')
            # add failed urls to list
            logging.debug('saving failed urls to failed set')
            self.failed_urls.update(pair[0] for pair in failed_images)
            # downloaded_images = [(url, img) for url, img in zip(new_urls, new_image_arrays) if img is not None]

            if downloaded_images:
                # unzip any successful url, img pairs and convert data types
                new_urls, new_image_arrays = zip(*downloaded_images)
                new_urls = list(new_urls)
                new_image_arrays = np.array(new_image_arrays)

                logging.debug(
                    f'getting features from image arrays with shape {new_image_arrays.shape}'
                )
                new_image_features = self.get_features(new_image_arrays)
                assert new_image_features.ndim == 2
                logging.debug(
                    f'got features array with shape {new_image_features.shape}'
                )
                # add new image features to cache
                logging.info('saving features to cache')

                self.cache.update(zip(new_urls, new_image_features))

        if cached_urls and new_urls and downloaded_images:
            # print('cached:', cached_image_features.shape)
            # print('new: ', new_image_features.shape)
            logging.debug('cached and new')
            # combine results
            image_features = np.vstack(
                (cached_image_features, new_image_features))
            image_urls = cached_urls + new_urls
        elif cached_urls:
            logging.debug('cached')
            image_features = cached_image_features
            image_urls = cached_urls
        elif new_urls and downloaded_images:
            logging.debug('new')
            image_features = new_image_features
            image_urls = new_urls
        else:
            logging.debug('no new or cached urls')
            return np.array([[]]), []

        return image_features, image_urls

    def get_features(self, images_array):
        ''' takes a batch of images as a 4-d array and returns the (flattened) imagenet features for those images as a 2-d array '''
        if self.include_top:
            raise Exception(
                'getting features from a classification model with include_top=True is currently not supported'
            )

        if images_array.ndim != 4:
            raise Exception(
                'invalid input shape for images_array, expects a 4d array')

        # preprocess and compute image features
        logging.debug(f'preprocessing {images_array.shape[0]} images')
        images_array = self.preprocess(images_array)
        logging.debug(f'computing image features')
        image_features = self.model.predict(images_array)

        # if n_channels is specified, only keep that number of channels
        if self.n_channels:
            logging.debug(f'truncating to first {self.n_channels} channels')
            image_features = image_features.T[:self.n_channels].T

        # reshape output array by flattening each image into a vector of features
        shape = image_features.shape
        return image_features.reshape(shape[0], np.prod(shape[1:]))

    def predict(self, images_array):
        ''' alias for get_features to more closely match scikit-learn interface '''
        return self.get_features(images_array)

    def finetune(self,
                 image_paths,
                 labels,
                 pooling='avg',
                 nclasses=2,
                 batch_size=32,
                 top_layer_epochs=1,
                 frozen_layer_count=249,
                 all_layer_epochs=5,
                 class_weight=None,
                 optimizer='rmsprop'):
        ''' Finetunes the Imagenet model iteratively on a smaller set of images with (potentially) a smaller set of classes.
            First finetunes last layer then freezes bottom N layers and retrains the rest
        '''

        # preprocess images
        images_array = np.array([
            image_array_from_path(fpath, target_size=self.target_size)
            for fpath in image_paths
        ])
        logging.debug(f'preprocessing {images_array.shape[0]} images')
        if images_array.ndim != 4:
            raise Exception(
                'invalid input shape for images_array, expects a 4d array')
        images_array = self.preprocess(images_array)

        # transform labels to categorical variable
        labels = to_categorical(labels)

        # create new model for finetuned classification
        out = self.model.output
        if self.pooling is None:
            out = GlobalAveragePooling2D()(
                out) if pooling == 'avg' else GlobalMaxPooling2D()(out)
        dense = Dense(1024, activation='relu')(out)
        preds = Dense(nclasses, activation='softmax')(dense)
        self.finetune_model = Model(inputs=self.model.input, outputs=preds)

        # freeze all convolutional InceptionV3 layers, retrain top layer
        for layer in self.finetune_model.layers:
            layer.trainable = False
        self.finetune_model.compile(optimizer=optimizer,
                                    loss='categorical_crossentropy')
        self.finetune_model.fit(images_array,
                                np.array(labels),
                                batch_size=batch_size,
                                epochs=top_layer_epochs,
                                class_weight=class_weight)

        # freeze bottom N convolutional layers, retrain top M-N layers (M = total number of layers)
        for layer in self.finetune_model.layers[:frozen_layer_count]:
            layer.trainable = False
        for layer in self.finetune_model.layers[frozen_layer_count:]:
            layer.trainable = True

        # use SGD and low learning rate to prevent catastrophic forgetting in these blocks
        self.finetune_model.compile(optimizer=SGD(lr=0.0001, momentum=0.9),
                                    loss='categorical_crossentropy')
        self.finetune_model.fit(images_array,
                                np.array(labels),
                                batch_size=batch_size,
                                epochs=all_layer_epochs,
                                class_weight=class_weight)

    def finetuned_predict(self, image_paths):
        ''' Uses the finetuned model to predict on images loaded from the given paths.
        Returns an array of softmax prediction probabilities.
        '''

        # preprocess images
        images_array = np.array([
            image_array_from_path(fpath, target_size=self.target_size)
            for fpath in image_paths
        ])
        logging.debug(f'preprocessing {images_array.shape[0]} images')
        if images_array.ndim != 4:
            raise Exception(
                'invalid input shape for images_array, expects a 4d array')
        images_array = self.preprocess(images_array)

        return self.finetune_model.predict(images_array)
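A short, hedged usage sketch; the URLs are placeholders, and the keras, numpy, and helper imports (image_array_from_url, partition) are assumed to come from the same module as the class:

# Featurize a batch of image URLs with the default InceptionV3 backbone.
model = ImagenetModel(pooling='avg', cache_dir='.')

urls = ['https://example.com/cat.jpg', 'https://example.com/dog.jpg']  # placeholders
features, ok_urls = model.get_features_from_url_batch(urls)

print(features.shape)   # (n_downloaded, 2048) with pooling='avg' on InceptionV3
print(ok_urls)          # the subset of urls that actually downloaded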
Example #13
class ImageDataGeneration:
    """It has functionality to create generators to feed data to keras.
    """
    valid_subsets = frozenbidict({
        'training': ImageDataSubset.Training,
        'validation': ImageDataSubset.Validation,
        'prediction': ImageDataSubset.Prediction
    })

    def __init__(self,
                 dataframe,
                 input_params,
                 image_generation_params,
                 transformer=None,
                 randomize=True):
        """It initializes the dataframe object.

        Arguments:
            dataframe {Pandas DataFrame} -- A pandas dataframe object with columnar data with image names and labels.
            input_params {A InputDataParameter object} -- An input parameter object.
            image_generation_params {A ImageGenerationParameters object} -- A training data parameter object.
            transformer {A ImageDataTransformation object} -- It is used to transform the image objects.
            randomize {boolean} -- It indicates randomization of the input dataframe.
        """
        #Required parameters
        self._dataframe = dataframe
        self._input_params = input_params
        self._image_generation_params = image_generation_params

        #Optional parameters
        self._transformer = transformer
        self._randomize = randomize

        #Caching
        self._image_cache = LRUCache(
            self._image_generation_params.image_cache_size)

        #Logging
        self._logger = logging.get_logger(__name__)

        #Metrics
        self._load_slice_metric = 'get_image_objects'

        #Create metrics
        Metric.create(self._load_slice_metric)

        #Compute the training and validation boundary using the validation split.
        boundary = int(
            ceil(
                len(self._dataframe) *
                (1. - self._image_generation_params.validation_split)))
        self._logger.info(
            "Validation split: {} Identified boundary: {}".format(
                self._image_generation_params.validation_split, boundary))

        #Split the dataframe into training and validation.
        self._main_df = self._dataframe.loc[:(boundary - 1), :]
        self._validation_df = self._dataframe.loc[boundary:, :].reset_index(
            drop=True)

        n_dataframe = len(self._dataframe)
        n_main_df = len(self._main_df)
        n_validation_df = len(self._validation_df)

        self._logger.info(
            "Dataframe size: {} main set size: {} validation size: {}".format(
                n_dataframe, n_main_df, n_validation_df))

    def _get_images(self, n_images):
        """It extracts the image names from the dataframe.

        Arguments:
            n_images {int} -- The number of distinct images to sample from the dataframe.
        """
        df_size = len(self._main_df)
        loop_count = 0
        images = set()

        while len(images) <= n_images and loop_count < df_size:
            random_index = randrange(df_size)

            for image_col in self._image_generation_params.image_cols:
                images.add(self._main_df.loc[random_index, image_col])

            loop_count += 1

        return list(images)

    def fit(self, n_images):
        """It calculates statistics on the input dataset. These are used to perform transformation.

        Arguments:
            n_images {int} -- The number of images to use for computing the transformation statistics.
        """
        if n_images <= 0:
            raise ValueError(
                "Expected a positive integer for n_images. Got: {}".format(
                    n_images))

        #Input list for data fitting
        images = self._get_images(n_images)

        self._logger.info("%d images to use for data fitting", len(images))

        #Image objects
        img_objs_map = self._get_image_objects(images)
        img_objs = np.asarray(list(img_objs_map.values()))

        self._logger.info(
            "fit:: images: {} to the transformer to compute statistics".format(
                img_objs.shape))

        #Fit the data in the transformer
        self._transformer.fit(img_objs)

    def flow(self, subset='training'):
        """It creates an iterator to the input dataframe.

        Arguments:
            subset {string} -- A string selecting the training, validation, or prediction subset.
        """
        #Validate subset parameter
        if not ImageDataGeneration.valid_subsets.get(subset):
            raise ValueError("Valid values of subset are: {}".format(
                list(ImageDataGeneration.valid_subsets.keys())))

        #Qualified subset
        q_subset = ImageDataGeneration.valid_subsets[subset]

        #Dataframe placeholder
        dataframe = None

        #Pick the correct dataframe
        if q_subset == ImageDataSubset.Training or q_subset == ImageDataSubset.Prediction:
            dataframe = self._main_df
        elif q_subset == ImageDataSubset.Validation:
            dataframe = self._validation_df

        self._logger.info("flow:: subset: {} dataset size: {}".format(
            subset, len(dataframe)))

        return ImageDataIterator(self,
                                 dataframe,
                                 self._image_generation_params.batch_size,
                                 q_subset,
                                 randomize=self._randomize)

    def _load_subset_slice(self, df_slice, subset):
        """It loads the image objects and the labels for the data frame slice.

        Arguments:
            df_slice {A pandas.DataFrame object} -- A pandas DataFrame object containing input data and labels.

        Returns:
            {An object} -- A list of image objects in prediction phase. A tuple of image objects and their labels in training phase.
        """
        self._logger.info('Using subset: %s', subset)

        #Results placeholder
        results = None

        #Load the slice
        if subset == ImageDataSubset.Training or subset == ImageDataSubset.Validation:
            results = self._load_train_phase_slice(df_slice)
        elif subset == ImageDataSubset.Prediction:
            results = self._load_predict_phase_slice(df_slice)

        return results

    def _load_train_phase_slice(self, df_slice):
        """It loads the image objects and the labels for the data frame slice.

        Arguments:
            df_slice {A pandas.DataFrame object} -- A pandas DataFrame object containing input data and labels.

        Returns:
            (Numpy object, Numpy object) -- A tuple of input data and labels.
        """
        return self._load_slice(df_slice)

    def _load_predict_phase_slice(self, df_slice):
        """It loads the image objects for the data frame slice.

        Arguments:
            df_slice {A pandas.DataFrame object} -- A pandas DataFrame object containing input data and labels.

        Returns:
            (Numpy object, Numpy object) -- A tuple of input data and labels.
        """
        images, _ = self._load_slice(df_slice)

        return images

    def _load_slice(self, df_slice):
        """It loads the image objects for the data frame slice.

        Arguments:
            df_slice {A pandas.DataFrame object} -- A pandas DataFrame object containing input data and labels.

        Returns:
            (Numpy object, Numpy object) -- A tuple of input data and labels.
        """
        #Calculate the number of classes
        num_classes = self._image_generation_params.num_classes

        #Process labels
        df_slice_y = df_slice[self._image_generation_params.label_col].values
        df_slice_y_categorical = to_categorical(
            df_slice_y,
            num_classes=num_classes) if num_classes > 2 else df_slice_y

        #Process image columns
        df_slice_x = []

        for x_col in self._image_generation_params.image_cols:
            images = df_slice[x_col].tolist()

            #Load images
            img_objs_map = self._get_image_objects(images)

            #Arrange them in the input order
            img_objs = [img_objs_map[image] for image in images]
            img_objs = np.asarray(img_objs)

            if x_col in self._image_generation_params.image_transform_cols:
                img_objs = self._apply_transformation(img_objs)

            df_slice_x.append(img_objs)

        return (df_slice_x, df_slice_y_categorical)

    def _get_image_objects(self, images):
        """It loads the image objects for the list of images.
        If the image is available, it is loaded from the cache.
        Otherwise, it is loaded from the disk.

        Arguments:
            images {[string]} -- A list of image names.
        """
        #Start recording time
        record_handle = Metric.start(self._load_slice_metric)

        img_objs = {}
        candidate_images = set(images)
        for image in candidate_images:
            #Get the image object for the current image from the cache.
            #Add to the dictionary, if it is not None.
            img_obj = self._image_cache.get(image)

            if img_obj is not None:
                img_objs[image] = img_obj

        #Create a list of missing images.
        cached_images = set(img_objs.keys())
        missing_images = [
            image for image in candidate_images if not image in cached_images
        ]

        self._logger.debug("Cached images: {} missing images: {}".format(
            cached_images, missing_images))

        #Load the missing image objects, and apply parameters.
        missing_img_objs = utils.imload(
            self._image_generation_params.dataset_location, missing_images,
            self._image_generation_params.input_shape[:2])
        missing_img_objs = self._apply_parameters(missing_img_objs)

        #Update the cache
        self._image_cache.update(zip(missing_images, missing_img_objs))

        #Update the image object dictionary with the missing image objects.
        for image, img_obj in zip(missing_images, missing_img_objs):
            img_objs[image] = img_obj

        #End recording time
        Metric.stop(record_handle, self._load_slice_metric)

        return img_objs

    def _apply_parameters(self, img_objs):
        """It processes image objects based on the input parameters.
        e.g. normalization, reshaping etc.

        Arguments:
            img_objs {numpy.ndarray} -- A numpy array of image objects.
        """
        if self._image_generation_params.normalize:
            img_objs = utils.normalize(img_objs)

        return img_objs

    def _apply_transformation(self, img_objs):
        transformed_objects = img_objs

        if self._transformer:
            transformed_objects = self._transformer.transform(img_objs)

        return transformed_objects
Example #14
class CoapLRUCache(CoapCache):
    def __init__(self, max_dim):
        """

        :param max_dim:
        """
        self.cache = LRUCache(maxsize=max_dim)

    def update(self, key, element):
        """

        :param key:
        :param element:
        :return:
        """
        print "updating cache"
        print "key: ", key.hashkey
        print "element: ", element
        self.cache.update([(key.hashkey, element)])

    def get(self, key):
        """

        :param key:
        :return: CacheElement
        """
        try:
            response = self.cache[key.hashkey]
        except KeyError:
            print "problem here"
            response = None
        return response

    def is_full(self):
        """
        :return:
        """
        if self.cache.currsize == self.cache.maxsize:
            return True
        return False

    def is_empty(self):
        """

        :return:
        """

        if self.cache.currsize == 0:
            return True
        return False

    def __str__(self):
        msg = []
        for e in self.cache.values():
            msg.append(str(e))
        return "Cache Size: {sz}\n" + "\n".join(msg)

    def debug_print(self):
        """

        :return:
        """
        print "size = ", self.cache.currsize
        list = self.cache.items()
        for key, element in list:
            print "element.max age ", element.max_age
            print "element.uri", element.uri
            print "element.freshness ", element.freshness