def get_batches(self, batch_size, num_batches=None, shuffle=False, cluster=False):
    """
    Generate mini-batches as ``(batch_idxs, batch_ds)`` pairs.

    :param batch_size: number of example indices grouped into one batch.
    :param num_batches: total number of batches to yield. Defaults to one
        epoch, i.e. ``ceil(self.num_examples / batch_size)``; larger values
        continue into further epochs.
    :param shuffle: if True, iterate over a random permutation of
        ``self.valid_idxs`` instead of its stored order.
    :param cluster: cluster examples by their lengths (the indices are sorted
        with ``self._sort_key`` before batching); this might give performance
        boost (i.e. faster training). Only used when ``shuffle`` is True.
    :return: generator yielding ``(batch_idxs, batch_ds)``, where
        ``batch_idxs`` is a tuple of example indices and ``batch_ds`` is a
        ``DataSet`` built from those examples. Nothing is yielded when there
        are no batches per epoch (e.g. an empty data set).
    """
    num_batches_per_epoch = int(math.ceil(self.num_examples / batch_size))
    if num_batches_per_epoch == 0:
        # Nothing to batch; without this guard the epoch count below would
        # raise ZeroDivisionError.
        return
    if num_batches is None:
        num_batches = num_batches_per_epoch
    num_epochs = int(math.ceil(num_batches / num_batches_per_epoch))

    if shuffle:
        # The permutation is drawn once and shared by every epoch.
        random_idxs = random.sample(self.valid_idxs, len(self.valid_idxs))
        if cluster:
            sorted_idxs = sorted(random_idxs, key=self._sort_key)

            def grouped():
                # Batches keep neighbours in sort order together; the order
                # of the batches themselves is re-drawn on every call.
                sorted_grouped = list(grouper(sorted_idxs, batch_size))
                return random.sample(sorted_grouped, num_batches_per_epoch)
        else:
            def grouped():
                return list(grouper(random_idxs, batch_size))
    else:
        def grouped():
            return list(grouper(self.valid_idxs, batch_size))

    # One call to grouped() per epoch, evaluated lazily as batches are consumed.
    batch_idx_tuples = itertools.chain.from_iterable(
        grouped() for _ in range(num_epochs))

    # islice stops cleanly if the groups run out, instead of letting a
    # StopIteration from next() escape inside this generator.
    for group in itertools.islice(batch_idx_tuples, num_batches):
        # None entries are dropped (presumably padding added by grouper to
        # fill the last group -- confirm against grouper's definition).
        batch_idxs = tuple(i for i in group if i is not None)
        batch_data = self.get_by_idxs(batch_idxs)

        # Keys starting with '*' hold references into self.shared; resolve
        # them and store the result under the key without the '*'.
        shared_batch_data = {}
        for key, val in batch_data.items():
            if key.startswith('*'):
                assert self.shared is not None
                shared_key = key[1:]
                shared_batch_data[shared_key] = [
                    index(self.shared[shared_key], each) for each in val
                ]
        batch_data.update(shared_batch_data)

        batch_ds = DataSet(batch_data, self.data_type, shared=self.shared)
        yield batch_idxs, batch_ds
def localGenerateInput(self,model,myInput):
    """
      Function to select the next most informative point for refining the limit
      surface search.
      After this method is called, the self.inputInfo should be ready to be sent
      to the model.
      When both self.startAdaptive and self.adaptiveReady are True, the
      LimitSurfaceSearch sampler is asked for the next point, the requested CDF
      values are inserted into self.branchProbabilities / self.branchValues and
      recorded in self.investigatedPoints. Then either the closest existing
      branch is reused or a new tree is added to self.TreeInfo and queued.
      In every case the call is finally forwarded to
      DynamicEventTree.localGenerateInput.
      @ In, model, model instance, an instance of a model
      @ In, myInput, list, a list of the original needed inputs for the model (e.g. list of files, etc.)
      @ Out, the value returned by DynamicEventTree.localGenerateInput (NOTE(review): previously documented as None -- confirm what the base class returns)
    """
    # NOTE(review): the nesting of the statements below was restored from a
    # source whose line breaks and indentation were lost; confirm it against
    # version control before relying on it.
    if self.startAdaptive == True and self.adaptiveReady == True:
        LimitSurfaceSearch.localGenerateInput(self,model,myInput)
        # the adaptive sampler created the next point sampled vars
        # find the closest branch
        if self.hybridDETstrategy is not None:
            # with a hybrid strategy a third value is returned; it is not used here
            closestBranch, cdfValues, treer = self._checkClosestBranch()
        else :
            closestBranch, cdfValues = self._checkClosestBranch()
        if closestBranch is None:
            self.raiseADebug('An usable branch for next candidate has not been found => create a parallel branch!')
        # add pbthresholds in the grid
        investigatedPoint = {}
        for key,value in cdfValues.items():
            ind = utils.find_le_index(self.branchProbabilities[key],value)
            # a falsy result (presumably None when nothing is <= value -- confirm) means "insert at the front"
            if not ind:
                ind = 0
            if value not in self.branchProbabilities[key]:
                # keep the probability thresholds and the corresponding values aligned
                self.branchProbabilities[key].insert(ind,value)
                self.branchValues[key].insert(ind,self.distDict[key].ppf(value))
            investigatedPoint[key] = value
        # collect investigated point
        self.investigatedPoints.append(investigatedPoint)
        if closestBranch:
            # reuse the closest branch: build the end info from it
            info = self._retrieveBranchInfo(closestBranch)
            self._constructEndInfoFromBranch(model, myInput, info, cdfValues)
        else:
            # create a new tree, since there are no branches that are close enough to the adaptive request
            # the new root node is named <sampler name>_<number of trees + 1>
            elm = ETS.HierarchicalNode(self.messageHandler,self.name + '_' + str(len(self.TreeInfo.keys())+1))
            elm.add('name', self.name + '_'+ str(len(self.TreeInfo.keys())+1))
            elm.add('startTime', 0.0)
            # Initialize the endTime to be equal to the start one...
            # It will be modified at the end of each branch
            elm.add('endTime', 0.0)
            elm.add('runEnded',False)
            elm.add('running',True)
            elm.add('queue',False)
            elm.add('completedHistory', False)
            branchedLevel = {}
            for key,value in cdfValues.items():
                branchedLevel[key] = utils.index(self.branchProbabilities[key],value)
            # The dictionary branchedLevel is stored in the xml tree too. That's because
            # the advancement of the thresholds must follow the tree structure
            elm.add('branchedLevel', branchedLevel)
            if self.hybridDETstrategy is not None and not self.foundEpistemicTree:
                # adaptive hybrid DET and not found a tree in the epistemic space
                # take the first tree and modify the hybridsamplerCoordinate
                hybridSampled = copy.deepcopy(self.TreeInfo.values()[0].getrootnode().get('hybridsamplerCoordinate'))
                for hybridStrategy in hybridSampled:
                    for key in self.epistemicVariables.keys():
                        if key in hybridStrategy['SampledVars'].keys():
                            self.raiseADebug("epistemic var " + str(key)+" value = "+str(self.values[key]))
                            # overwrite the copied coordinate with the current epistemic value and its pdf
                            hybridStrategy['SampledVars'][key] = copy.copy(self.values[key])
                            hybridStrategy['SampledVarsPb'][key] = self.distDict[key].pdf(self.values[key])
                            hybridStrategy['prefix'] = len(self.TreeInfo.values())+1
                    # TODO: find a strategy to recompute the probability weight here (for now == PointProbability)
                    hybridStrategy['PointProbability'] = reduce(mul, self.inputInfo['SampledVarsPb'].values())
                    hybridStrategy['ProbabilityWeight'] = reduce(mul, self.inputInfo['SampledVarsPb'].values())
                elm.add('hybridsamplerCoordinate', hybridSampled)
            # Here it is stored all the info regarding the DET => we create the info for all the branchings and we store them
            self.TreeInfo[self.name + '_' + str(len(self.TreeInfo.keys())+1)] = ETS.HierarchicalTree(self.messageHandler,elm)
            # the tree was just inserted, so the same key is now <sampler name>_<number of trees>
            self._createRunningQueueBeginOne(self.TreeInfo[self.name + '_' + str(len(self.TreeInfo.keys()))],branchedLevel, model,myInput)
    return DynamicEventTree.localGenerateInput(self,model,myInput)
def localFinalizeActualSampling(self,jobObject,model,myInput,genRunQueue=True):
    """
      General function (available to all samplers) that finalize the sampling
      calculation just ended. In this case (DET), The function reads the
      information from the ended calculation, updates the working variables, and
      creates the new inputs for the next branches
      @ In, jobObject, instance, an instance of a JobHandler
      @ In, model, model instance, it is the instance of a RAVEN model
      @ In, myInput, list, the generating input
      @ In, genRunQueue, bool, optional, True if the RunQueue needs to be updated
      @ Out, branched, bool, False if no branch info was read for the ended run (the history is marked as completed), True otherwise
    """
    # NOTE(review): the nesting of the statements below was restored from a
    # source whose line breaks and indentation were lost; confirm it against
    # version control.
    self.workingDir = model.workingDir
    # Get the parent element tree (xml object) to retrieve the information needed to create the new inputs
    parentNode = self._retrieveParentNode(jobObject.identifier)
    # set runEnded and running to true and false respectively
    parentNode.add('runEnded',True)
    parentNode.add('running',False)
    parentNode.add('endTime',self.actualEndTime)
    # Read the branch info from the parent calculation (just ended calculation)
    # This function stores the information in the dictionary 'self.actualBranchInfo'
    # If no branch info, this history is concluded => return
    if not self.__readBranchInfo(jobObject.output, jobObject.getWorkingDir()):
        parentNode.add('completedHistory', True)
        return False
    # Collect the branch info in a multi-level dictionary
    endInfo = {'endTime':self.actualEndTime,'endTimeStep':self.actualEndTs,'branchDist':list(self.actualBranchInfo.keys())[0]}
    endInfo['branchChangedParams'] = self.actualBranchInfo[endInfo['branchDist']]
    # check if RELAP7 mode is activated; in that case replace the branch name
    # with the self.toBeSampled key whose value equals it
    if any("<distribution>" in s for s in self.branchProbabilities.keys()):
        # list(...) keeps the lookup valid where keys()/values() are views
        sampledKeys = list(self.toBeSampled.keys())
        sampledValues = list(self.toBeSampled.values())
        endInfo['branchDist'] = sampledKeys[sampledValues.index(endInfo['branchDist'])]
    parentNode.add('actualEndTimeStep',self.actualEndTs)
    endInfo['parentNode'] = parentNode
    branchDist = endInfo['branchDist']
    # get the branchedLevel dictionary (epistemic variables are skipped)
    branchedLevel = {}
    for distk, distpb in parentNode.get('SampledVarsPb').items():
        if distk not in self.epistemicVariables.keys():
            branchedLevel[distk] = utils.index(self.branchProbabilities[distk],distpb)
    if not branchedLevel:
        self.raiseAnError(RuntimeError,'branchedLevel of node '+jobObject.identifier+' not found!')
    # Loop of the parameters that have been changed after a trigger gets activated
    for key in endInfo['branchChangedParams']:
        changedParam = endInfo['branchChangedParams'][key]
        endInfo['n_branches'] = 1 + int(len(changedParam['actualValue']))
        if len(changedParam['actualValue']) > 1:
            # Multi-Branch mode => the resulting branches from this parent calculation (just ended)
            # will be more than 2
            # unchangedPb = probability (not conditional probability yet) that the event does not occur
            unchangedPb = 0.0
            try:
                # total probability (not conditional probability yet) of the branches in which the event occurs
                unchangedPb = sum(changedParam['associatedProbability'], 0.0)
            except KeyError:
                self.raiseAWarning("KeyError:"+str(key))
            if unchangedPb <= 1:
                changedParam['unchangedPb'] = 1.0-unchangedPb
            else:
                # 'unchangedPb' is left unset for this parameter in this case
                self.raiseAWarning("unchangedPb > 1:"+str(unchangedPb))
        else:
            # Two-Way mode => the resulting branches from this parent calculation (just ended) = 2
            if branchedLevel[branchDist] > len(self.branchProbabilities[branchDist])-1:
                # past the last threshold
                pb = 1.0
            else:
                pb = self.branchProbabilities[branchDist][branchedLevel[branchDist]]
            changedParam['unchangedPb'] = 1.0 - pb
            changedParam['associatedProbability'] = [pb]
    self.branchCountOnLevel = 0
    # The branchedLevel counter is updated
    if branchedLevel[branchDist] < len(self.branchProbabilities[branchDist]):
        branchedLevel[branchDist] += 1
    # Append the parent branchedLevel (updated for the new branch/es) in the list that contains them
    # (it is needed in order to avoid overlapping among info coming from different parent calculations)
    # When this info is used, they are popped out
    self.branchedLevel.append(branchedLevel)
    # Append the parent end info in the list that contains them
    # (it is needed in order to avoid overlapping among info coming from different parent calculations)
    # When this info is used, they are popped out
    self.endInfo.append(endInfo)
    # Compute conditional probability
    self.computeConditionalProbability()
    # Create the inputs and put them in the runQueue dictionary (if genRunQueue is true)
    if genRunQueue:
        self._createRunningQueue(model,myInput)
    return True