Beispiel #1
0
    def __init__(self, gui=False, thd=False):
        '''

        Set up a link with NetLogo. If no JVM is running yet, one is started
        through jpype with the NetLogo jars and the netlogoLink jar on its
        class path.


        :param gui: boolean, if True run NetLogo with its gui, otherwise run
                    in headless mode. Defaults to False. On mac ('darwin')
                    this is always forced to False.
        :param thd: boolean, if True start NetLogo in 3d mode. Defaults to
                    False.


        '''
        on_mac = sys.platform == 'darwin'

        if not jpype.isJVMStarted():
            # jars shipped with netlogo, relative to NETLOGO_HOME; the order
            # is the order in which they are put on the class path
            netlogo_jars = (r'/lib/scala-library.jar',
                            r'/lib/asm-all-3.3.1.jar',
                            r'/lib/picocontainer-2.13.6.jar',
                            r'/lib/log4j-1.2.16.jar',
                            r'/lib/jmf-2.1.1e.jar',
                            r'/lib/pegdown-1.1.0.jar',
                            r'/lib/parboiled-core-1.0.2.jar',
                            r'/lib/parboiled-java-1.0.2.jar',
                            r'/lib/mrjadapter-1.2.jar',
                            r'/lib/jhotdraw-6.0b1.jar',
                            r'/lib/quaqua-7.3.4.jar',
                            r'/lib/swing-layout-7.3.4.jar',
                            r'/lib/jogl-1.1.1.jar',
                            r'/lib/gluegen-rt-1.1.1.jar',
                            r'/NetLogo.jar')
            classpath_entries = [NETLOGO_HOME + jar for jar in netlogo_jars]

            # the bridge between python and netlogo ships with pynetlogo
            classpath_entries.append(PYNETLOGO_HOME +
                                     r'/external_files/netlogoLink.jar')

            # format jars in right format for starting java virtual machine
            # TODO the use of the jre here is only relevant under windows
            # apparently
            # might be solvable by setting netlogo home user.dir
            classpath_option = '-Djava.class.path={}'.format(
                                    jar_separator.join(classpath_entries))

            jpype.startJVM(jpype.getDefaultJVMPath(), classpath_option,
                           "-Xms128M", "-Xmx1024m")

            system = jpype.java.lang.System
            system.setProperty('user.dir', NETLOGO_HOME)
            if on_mac:
                system.setProperty("java.awt.headless", "true")

            debug("jvm started")

        netlogo_link_class = jpype.JClass('netlogoLink.NetLogoLink')
        debug('NetLogoLink class found')

        # the gui cannot be used on mac, fall back to headless mode
        if on_mac and gui:
            info('on mac only headless mode is supported')
            gui = False

        self.link = netlogo_link_class(gui, thd)
        debug('NetLogoLink class instantiated')
Beispiel #2
0
    def find_box(self):
        '''

        Execute one iteration of the PRIM algorithm. That is, find one
        box, starting from the current state of Prim.

        A new box containing all remaining data is created, peeled and
        pasted. If the mean of the resulting box meets the threshold (at or
        above it when threshold_type is ABOVE, at or below it when
        threshold_type is BELOW), the box is kept. Otherwise a dump box, an
        untouched box made from box_init and all remaining data, is used
        instead.

        :returns: the box that was appended to ``self.boxes``, or None if
                  there is no data remaining.

        '''
        # set the indices
        self._update_yi_remaining()

        if self.yi_remaining.shape[0] == 0:
            info("no data remaining")
            return None

        # log how much data and how many coi are remaining
        info(self.message.format(self.yi_remaining.shape[0],
                                 self.determine_coi(self.yi_remaining)))

        # make a new box that contains all the remaining data points
        box = PrimBox(self, self.box_init, self.yi_remaining[:])

        #  perform peeling phase
        box = self._peel(box)
        debug("peeling completed")

        # perform pasting phase
        box = self._paste(box)
        debug("pasting completed")

        # logical and/or instead of bitwise &, so that the comparison with
        # the threshold is only evaluated for the active threshold type
        mean = box.mean[-1]
        meets_threshold = ((self.threshold_type == ABOVE and
                            mean >= self.threshold) or
                           (self.threshold_type == BELOW and
                            mean <= self.threshold))

        if meets_threshold:
            message = "mean: {0}, mass: {1}, coverage: {2}, density: {3} restricted_dimensions: {4}"
            info(message.format(mean,
                                box.mass[-1],
                                box.coverage[-1],
                                box.density[-1],
                                box.res_dim[-1]))
        else:
            # make a dump box
            info('box does not meet threshold criteria, value is {}, returning dump box'.format(mean))
            box = PrimBox(self, self.box_init, self.yi_remaining[:])

        self.boxes.append(box)
        return box
Beispiel #3
0
def _is_restricted(box, dump_box, name):
    '''
    Return whether the limits of box on name differ from those of dump_box.

    Fields with an object dtype are compared on their first entry: the
    field counts as restricted if that entry differs in length or content
    from the one in the dump box. All other fields count as restricted if
    the lower limit lies above, or the upper limit lies below, the
    corresponding limit of the dump box.

    '''
    minimum = box[name][0]
    maximum = box[name][1]

    if box.dtype.fields.get(name)[0] == 'object':
        reference = dump_box[name][0]
        if len(reference) != len(minimum):
            return True
        return not np.all(np.equal(reference, minimum))

    return (minimum > dump_box[name][0]) or (maximum < dump_box[name][1])


def __filter(boxes, uncertainties=None):
    '''
    Determine which uncertainties are restricted by at least one box.

    The last entry of boxes is treated as the dump box and serves as the
    reference; all other entries are compared against it. The names that
    turn out not to be restricted by any box are logged.

    :param boxes: sequence of boxes, the last one being the dump box. Each
                  box is indexed by uncertainty name and exposes a
                  ``dtype`` (presumably numpy structured arrays holding
                  the box limits).
    :param uncertainties: optional list of uncertainty names to consider.
                          If None or empty, all fields of the dump box are
                          considered.
    :returns: list with the names of the restricted uncertainties, in the
              order in which they were considered.

    '''
    dump_box = boxes[-1]
    candidate_boxes = boxes[0:-1]

    if uncertainties:
        uv = uncertainties
    else:
        uv = [entry[0] for entry in dump_box.dtype.descr]

    # an uncertainty is shown as soon as a single box restricts it
    names = [name for name in uv
             if any(_is_restricted(box, dump_box, name)
                    for box in candidate_boxes)]

    # only report when there actually is something that is left out
    unrestricted = sorted(set(uv) - set(names))
    if unrestricted:
        info(", ".join(unrestricted) +
             " are not visualized because they are not restricted")

    return names
Beispiel #4
0
def perform_prim(results, 
                 classify, 
                 peel_alpha = 0.05, 
                 paste_alpha = 0.05,
                 mass_min = 0.05, 
                 threshold = None, 
                 pasting=True, 
                 threshold_type=1,
                 obj_func=def_obj_func):
    r'''
    
    Perform Patient Rule Induction Method (PRIM). This function performs 
    the PRIM algorithm on the data. It uses a Python implementation of PRIM
    inspired by the `R <http://www.oga-lab.net/RGM2/func.php?rd_id=prim:prim-package>`_ 
    algorithm. Compared to the R version, the Python version is data type aware. 
    That is, real valued, ordinal, and categorical data are treated 
    differently. Moreover, the pasting phase of PRIM in the R algorithm is
    not consistent with the literature. The Python version is. 
    
    The PRIM algorithm tries to find subspaces of the input space that share
    some characteristic in the output space. The characteristic that the 
    current implementation looks at is the mean of the results. Thus, the 
    output space is 1-D, and the characteristic is assumed to be continuous.
    
    As a work around, to deal with classes, the user can supply a classify 
    function. This function should return a binary classification 
    (i.e. 1 or 0). Then, the mean of the box is indicative of the concentration 
    of cases of class 1. That is, if the specified threshold is say 0.8 and the 
    threshold_type is 1, PRIM looks for subspaces of the input space that 
    contains at least 80\% cases of class 1.   
    
    :param results: the return from :meth:`perform_experiments`, a tuple of
                    the experiments and the outcomes.
    :param classify: either a string denoting the outcome of interest to use
                     or a function. In case of a string and time series data, 
                     the end state is used.
    :param peel_alpha: parameter controlling the peeling stage (default = 0.05). 
    :param paste_alpha: parameter controlling the pasting stage (default = 0.05).
    :param mass_min: minimum mass of a box (default = 0.05). 
    :param threshold: the threshold of the output space that boxes should meet. 
                      Has to be specified if classify is a string.
    :param pasting: perform pasting stage (default=True) 
    :param threshold_type: If 1, the boxes should go above the threshold, if -1
                           the boxes should go below the threshold, if 0, the 
                           algorithm looks for both +1 and -1.
    :param obj_func: The objective function to use. Default is 
                     :func:`def_obj_func`
    :return: a list of PRIM objects.
    :raises EMAError: if classify is neither a string nor a callable, if 
                      classify is a string that is not among the outcomes,
                      or if classify is a string and no threshold is given.
    
    for each box, the scenario discovery metrics *coverage* and *density* 
    are also calculated:
    
    
    .. math::
 
        coverage=\frac
                    {{\displaystyle\sum_{y_{i}\in{B}}y_{i}{'}}}
                    {{\displaystyle\sum_{y_{i}\in{B^I}}y_{i}{'}}}
    
    
    where :math:`y_{i}{'}=1` if :math:`x_{i}\in{B}` and :math:`y_{i}{'}=0`
    otherwise.
    
    
    .. math::
 
        density=\frac
                    {{\displaystyle\sum_{y_{i}\in{B}}y_{i}{'}}}
                    {{\displaystyle\left|{y_{i}}\right|\in{B}}}
    
    where :math:`y_{i}{'}=1` if :math:`x_{i}\in{B}` and :math:`y_{i}{'}=0`
    otherwise, and :math:`{\displaystyle\left|{y_{i}}\right|\in{B}}` is the
    cardinality of :math:`y_{i}`.
    
    
    Density is the ratio of the cases of interest in a box to the 
    total number of cases in that box. *density* is identical to the mean
    in case of a binary classification.  For more detail on these metrics see 
    `Bryant and Lempert (2010) <http://www.sciencedirect.com/science/article/pii/S004016250900105X>`_
    
    .. rubric:: references to relevant papers 
        
    * `original PRIM paper <http://www.springerlink.com/content/x3gpv05t34620878/>`_
    * `paper on ordinal data and PRIM <http://www.sciencedirect.com/science/article/pii/S095741740700231X>`_
        
    **ema application** 
        
    * `Lempert et al. (2006) <http://mansci.journal.informs.org/content/52/4/514>`_
    * `Groves and Lempert (2007) <http://www.sciencedirect.com/science/article/pii/S0959378006000896#ref_bib19>`_
    * `Bryant and Lempert (2010) <http://www.sciencedirect.com/science/article/pii/S004016250900105X>`_
    
    '''
    experiments, outcomes = results
    
    # make y
    if isinstance(classify, StringType):
        outcome = outcomes.get(classify)
        if outcome is None:
            raise EMAError("outcome %s not found in results" % classify)
        if threshold is None:
            raise EMAError("a threshold has to be specified if classify is a string")
        
        # in case of time series data, only the end state is used
        if len(outcome.shape) == 2:
            y = outcome[:, -1]
        else:
            y = outcome
        
        # multiplying both sides with threshold_type flips the comparison 
        # for threshold_type -1.
        # NOTE: for threshold_type 0 both sides become zero, so no case is
        # counted as being of interest here.
        count = np.zeros(y.shape)
        count[y*threshold_type > threshold*threshold_type] = 1
        cases_of_interest = np.sum(count)
    elif callable(classify):
        y = classify(outcomes)
        cases_of_interest = np.sum(y)
    else:
        raise EMAError("incorrect specification of classify, this should be a function or a string")
    
    info("number of cases of interest is %d" % (cases_of_interest))
    
    # perform prim
    boxes = recursivePrim.perform_prim(experiments, y, box_init=None, 
                                       peel_alpha=peel_alpha, 
                                       paste_alpha=paste_alpha, 
                                       mass_min=mass_min, 
                                       threshold=threshold, pasting=pasting, 
                                       threshold_type=threshold_type,
                                       obj_func=obj_func,
                                       cases_of_interest=cases_of_interest)
    
    # calculate scenario discovery metrics and add these to boxes
    boxes = calculate_sd_metrics(boxes, y, threshold, threshold_type)
    
    return boxes