def testJArraySetItemSlice(self):
    """Assigning through a stepped slice overwrites only the selected slots."""
    int_array = jpype.JArray(jpype.JInt)([1, 2, 3, 4, 5, 6])
    # The slice 0:-1:2 addresses indices 0, 2 and 4.
    int_array[0:-1:2] = [-1, -1, -1]
    expected = [-1, 2, -1, 4, -1, 6]
    self.assertEqual(list(int_array[:]), expected)
def testByteArrayIntoVector(self):
    """A Java byte array can be added to a java.util.Vector and read back."""
    byte_array_type = jpype.JArray(jpype.JByte)
    payload = byte_array_type(b'123')
    vector = jpype.java.util.Vector(1)
    vector.add(payload)
    # Exactly one element was stored, and it did not come back as None.
    self.assertEqual(len(vector), 1)
    self.assertNotEqual(vector[0], None)
def testJArrayFail1(self):
    """Calling JArray with three positional arguments must raise TypeError."""
    self.assertRaises(TypeError, jpype.JArray, jpype.JInt, 2, 2)
def makeStringArray(val):
    """Create a Java ``String[]`` by passing ``val`` to the array constructor."""
    string_array_type = jpype.JArray(c.java.lang.String)
    return string_array_type(val)
def testVarArgsStringTest(self):
    """callString echoes two, one or zero variadic string arguments."""
    # The array class is created but not used by the assertions below.
    _string_array_type = jpype.JArray(jpype.JString)
    cases = (
        (('a', 'b'), ['a', 'b']),
        (('a',), ['a']),
        ((), []),
    )
    for arguments, expected in cases:
        returned = self.VarArgs.callString(*arguments)
        self.assertTrue(compareList(returned, expected))
def load_array_class(self, fully_qualified_array_item_class, array_dimension=1):
    """Return the JPype array class for an item class name.

    Args:
        fully_qualified_array_item_class: class name, forwarded to
            ``self.process_class_name`` before being handed to ``jpype.JArray``.
        array_dimension: number of array dimensions (default 1).

    Returns:
        Whatever ``jpype.JArray`` returns for the processed class name and
        the requested dimension.
    """
    item_class = self.process_class_name(fully_qualified_array_item_class)
    return jpype.JArray(item_class, array_dimension)
def testHashArray(self):
    """hash() of a Java int array yields a value that is not None."""
    int_array = jpype.JArray(jpype.JInt)([1, 2, 3])
    digest = hash(int_array)
    self.assertIsNotNone(digest)
def pid_frankfurt(self, s1, s2, t, opts):
    """Estimate partial information decomposition of discrete variables.

    The pid estimator returns estimates of shared information, unique
    information and synergistic information that two random variables X and
    Y have about a third variable Z. The estimator finds these estimates by
    permuting the initial joint probability distribution of X, Y, and Z to
    find a permuted distribution Q that minimizes the unique information in
    X about Z (as proposed by Bertschinger and colleagues). The unique
    information is defined as the conditional mutual information I(X;Z|Y).

    The estimator iteratively permutes the joint probability distribution of
    X, Y, and Z under the constraint that the marginal distributions (X, Z)
    and (Y, Z) stay constant. This is done by swapping two realizations of X
    which have the same corresponding value in Z, e.g.:

        X [1, 0, 1, 1, 0, 1, 0, 0, 1, 1]
        Y [0, 0, 1, 1, 1, 0, 0, 0, 1, 1]
        ---------------------------------
        Z [1, 1, 0, 0, 0, 1, 1, 0, 1, 0]

        Possible swaps: X[0] and X[1]; X[0] and X[4]; X[2] and X[8]; ...

    After each swap, I(X;Z|Y) is re-calculated under the new distribution;
    if the CMI is not higher than the current one, the permutation is kept,
    otherwise the swap is undone. Then the next swap is tested. The
    iteration stops after the provided number of iterations.

    Example:
        import numpy as np
        import pid

        n = 5000
        alph = 2
        x = np.random.randint(0, alph, n)
        y = np.random.randint(0, alph, n)
        z = np.logical_xor(x, y).astype(int)
        cfg = {
            'alph_s1': alph,
            'alph_s2': alph,
            'alph_t': alph,
            'jarpath': '/home/user/infodynamics-dist-1.3/infodynamics.jar',
            'iterations': 10000
        }
        [est, opt] = pid(x, y, z, cfg)

    Args:
        s1 : numpy array
            1D array containing realizations of a discrete random variable
            (this is the source variable the algorithm calculates the actual
            UI for)
        s2 : numpy array
            1D array containing realizations of a discrete random variable
            (the other source variable)
        t : numpy array
            1D array containing realizations of a discrete random variable
        opts : dict
            estimation parameters

            - 'alph_s1' - alphabet size of s1
            - 'alph_s2' - alphabet size of s2
            - 'alph_t' - alphabet size of t
            - 'jarpath' - string with path to JIDT jar file
            - 'iterations' - no. iterations of the estimator

    Returns:
        dict
            estimated decomposition with keys 'unq_s1', 'unq_s2',
            'shd_s1s2', 'syn_s1s2' (PID estimates),
            'jointmi_q_s1s2_target' (I(target;s1,s2) under the permuted
            distribution Q) and 'orig_cmi_target_s1_cond_s2',
            'orig_jointmi_s1s2_target', 'orig_mi_target_s1',
            'orig_mi_target_s2' (MI/CMI values computed from the
            non-permuted distributions)
        dict
            additional information about the iterative optimization with
            keys 'q' (index permutation of s1 after the last accepted
            swap), 'unsuc_swaps' (number of rejected swaps),
            'cmi_q_target_s1_cond_s2_all' (I(target:s1|s2) for each
            iteration), 'cmi_q_target_s1_cond_s2_delta' (decrease of
            I(target:s1|s2) for each accepted iteration, -inf otherwise)
            and 'opts' (the opts dictionary)

    Raises:
        TypeError: re-raised if opts does not support key lookup.
        KeyError: re-raised if a required key is missing from opts.

    Note:
        variables names joined by "_" enter a mutual information computation
        together i.e. mi_var1_var2 --> I(var1 : var2). Variables names
        joined directly form a new joint variable
        mi_var1var2_var3 --> I(var3:(var1,var2))
    """
    _check_input(s1, s2, t, opts)

    # make deep copies of input arrays to avoid side effects
    s1_cp = s1.copy()
    s2_cp = s2.copy()
    t_cp = t.copy()

    # get estimation parameters
    try:
        jarpath = opts['jarpath']
    except TypeError:
        print('The opts argument should be a dictionary.')
        raise
    except KeyError:
        print('"jarpath" is missing from the opts dictionary.')
        raise
    try:
        alph_s1 = opts['alph_s1']
    except KeyError:
        print('"alph_s1" is missing from the opts dictionary.')
        raise
    try:
        alph_s2 = opts['alph_s2']
    except KeyError:
        print('"alph_s2" is missing from the opts dictionary.')
        raise
    try:
        alph_t = opts['alph_t']
    except KeyError:
        print('"alph_t" is missing from the opts dictionary.')
        raise
    try:
        iterations = opts['iterations']
    except KeyError:
        print('"iterations" is missing from the opts dictionary.')
        raise

    if not jp.isJVMStarted():
        jp.startJVM(jp.getDefaultJVMPath(), '-ea',
                    '-Djava.class.path=' + jarpath, "-Xmx3000M")
    # what if it's there already - do we have to attach to it?

    # transform variables as far as possible outside the loops below
    # (note: only s1_list and ind change when executing the loop)
    target_jA = jp.JArray(jp.JInt, t.ndim)(t.tolist())
    s2_jA = jp.JArray(jp.JInt, s2.ndim)(s2.tolist())
    s1_list = s1_cp.tolist()
    s1_dim = s1_cp.ndim

    Cmi_calc_class = (jp.JPackage('infodynamics.measures.discrete').
                      ConditionalMutualInformationCalculatorDiscrete)
    Mi_calc_class = (jp.JPackage('infodynamics.measures.discrete').
                     MutualInformationCalculatorDiscrete)

    cmi_calc_target_s1_cond_s2 = Cmi_calc_class(alph_t, alph_s1, alph_s2)

    # MAX THE CORRECT WAY TO GO?
    alph_max_s1_t = max(alph_s1, alph_t)
    alph_max_s2_t = max(alph_s2, alph_t)
    alph_max = max(alph_s1 * alph_s2, alph_t)

    mi_calc_s1 = Mi_calc_class(alph_max_s1_t)
    mi_calc_s2 = Mi_calc_class(alph_max_s2_t)
    jointmi_calc = Mi_calc_class(alph_max)

    print("initialized all estimators")
    cmi_target_s1_cond_s2 = _calculate_cmi(cmi_calc_target_s1_cond_s2,
                                           t_cp, s1_cp, s2_cp)
    jointmi_s1s2_target = _calculate_jointmi(jointmi_calc,
                                             s1_cp, s2_cp, t_cp)
    mi_target_s1 = _calculate_mi(mi_calc_s1, s1_cp, t_cp)
    mi_target_s2 = _calculate_mi(mi_calc_s2, s2_cp, t_cp)
    print("Original redundancy - synergy: {0}".format(
        mi_target_s1 + mi_target_s2 - jointmi_s1s2_target))

    n = t_cp.shape[0]
    reps = iterations + 1
    ind = np.arange(n)
    # collect estimates in each iteration; np.longdouble is the portable
    # spelling of the extended-precision type ('float128' does not exist on
    # every platform)
    cmi_q_target_s1_cond_s2_all = -np.inf * np.ones(reps).astype(
        np.longdouble)
    cmi_q_target_s1_cond_s2_delta = -np.inf * np.ones(reps).astype(
        np.longdouble)
    cmi_q_target_s1_cond_s2_all[0] = cmi_target_s1_cond_s2
    unsuccessful = 0

    for i in range(1, reps):
        # swapping: pick sample at random, find all other samples that
        # are potential matches (have the same value in target), pick one of
        # the matches for the actual swap
        swap_1 = np.random.randint(n)
        swap_candidates = np.where(t_cp == t_cp[swap_1])[0]
        swap_2 = np.random.choice(swap_candidates)

        # swap value in s1 and index to keep track; the swap is done in
        # place and explicitly undone below if it is rejected
        s1_list[swap_1], s1_list[swap_2] = (s1_list[swap_2],
                                            s1_list[swap_1])
        ind[swap_1], ind[swap_2] = (ind[swap_2], ind[swap_1])

        # calculate CMI under new swapped distribution
        cmi_new = _calculate_cmi_from_jA_list(cmi_calc_target_s1_cond_s2,
                                              target_jA, s1_list,
                                              s1_dim, s2_jA)

        if np.less_equal(cmi_new, cmi_q_target_s1_cond_s2_all[i - 1]):
            cmi_q_target_s1_cond_s2_all[i] = cmi_new
            cmi_q_target_s1_cond_s2_delta[i] = (
                cmi_q_target_s1_cond_s2_all[i - 1] - cmi_new)
        else:
            # rejected: restore the previous permutation so that s1_list
            # and ind always correspond to the last recorded CMI value
            s1_list[swap_1], s1_list[swap_2] = (s1_list[swap_2],
                                                s1_list[swap_1])
            ind[swap_1], ind[swap_2] = (ind[swap_2], ind[swap_1])
            cmi_q_target_s1_cond_s2_all[i] = (
                cmi_q_target_s1_cond_s2_all[i - 1])
            unsuccessful += 1

    print('Unsuccessful swaps: {0}'.format(unsuccessful))
    # convert the final (last accepted) s1 back to an array
    s1_final = np.asarray(s1_list, dtype=int)

    # estimate unq/syn/shd information
    jointmi_q_s1s2_target = _calculate_jointmi(jointmi_calc,
                                               s1_final, s2_cp, t_cp)
    # Bertschinger, 2014, p. 2163
    unq_s1 = _get_last_value(cmi_q_target_s1_cond_s2_all)

    # NEED TO REINITIALISE the calculator
    cmi_calc_target_s2_cond_s1 = Cmi_calc_class(alph_t, alph_s2, alph_s1)
    # Bertschinger, 2014, p. 2166
    unq_s2 = _calculate_cmi(cmi_calc_target_s2_cond_s1,
                            t_cp, s2_cp, s1_final)
    # Bertschinger, 2014, p. 2163
    syn_s1s2 = jointmi_s1s2_target - jointmi_q_s1s2_target
    # Bertschinger, 2014, p. 2167
    shd_s1s2 = mi_target_s1 + mi_target_s2 - jointmi_q_s1s2_target

    estimate = {
        'unq_s1': unq_s1,
        'unq_s2': unq_s2,
        'shd_s1s2': shd_s1s2,
        'syn_s1s2': syn_s1s2,
        'jointmi_q_s1s2_target': jointmi_q_s1s2_target,
        'orig_cmi_target_s1_cond_s2': cmi_target_s1_cond_s2,
        'orig_jointmi_s1s2_target': jointmi_s1s2_target,
        'orig_mi_target_s1': mi_target_s1,
        'orig_mi_target_s2': mi_target_s2
    }
    # useful outputs for plotting/debugging
    optimization = {
        'q': ind,
        'unsuc_swaps': unsuccessful,
        'cmi_q_target_s1_cond_s2_all': cmi_q_target_s1_cond_s2_all,
        'cmi_q_target_s1_cond_s2_delta': cmi_q_target_s1_cond_s2_delta,
        'opts': opts
    }
    return estimate, optimization
def testJArrayConversionFail(self):
    """Storing a str into an element of a Java int array raises TypeError."""
    int_array_type = jpype.JArray(jpype.JInt)
    int_array = int_array_type(self.VALUES)
    with self.assertRaises(TypeError):
        int_array[1] = 'a'
def jidt_discrete(self, var1, var2, conditional, opts=None): """Calculate CMI with JIDT's implementation for discrete variables. Calculate the conditional mutual information between two variables given the third. Call JIDT via jpype and use the discrete estimator. References: Lizier, Joseph T. (2014). JIDT: an information-theoretic toolkit for studying the dynamics of complex systems. Front. Robot. AI, 1(11). This function is ment to be imported into the set_estimator module and used as a method in the Estimator_cmi class. Args: self : instance of Estimator_cmi function is supposed to be used as part of the Estimator_cmi class var1 : numpy array (either of integers or doubles to be discretised) realisations of the first random variable. Can be multidimensional (i.e. multivariate) where dimensions of the array are realisations x variable dimension var2 : numpy array (either of integers or doubles to be discretised) realisations of the second random variable. Can be multidimensional (i.e. multivariate) where dimensions of the array are realisations x variable dimension conditional : numpy array (either of integers or doubles to be discretised) realisations of the conditional random variable. Can be multidimensional (i.e. multivariate) where dimensions of the array are realisations x variable dimension opts : dict [optional] sets estimation parameters: - 'num_discrete_bins' - number of discrete bins/levels or the base of each dimension of the discrete variables (default=2 for binary). 
If this is set, then parameters 'alph1', 'alph2' and 'alphc' are all set to this value - 'alph1' - number of discrete bins/levels for var1 (default=2 for binary, or the value set for 'num_discrete_bins') - 'alph2' - number of discrete bins/levels for var2 (default=2 for binary, or the value set for 'num_discrete_bins') - 'alphc' - number of discrete bins/levels for conditional (default=2 for binary, or the value set for 'num_discrete_bins') - 'discretise_method' - if and how to discretise incoming continuous variables to discrete values. 'max_ent' means to use a maximum entropy binning 'equal' means to use equal size bins 'none' means variables are already discrete (default='none') - 'debug' - set debug prints from the calculator on Returns: float conditional mutual information Note: """ # Parse parameters: if opts is None: opts = {} alph1 = int(opts.get('alph1', 2)) alph2 = int(opts.get('alph2', 2)) alphc = int(opts.get('alphc', 2)) try: num_discrete_bins = int(opts['num_discrete_bins']) alph1 = num_discrete_bins alph2 = num_discrete_bins alphc = num_discrete_bins except KeyError: # Do nothing, we don't need the parameter if it is not here pass discretise_method = opts.get('discretise_method', 'none') debug = opts.get('debug', False) # Work out the number of samples and dimensions for each variable, before # we collapse all dimensions down: if len(var1.shape) > 1: # var1 is is multidimensional var1_dimensions = var1.shape[1] else: # var1 is unidimensional var1_dimensions = 1 if len(var2.shape) > 1: # var2 is is multidimensional var2_dimensions = var2.shape[1] else: # var2 is unidimensional var2_dimensions = 1 # Treat the conditional variable separately, as we're allowing # this to be null and then calculating a MI instead: if (conditional is None) or (alphc == 0): alphc = 0 varc_dimensions = 0 # Then we will make a MI call here else: # Else we have a non-trivial conditional variable: assert(conditional.size != 0), 'Conditional Array is empty.' 
if len(conditional.shape) > 1: # conditional is is multidimensional varc_dimensions = conditional.shape[1] else: # conditional is unidimensional varc_dimensions = 1 # Now discretise if required if (discretise_method == 'equal'): var1 = utils.discretise(var1, alph1) var2 = utils.discretise(var2, alph2) if (alphc > 0): conditional = utils.discretise(conditional, alphc) elif (discretise_method == 'max_ent'): var1 = utils.discretise_max_ent(var1, alph1) var2 = utils.discretise_max_ent(var2, alph2) if (alphc > 0): conditional = utils.discretise_max_ent(conditional, alphc) elif (discretise_method == 'none'): # check if data is really discretised assert issubclass(var1.dtype.type, np.integer), ('No discretisation ' 'requested, but input 1 is not an integer numpy array.') assert issubclass(var2.dtype.type, np.integer), ('No discretisation ' 'requested, but input 2 is not an integer numpy array.') assert min(var1) >= 0, 'Minimum of input 1 is smaller than 0.' assert min(var2) >= 0, 'Minimum of input 1 is smaller than 0.' assert max(var1) < alph1, ('Maximum of input 1 is larger than the ' 'alphabet size - 1.') assert max(var2) < alph2, ('Maximum of input 2 is larger than the ' 'alphabet size - 1.') else: raise ValueError('Unkown discretisation method.') # Then collapse any mulitvariates into univariate arrays: var1 = utils.combine_discrete_dimensions(var1, alph1) var2 = utils.combine_discrete_dimensions(var2, alph2) if (alphc > 0): conditional = utils.combine_discrete_dimensions(conditional, alphc) # And finally make the CMI calculation: jarLocation = resource_filename(__name__, 'infodynamics.jar') if not jp.isJVMStarted(): jp.startJVM(jp.getDefaultJVMPath(), '-ea', ('-Djava.class.path=' + jarLocation)) if (alphc > 0): # We have a non-trivial conditional, so make a proper conditional MI calculation calcClass = (jp.JPackage('infodynamics.measures.discrete'). 
ConditionalMutualInformationCalculatorDiscrete) calc = calcClass(int(math.pow(alph1, var1_dimensions)), int(math.pow(alph2, var2_dimensions)), int(math.pow(alphc, varc_dimensions))) calc.setDebug(debug) calc.initialise() # Unfortunately no faster way to pass numpy arrays in than this list conversion calc.addObservations(jp.JArray(jp.JInt, 1)(var1.tolist()), jp.JArray(jp.JInt, 1)(var2.tolist()), jp.JArray(jp.JInt, 1)(conditional.tolist())) return calc.computeAverageLocalOfObservations() else: # We have no conditional, so make an MI calculation calcClass = (jp.JPackage('infodynamics.measures.discrete'). MutualInformationCalculatorDiscrete) calc = calcClass(int(max(math.pow(alph1, var1_dimensions), math.pow(alph2, var2_dimensions))), 0) calc.setDebug(debug) calc.initialise() # Unfortunately no faster way to pass numpy arrays in than this list conversion calc.addObservations(jp.JArray(jp.JInt, 1)(var1.tolist()), jp.JArray(jp.JInt, 1)(var2.tolist())) return calc.computeAverageLocalOfObservations()
def _jStringArray(self, elements):
    """Create a Java ``String[]`` by passing ``elements`` to the array constructor."""
    string_array_type = jpype.JArray(java.lang.String)
    return string_array_type(elements)
def testJArrayEQ(self):
    """Comparing a Java int array with a Python list must not raise."""
    int_array = jpype.JArray(jpype.JInt)([1, 2, 3, 4])
    # Only the absence of an exception is checked; the result is discarded.
    int_array == [1, 2]
def testJArraySetItemSlice(self):
    """Assigning to an open-ended slice replaces the tail of the array."""
    int_array = jpype.JArray(jpype.JInt)([1, 2, 3, 4])
    replacement = [3, 4, 5]
    int_array[1:] = replacement
    self.assertEqual(list(int_array[:]), [1, 3, 4, 5])
def testJArrayGetItemSlice(self):
    """Reading an open-ended slice of a Java int array must not raise."""
    int_array = jpype.JArray(jpype.JInt)([1, 2, 3, 4])
    # Only the absence of an exception is checked; the slice is discarded.
    int_array[1:]
def testArrayCtor5(self):
    """JArray accepts an existing array class as its component type."""
    object_array_type = jpype.JArray("java.lang.Object")
    nested_array_type = jpype.JArray(object_array_type)
    self.assertTrue(issubclass(nested_array_type, jpype.JArray))
    instance = nested_array_type(10)
    self.assertTrue(isinstance(instance, jpype.JArray))
def testJArraySliceLength(self):
    """A slice assignment with a mismatched length raises ValueError."""
    int_array = jpype.JArray(jpype.JInt)(self.VALUES)
    # A one-element replacement for a one-element slice is accepted ...
    int_array[1:2] = [1]
    # ... whereas three elements for the same slice are rejected.
    with self.assertRaises(ValueError):
        int_array[1:2] = [1, 2, 3]
def testObjectNullArraySlice(self):
    """Slicing an object array that holds a single None compares equal to ``(None,)``."""
    # Check for bug in 0.7.0
    object_array_type = jpype.JArray(jpype.JObject)
    null_array = object_array_type([None])
    self.assertEqual(null_array[:], (None,))
def testInitFromNPIntArray(self):
    """A Java int array can be initialised from a NumPy integer array."""
    import numpy as np
    n = 100
    # ``np.int`` was only an alias for the builtin ``int`` and has been
    # removed from NumPy; using ``int`` selects the same dtype.
    a = np.random.random(n).astype(int)
    jarr = jpype.JArray(jpype.JInt)(a)
    self.assertCountEqual(a, jarr)
def _java_sql_blob(data):
    """Create a one-dimensional Java ``byte[]`` from ``data``."""
    byte_array_type = jpype.JArray(jpype.JByte, 1)
    return byte_array_type(data)
def testInitFromNPDoubleArrayFloat64(self):
    """A Java double array built from a float64 NumPy array holds the same values."""
    import numpy as np
    sample_count = 100
    samples = np.random.random(sample_count).astype(np.float64)
    double_array = jpype.JArray(jpype.JDouble)(samples)
    self.assertCountEqual(samples, double_array)
def makeIntArray(val):
    """Create a Java ``Integer[]`` whose elements box the items of ``val``."""
    integer_array_type = jpype.JArray(c.java.lang.Integer)
    # A non-empty ``val`` passes its length to the constructor; an empty
    # ``val`` is passed as-is.
    boxed = integer_array_type(len(val) or val)
    for position, item in enumerate(val):
        boxed[position] = c.java.lang.Integer(item)
    return boxed
def testInitFromNPFloatArrayInt(self):
    """A Java float array can be initialised from an int32 NumPy array."""
    import numpy as np
    source = np.array([1, 2, 3], dtype=np.int32)
    float_array = jpype.JArray(jpype.JFloat)(source)
    self.assertCountEqual(source, float_array)
def analyze_pair(x, y, exp=0.6, c=15, missing_value=None):
    """
    Calculate various MINE statistics on a relationship between two scalar vectors

    Arguments:
      - **x** first vector
      - **y** second vector
      - **exp** (from MINE:) "exponent of the equation B(n) = n^alpha" (default: 0.6)
      - **c** (from MINE:) "determine by what factor clumps may outnumber columns
        when OptimizeXAxis is called. When trying to partition the x-axis into
        x columns, the algorithm will start with at most cx clumps" (default: 15)
      - **missing_value** value to be considered missing value in x and y (default: None)

    Return:
      - dictionary with keys 'MIC', 'non_linearity', 'MAS', 'MEV', 'MCN' and
        'pearson' corresponding to the maximum information coefficient,
        non-linearity, maximum asymmetry score, maximum edge value, minimum
        cell number and Pearson correlation coefficient, respectively.
        A value is None where the result field is empty or "ERROR"

    Raises:
      - ValueError if x and y differ in length

    Notes:
      - the two input vectors must be of equal length
      - missing values in either x or y must be reported as missing_value values

    See: D. Reshef, Y. Reshef, H. Finucane, S. Grossman, G. McVean, P. Turnbaugh,
    E. Lander, M. Mitzenmacher, P. Sabeti. Detecting novel associations in large
    datasets. Science 334, 6062 (2011).
    """
    if len(x) != len(y):
        raise ValueError("The two vectors must be of equal length")

    # convert missing values
    x = [NaN if (item == missing_value) else item for item in x]
    y = [NaN if (item == missing_value) else item for item in y]

    if environment == "PYTHON":
        x = jpype.JArray(jpype.JFloat, 1)(x)
        y = jpype.JArray(jpype.JFloat, 1)(y)

    _silence_output()
    try:
        dataset = VarPairData(x, y)
        parameters = MineParameters(
            float(exp), float(c),
            0, _null_buffered_writer  # debug level, debug stream
        )
        result = Analysis.getResult(Result, dataset, parameters)
    finally:
        # Always restore output, even if the analysis raises; otherwise the
        # process would stay silenced.
        _restore_output()

    keys = ("MIC", "non_linearity", "MAS", "MEV", "MCN", "pearson")
    # The first two comma-separated fields of the result string are skipped.
    values = result.toString().split(',')[2:]

    return {
        key: None if value in ('', "ERROR") else float(value)
        for key, value in zip(keys, values)
    }
def testSetFromNPFloatArrayInt(self):
    """A Java float array can be filled from an int32 NumPy array by slice assignment."""
    import numpy as np
    source = np.array([1, 2, 3], np.int32)
    float_array = jpype.JArray(jpype.JFloat)(len(source))
    float_array[:] = source
    self.assertCountEqual(source, float_array)
def _java_array_byte(data):
    """Create a one-dimensional Java ``byte[]`` from ``data``."""
    byte_array_type = jpype.JArray(jpype.JByte, 1)
    return byte_array_type(data)
def testArrayCtor2(self):
    """JArray accepts a JClass instance as its component type."""
    list_class = jpype.JClass('java.util.List')
    list_array_type = jpype.JArray(list_class)
    self.assertTrue(issubclass(list_array_type, jpype.JArray))
    instance = list_array_type(10)
    self.assertTrue(isinstance(instance, jpype.JArray))
def testJArrayConversionBool(self):
    """A Java boolean array built from Python bools holds the same values."""
    flags = [True, False, False, True]
    boolean_array_type = jpype.JArray(jpype.JBoolean)
    boolean_array = boolean_array_type(flags)
    self.assertCountEqual(flags, boolean_array[:])
def testArrayCtor4(self):
    """JArray accepts jpype.JObject as its component type."""
    object_array_type = jpype.JArray(jpype.JObject)
    self.assertTrue(issubclass(object_array_type, jpype.JArray))
    instance = object_array_type(10)
    self.assertTrue(isinstance(instance, jpype.JArray))
def testJArrayFail2(self):
    """Calling an array class with three positional arguments raises TypeError."""
    with self.assertRaises(TypeError):
        int_array_type = jpype.JArray(jpype.JInt, 1)
        int_array_type(1, 2, 3)
def testJArrayGetItemSlice(self):
    """Reading a slice with a negative step is expected to raise NotImplementedError."""
    with self.assertRaises(NotImplementedError):
        int_array = jpype.JArray(jpype.JInt)([1, 2, 3, 4])
        # The expression is evaluated only for the exception it triggers.
        int_array[0:2:-1]