def load_data2dataframe(
    self,
    solver_params,
    MAEs=True,
    MAE_SS_flat=False,
    allowgenerror=False,
    data2dict_kw=None,
    drop_traces=False,
):
    """Load data for solver_params and save to DataFrame.

    Each entry of ``solver_params`` is unpacked as
    ``(pert_method, adaptive, methods, step_params[, pert_params])``.
    When ``pert_params`` is omitted, ``['auto']`` is used. One row is loaded
    through ``self.load_data2dict`` for every combination of method,
    step parameter and perturbation parameter.

    Args:
        solver_params: Iterable of solver configurations (see above).
        MAEs, MAE_SS_flat, allowgenerror: Forwarded unchanged to
            ``self.load_data2dict``.
        data2dict_kw: Extra keyword dictionary forwarded to
            ``self.load_data2dict``; ``None`` means an empty dict.
        drop_traces: If true, the trace entries (``ts``, ``vs``, ``ys`` and
            their ``acc_``/``det_`` variants) are removed from each row.

    Returns:
        A DataFrame with one row per loaded configuration, with the
        ``adaptive`` and ``n_samples`` columns cast to ``int``. An empty
        DataFrame is returned when nothing was loaded.
    """
    # Avoid sharing one mutable default dictionary between calls.
    data2dict_kw = {} if data2dict_kw is None else data2dict_kw
    trace_keys = (
        'ts', 'vs', 'ys',
        'acc_ts', 'acc_vs', 'acc_ys',
        'det_ts', 'det_vs', 'det_ys',
    )

    # Rows are gathered in a list and turned into a DataFrame once:
    # DataFrame.append was removed in pandas 2.0 and was quadratic anyway.
    rows = []
    for pert_method, adaptive, methods, step_params, *pert_params in solver_params:
        pert_params = ['auto'] if len(pert_params) == 0 else pert_params[0]
        methods = plot_utils.sort_methods(methods)

        for method, step_param, pert_param in itproduct(
                methods, step_params, pert_params):
            requested = {
                'method': method,
                'adaptive': adaptive,
                'step_param': step_param,
                'pert_method': pert_method,
                'pert_param': pert_param,
            }
            # A configuration counts as loaded when an earlier row carries
            # the same values for all five identifying fields.
            already_loaded = any(
                all(row[name] == value for name, value in requested.items())
                for row in rows
            )
            if already_loaded:
                print(
                    f"Found duplicate for {method} {adaptive} {step_param}"
                )
                continue

            dd = self.load_data2dict(
                method=method,
                adaptive=adaptive,
                step_param=step_param,
                pert_method=pert_method,
                pert_param=pert_param,
                MAEs=MAEs,
                MAE_SS_flat=MAE_SS_flat,
                allowgenerror=allowgenerror,
                data2dict_kw=data2dict_kw,
            )
            if drop_traces:
                for key in trace_keys:
                    if key in dd:
                        del dd[key]
            rows.append(dd)

    if not rows:
        # Nothing to cast; the columns below would not exist.
        return pd.DataFrame()

    df = pd.DataFrame(rows)
    df['adaptive'] = df['adaptive'].astype(int)
    df['n_samples'] = df['n_samples'].astype(int)
    return df
def find_similar_products(self, products1, products2):
    """Yield the most similar product pairs between two collections.

    Every pair ``(p1, p2)`` from ``products1`` x ``products2`` is scored with
    ``cal_sim_product`` on the products' ``'tags'`` entries. The pairs with
    the highest scores are yielded in ascending order of score, limited to
    the last ``self.top_related_products`` entries.

    Yields:
        Tuples ``(p1, p2)``.
    """
    # Keyed on the score itself: pairs that tie on similarity overwrite each
    # other, so only the last pair seen for a given score survives.
    pair_by_score = {}
    for p1, p2 in itproduct(products1, products2):
        score = cal_sim_product(p1['tags'], p2['tags'], return_diff=False)
        pair_by_score[score] = [p1, p2]

    # `key` is keyword-only in Python 3; passing it positionally raises
    # TypeError as soon as the generator is consumed.
    ranked = sorted(pair_by_score.items(), key=itemgetter(0))
    # NOTE: a limit of 0 gives the slice [-0:], which selects every entry.
    for _score, (p1, p2) in ranked[-self.top_related_products:]:
        yield p1, p2
def find_similar_products(self, products1, products2):
    """Yield the best-matching pairs of products from two collections.

    Per the original author's note, this compares the products related to
    the user tags with the products related to the main product itself.
    Each pair is scored by ``cal_sim_product`` applied to the ``'tags'``
    entries, and the ``self.top_related_products`` highest-scoring pairs are
    yielded, lowest score first.

    Yields:
        Tuples ``(p1, p2)``.
    """
    # Scores are used as dictionary keys, so two pairs with an identical
    # score collapse into one entry (the later pair wins).
    scored = {}
    for p1, p2 in itproduct(products1, products2):
        similarity = cal_sim_product(p1['tags'], p2['tags'], return_diff=False)
        scored[similarity] = [p1, p2]

    # sorted() only accepts `key` as a keyword argument in Python 3; the
    # positional form raised TypeError.
    by_similarity = sorted(scored.items(), key=itemgetter(0))
    for _similarity, (p1, p2) in by_similarity[-self.top_related_products:]:
        yield p1, p2
def test_construction_isParallel(self):
    """
    Tests the setting of attributes _isUpdateParallel and
    _isEstimationParallel of MultiMomentEstimator from keywords isParallel,
    isUpdateParallel and isEstimationParallel.

    Each keyword is tried as omitted (None), False and True, in every
    combination. The assertions encode the expected precedence: the specific
    keyword if given, otherwise isParallel if given, otherwise True.
    """
    for is_parallel, is_update_parallel, is_estimation_parallel in itproduct(
            (None, False, True), repeat=3):
        kwArgs = {
            "order": 1,
            "indexSetDimension": 1,
            "variableDimension": 2,
        }
        # None stands for "keyword not passed at all".
        if is_parallel is not None:
            kwArgs["isParallel"] = is_parallel
        if is_update_parallel is not None:
            kwArgs["isUpdateParallel"] = is_update_parallel
        if is_estimation_parallel is not None:
            kwArgs["isEstimationParallel"] = is_estimation_parallel
        estimator = MultiMomentEstimator(**kwArgs)

        with self.subTest(
                isParallel=is_parallel,
                isUpdateParallel=is_update_parallel,
                msg=(f"Testing construction with isParallel {is_parallel} "
                     f"and isUpdateParallel {is_update_parallel}"),
        ):
            if is_update_parallel is not None:
                self.assertEqual(estimator._isUpdateParallel,
                                 is_update_parallel)
            elif is_parallel is not None:
                self.assertEqual(estimator._isUpdateParallel, is_parallel)
            else:
                self.assertTrue(estimator._isUpdateParallel)

        with self.subTest(
                isParallel=is_parallel,
                isEstimationParallel=is_estimation_parallel,
                # Report the estimation flag here (the message previously
                # printed the update flag by mistake).
                msg=(f"Testing construction with isParallel {is_parallel} "
                     f"and isEstimationParallel {is_estimation_parallel}"),
        ):
            if is_estimation_parallel is not None:
                self.assertEqual(estimator._isEstimationParallel,
                                 is_estimation_parallel)
            elif is_parallel is not None:
                self.assertEqual(estimator._isEstimationParallel,
                                 is_parallel)
            else:
                self.assertTrue(estimator._isEstimationParallel)
def _initialisePowerSums(self):
    """Create ``self._powerSums`` with one zero array per power combination.

    Combinations are tuples of ``2 ** self._indexSetDimension`` integers,
    each between 0 and ``2 * self.order``, whose sum lies in ]0, 2*order].
    Each tuple is turned into a key by concatenating its integers as
    strings; the associated value is a float64 NumPy array of zeros of
    length ``self._variableDimension``.
    """
    highestPower = 2 * self.order
    tupleLength = 2 ** self._indexSetDimension

    powerSums = {}
    for exponents in itproduct(range(highestPower + 1), repeat=tupleLength):
        total = sum(exponents)
        # Keep only combinations whose sum is in ]0, highestPower]
        if total <= 0 or total > highestPower:
            continue
        # NOTE(review): keys are plain digit concatenations, so they become
        # ambiguous once a power needs two digits (e.g. (1, 10) and (11, 0)
        # both give "110" when highestPower >= 11).
        key = "".join(str(e) for e in exponents)
        powerSums[key] = zeros(self._variableDimension, dtype=float64)

    self._powerSums = powerSums
def test_estimationD0(self):
    """
    Compare the estimator's h-statistics, and their variances, with the
    reference statistics, one component at a time.

    Orders 1 to self.order are covered; every component gets its own
    sub-test.
    """
    allOrders = range(1, self.order + 1)
    for order, isError in itproduct(allOrders, (False, True)):
        suffix = "_var" if isError else ""
        key = f"h{order}{suffix}"
        prefix = "Variance of " if isError else ""
        remoteValue = self._estimator.multiValue(order, isError)
        estimation = get_value_from_remote(remoteValue)
        for c in range(self.variableDimension):
            description = (f"{prefix}h-statistics of order {order}, "
                           f"component {c}")
            with self.subTest(msg=description, powerSum=key, component=c):
                # Look-ups stay inside the sub-test so that a missing entry
                # is reported against this component only.
                self.assertAlmostEqual(
                    estimation[c],
                    self.referenceStatistics[key][c],
                )
def test_estimation_deterministic(self):
    """
    Check estimations against stored reference data.

    The reference data is generated by multi-moment_test_data.py and read
    from parameters/multi-moment_test_data.json (path relative to the
    working directory).
    """
    # The data set is assumed to be small, so it is loaded in one go
    with open("parameters/multi-moment_test_data.json", "r") as f:
        referenceData = load(f)

    cases = itproduct((0, 1), (1, 2, 3, 4), (False, True))
    for dim, order, isError in cases:
        deltaKey = "Delta-" if dim == 1 else ""
        varKey = "_var" if isError else ""
        reference = referenceData[f"{deltaKey}h{order}{varKey}"]

        # Compute estimation
        estimator = MultiMomentEstimator(order=order)
        if dim == 0:
            # Keep only the coarser (i.e. second) level of each sample,
            # wrapped in a list to preserve the nesting depth
            samples = [[s[1]] for s in referenceData["samples"]]
        else:
            samples = referenceData["samples"]
        estimator.update(samples)
        estimation = get_value_from_remote(
            estimator.multiValue(order, isError))

        variancePrefix = "Variance of " if isError else ""
        deltaPrefix = "Delta " if dim == 1 else ""
        # Test each component individually
        for c, (est, ref) in enumerate(zip(estimation, reference)):
            # Relative error when the reference is non-zero, absolute
            # error otherwise
            tol = abs(self.tolerance * ref) if ref != 0 else self.tolerance
            description = (f"{variancePrefix}{deltaPrefix}"
                           f"h-statistics of order {order}, component {c}")
            with self.subTest(
                    msg=description,
                    indexSetDimension=dim,
                    statisticalOrder=order,
                    errorEstimation=isError,
                    component=c,
            ):
                self.assertAlmostEqual(est, ref, delta=tol)
def test_estimation_random(self):
    """
    Randomised testing of the estimations against Gaussian reference values.

    False failures are possible. Cases with index set dimension 1, and
    error estimation for orders above 2, are skipped as not implemented.
    """
    cases = itproduct((0, 1), (1, 2, 3, 4), (False, True))
    for dim, order, isError in cases:
        if dim == 1 or (isError and order > 2):
            # Not implemented: nothing to do
            continue

        if isError:
            reference = gaussianHStatVariance(self.variance, order,
                                              self.numberOfSamples)
        elif order == 1:
            # order 1: not actually a h-statistics
            reference = gaussianRawMoment(self.mean, self.variance, order)
        else:
            reference = gaussianCentralMoment(self.variance, order)

        me = MultiMomentEstimator(order=order)
        me.update(self._samples(dim))
        estimation = get_value_from_remote(me.multiValue(order, isError))

        variancePrefix = "Variance of " if isError else ""
        deltaPrefix = "Delta " if dim == 1 else ""
        # Test each component individually
        for c, (est, ref) in enumerate(zip(estimation, reference)):
            # Relative error if possible, absolute error otherwise
            relative = abs(self.tolerance * ref)
            tol = relative if relative != 0 else self.tolerance
            description = (f"{variancePrefix}{deltaPrefix}"
                           f"h-statistics of order {order}, component {c}")
            with self.subTest(
                    msg=description,
                    indexSetDimension=dim,
                    statisticalOrder=order,
                    errorEstimation=isError,
                    component=c,
            ):
                self.assertAlmostEqual(est, ref, delta=tol)
path_postfix=args.path_postfix, n_cpus=args.n_cpus, save_prob_pictures=args.save_prob_pictures, save_item_pictures=args.save_item_pictures, keep_prob_pictures=args.keep_prob_pictures, keep_item_pictures=args.keep_item_pictures, batch_size=args.batch_size, reset_cell_on_update=args.reset_cell_on_update, log_path=args.log_path) else: keys, values = zip(*test_dict.items()) for nexp in test_dict['nexp']: for mb in test_dict['mbatch']: assert nexp % mb == 0, f"nexp({nexp}) not divisble by mbatch({mb})" for td in test_dict['timedialation']: assert nexp % td == 0, f"nexp ({nexp}) not divisble by timedialation({td})" assert ( nexp // td ) % mb == 0, f'master exp ({nexp}/{td}={nexp//td}) not divisible by mbatch ({mb})' for _ in range(NSAMPLES): for v in itproduct(*values): run( **dict(zip(keys, v)) ) #Run experiment with permutation of values from test_dict if PROFILER is not None: PROFILER.disable() finally: if PROFILER is not None: PROFILER.print_stats()
def futureArray(array: SampleArray):
    """Replace every innermost element of a three-level nested array, in place.

    Each element ``array[i][j][k]`` is replaced by the result of
    ``returnInput_Task`` called on it. Elements are visited in row-major
    order (i, then j, then k).

    The nested sequences are walked directly instead of being indexed from
    the extents of ``array[0]`` and ``array[0][0]``, so an empty array no
    longer raises IndexError and sub-lists of unequal length are processed
    in full.

    Returns:
        The same ``array`` object, after modification.
    """
    for levels in array:
        for components in levels:
            for k, value in enumerate(components):
                components[k] = returnInput_Task(value)
    return array
def parallel_test_isParallel(self):
    """
    Tests the parallel behaviours of estimation and update of
    MultiMomentEstimator, declared by its attributes _isUpdateParallel and
    _isEstimationParallel. This behaviour is either parallel or serial.
    Both options are tested successively in sub-tests.

    This test only makes sense when run in a parallel framework. Otherwise,
    the sub-test on parallel behaviour will always fail and the one on
    serial behaviour will always pass.
    """
    isd = 1  # index set dimension
    dims = (9, 2**isd, 2)  # events, levels, components
    commonArgs = {
        "order": 1,
        "indexSetDimension": isd,
        "variableDimension": dims[2],
    }
    # Test parallel behaviour
    for is_update_parallel, is_estimation_parallel in itproduct(
            (False, True), repeat=2):
        # Samples have to be generated again every time or COMPSs fails to
        # find them twice
        future_samples = futureArray(
            self._randomGenerator.normal(0, 1, dims).tolist())
        estimator = MultiMomentEstimator(
            isUpdateParallel=is_update_parallel,
            isEstimationParallel=is_estimation_parallel,
            **commonArgs,
        )

        with self.subTest(
                isUpdateParallel=is_update_parallel,
                msg=
                f"Testing {'parallel' if is_update_parallel else 'sequential'} update",
        ):
            estimator.update(future_samples)
            if is_update_parallel:
                # Subscripting the stored power sum is expected to raise
                # TypeError when the update ran in parallel.
                with self.assertRaises(
                        TypeError,
                        msg=
                        "MultiMomentEstimator._powerSums contains lists instead of Future.",
                ):
                    estimator._powerSums["10"][0]
            else:
                self.assertIsInstance(estimator._powerSums["10"][0], float)

        with self.subTest(
                isEstimationParallel=is_estimation_parallel,
                isUpdateParallel=is_update_parallel,
                # The first label describes the estimation mode (it was
                # previously derived from the update flag by mistake).
                msg=
                (f"Testing {'parallel' if is_estimation_parallel else 'sequential'} "
                 "estimation, with "
                 f"{'parallel' if is_update_parallel else 'sequential'} update"
                 ),
        ):
            v = estimator.value(1)
            if is_estimation_parallel:
                # In-place addition is expected to raise TypeError when the
                # estimation ran in parallel.
                with self.assertRaises(
                        TypeError,
                        msg=("MultiMomentEstimator.value returned "
                             f"{type(v)} instead of Future."),
                ):
                    v += 1
            else:
                self.assertIsInstance(v, float)