Exemple #1
0
    def load_data2dataframe(
            self,
            solver_params,
            MAEs=True,
            MAE_SS_flat=False,
            allowgenerror=False,
            data2dict_kw=None,
            drop_traces=False,
    ):
        """Load data for solver_params and save to DataFrame.

        Each entry of ``solver_params`` is unpacked as
        ``(pert_method, adaptive, methods, step_params[, pert_params])``.
        When the optional fifth item is missing, ``['auto']`` is used as the
        list of perturbation parameters. For every combination of method,
        step parameter and perturbation parameter, ``self.load_data2dict`` is
        called once and its result becomes one row of the returned frame.

        Args:
            solver_params: Iterable of solver configurations (see above).
            MAEs: Forwarded to ``load_data2dict``.
            MAE_SS_flat: Forwarded to ``load_data2dict``.
            allowgenerror: Forwarded to ``load_data2dict``.
            data2dict_kw: Forwarded to ``load_data2dict``; ``None`` (the
                default) is replaced by a fresh empty dict.
            drop_traces: If true, the trace entries (``ts``, ``vs``, ``ys``
                and their ``acc_``/``det_`` variants) are removed from each
                loaded record before it is stored.

        Returns:
            A ``pandas.DataFrame`` with one row per loaded configuration. The
            columns ``adaptive`` and ``n_samples`` are cast to ``int``. An
            empty frame is returned when nothing was loaded.
        """
        trace_keys = (
            'ts', 'vs', 'ys', 'acc_ts', 'acc_vs', 'acc_ys',
            'det_ts', 'det_vs', 'det_ys',
        )

        # Avoid sharing one mutable default dict between calls.
        if data2dict_kw is None:
            data2dict_kw = {}

        # Rows are collected in a list and turned into a frame once at the
        # end: DataFrame.append was removed in pandas 2.0 and copying the
        # whole frame for every row is quadratic.
        rows = []
        # Configurations requested so far, used to skip duplicates.
        # NOTE(review): assumes the five identifying values are hashable
        # (strings / numbers) - confirm against callers.
        loaded = set()

        for pert_method, adaptive, methods, step_params, *pert_params in solver_params:
            pert_params = ['auto'] if len(pert_params) == 0 else pert_params[0]

            methods = plot_utils.sort_methods(methods)

            for method, step_param, pert_param in itproduct(
                    methods, step_params, pert_params):
                config = (method, adaptive, step_param, pert_method, pert_param)
                if config in loaded:
                    print(
                        f"Found duplicate for {method} {adaptive} {step_param}"
                    )
                    continue
                loaded.add(config)

                dd = self.load_data2dict(
                    method=method,
                    adaptive=adaptive,
                    step_param=step_param,
                    pert_method=pert_method,
                    pert_param=pert_param,
                    MAEs=MAEs,
                    MAE_SS_flat=MAE_SS_flat,
                    allowgenerror=allowgenerror,
                    data2dict_kw=data2dict_kw,
                )

                if drop_traces:
                    for key in trace_keys:
                        if key in dd:
                            del dd[key]
                rows.append(dd)

        if not rows:
            # Nothing was loaded; there are no columns to cast.
            return pd.DataFrame()

        df = pd.DataFrame(rows)
        df['adaptive'] = df['adaptive'].astype(int)
        df['n_samples'] = df['n_samples'].astype(int)

        return df
Exemple #2
0
 def find_similar_products(self, products1, products2):
     """Yield the most similar product pairs across two collections.

     Every pair ``(p1, p2)`` from ``products1`` x ``products2`` is scored
     with ``cal_sim_product`` on the products' ``'tags'`` entries. The pairs
     are then sorted by score and the last ``self.top_related_products``
     entries of that ascending order are yielded, lowest score first.

     NOTE: pairs are stored in a dict keyed by their score, so when several
     pairs share the same score only the last one encountered is kept.
     """
     d = {}
     for p1, p2 in itproduct(products1, products2):
         d[cal_sim_product(p1['tags'], p2['tags'],
                           return_diff=False)] = [p1, p2]
     # ``key`` must be given by keyword: sorted() accepts only the iterable
     # as a positional argument.
     ranked = sorted(d.items(), key=itemgetter(0))
     for _sim, (p1, p2) in ranked[-self.top_related_products:]:
         yield p1, p2
Exemple #3
0
 def find_similar_products(self, products1, products2):
     """Yield the most similar product pairs across two collections.

     Calculates the similarity between the products that are related to the
     user tags and the products that are related to the main product itself:
     every pair ``(p1, p2)`` from ``products1`` x ``products2`` is scored
     with ``cal_sim_product`` on the products' ``'tags'`` entries. The pairs
     are sorted by score and the last ``self.top_related_products`` entries
     of that ascending order are yielded, lowest score first.

     NOTE: pairs are stored in a dict keyed by their score, so when several
     pairs share the same score only the last one encountered is kept.
     """
     d = {}
     for p1, p2 in itproduct(products1, products2):
         d[cal_sim_product(p1['tags'], p2['tags'],
                           return_diff=False)] = [p1, p2]
     # ``key`` must be given by keyword: sorted() accepts only the iterable
     # as a positional argument.
     ranked = sorted(d.items(), key=itemgetter(0))
     for _sim, (p1, p2) in ranked[-self.top_related_products:]:
         yield p1, p2
Exemple #4
0
 def test_construction_isParallel(self):
     """
     Tests the setting of attributes _isUpdateParallel and _isEstimationParallel of
     MultiMomentEstimator from keywords isParallel, isUpdateParallel and
     isEstimationParallel.

     Each keyword is tried with the values None (keyword not passed), False and
     True, in every combination. The expected attribute value is the specific
     keyword if it was passed, otherwise isParallel if it was passed, otherwise
     a true value.
     """
     for is_parallel, is_update_parallel, is_estimation_parallel in itproduct(
         (None, False, True), repeat=3):
         kwArgs = {
             "order": 1,
             "indexSetDimension": 1,
             "variableDimension": 2,
         }
         # None means "do not pass this keyword at all".
         if is_parallel is not None:
             kwArgs["isParallel"] = is_parallel
         if is_update_parallel is not None:
             kwArgs["isUpdateParallel"] = is_update_parallel
         if is_estimation_parallel is not None:
             kwArgs["isEstimationParallel"] = is_estimation_parallel
         estimator = MultiMomentEstimator(**kwArgs)
         with self.subTest(
                 isParallel=is_parallel,
                 isUpdateParallel=is_update_parallel,
                 msg=(f"Testing construction with isParallel {is_parallel} "
                      f"and isUpdateParallel {is_update_parallel}"),
         ):
             if is_update_parallel is not None:
                 self.assertEqual(estimator._isUpdateParallel,
                                  is_update_parallel)
             elif is_parallel is not None:
                 self.assertEqual(estimator._isUpdateParallel, is_parallel)
             else:
                 self.assertTrue(estimator._isUpdateParallel)
         with self.subTest(
                 isParallel=is_parallel,
                 isEstimationParallel=is_estimation_parallel,
                 # Report the estimation flag here (the message previously
                 # interpolated the update flag by mistake).
                 msg=(f"Testing construction with isParallel {is_parallel} "
                      f"and isEstimationParallel {is_estimation_parallel}"),
         ):
             if is_estimation_parallel is not None:
                 self.assertEqual(estimator._isEstimationParallel,
                                  is_estimation_parallel)
             elif is_parallel is not None:
                 self.assertEqual(estimator._isEstimationParallel,
                                  is_parallel)
             else:
                 self.assertTrue(estimator._isEstimationParallel)
Exemple #5
0
 def _initialisePowerSums(self):
     """Create ``self._powerSums`` with every entry set to zero.

     Keys are built from all tuples of ``2 ** self._indexSetDimension``
     integers, each between 0 and ``2 * self.order``, whose sum lies in
     ``]0, 2 * self.order]``; the integers of a tuple are concatenated as
     strings. Each value is a NumPy ``float64`` array of zeros of length
     ``self._variableDimension``.
     """
     highest = 2 * self.order
     exponent_count = 2 ** self._indexSetDimension

     keys = []
     for exponents in itproduct(range(highest + 1), repeat=exponent_count):
         total = sum(exponents)
         # Skip the all-zero tuple and any tuple exceeding the maximal power.
         if total <= 0 or total > highest:
             continue
         keys.append("".join(str(e) for e in exponents))

     # Build the mapping locally and bind the attribute in a single step.
     sums = {}
     for key in keys:
         sums[key] = zeros(self._variableDimension, dtype=float64)
     self._powerSums = sums
Exemple #6
0
 def test_estimationD0(self):
     """Compare the estimator's multiValue output with the reference statistics.

     For every order from 1 to ``self.order``, both the value
     (``isError=False``) and its error estimate (``isError=True``) are
     checked component by component against
     ``self.referenceStatistics``, under keys ``h<order>`` and
     ``h<order>_var`` respectively.
     """
     orders = range(1, self.order + 1)
     for order, isError in itproduct(orders, (False, True)):
         suffix = '_var' if isError else ''
         key = f"h{order}{suffix}"
         remote_value = self._estimator.multiValue(order, isError)
         estimation = get_value_from_remote(remote_value)
         label = 'Variance of ' if isError else ''
         for c in range(self.variableDimension):
             description = (f"{label}h-statistics of order {order}, "
                            f"component {c}")
             # Lookups stay inside the sub-test so that a failure is
             # reported for that component only.
             with self.subTest(msg=description, powerSum=key, component=c):
                 self.assertAlmostEqual(
                     estimation[c],
                     self.referenceStatistics[key][c],
                 )
Exemple #7
0
    def test_estimation_deterministic(self):
        """
        Check estimations against stored reference data.

        The reference values and the samples are read from
        parameters/multi-moment_test_data.json (generated by
        multi-moment_test_data.py). Each component is compared with a
        relative tolerance when the reference is non-zero, and with an
        absolute tolerance otherwise.
        """
        # The data set is assumed to be small, so it is loaded in full.
        with open("parameters/multi-moment_test_data.json", "r") as f:
            referenceData = load(f)

        cases = itproduct((0, 1), (1, 2, 3, 4), (False, True))
        for dim, order, isError in cases:
            prefix = 'Delta-' if dim == 1 else ''
            suffix = '_var' if isError else ''
            reference = referenceData[f"{prefix}h{order}{suffix}"]

            estimator = MultiMomentEstimator(order=order)
            samples = referenceData["samples"]
            if dim == 0:
                # Keep only the second entry of each sample (the coarser
                # level), wrapped in a list to preserve the nesting depth.
                samples = [[sample[1]] for sample in samples]
            estimator.update(samples)
            estimation = get_value_from_remote(
                estimator.multiValue(order, isError))

            variance_label = 'Variance of ' if isError else ''
            delta_label = 'Delta ' if dim == 1 else ''
            # One sub-test per component.
            for c, (est, ref) in enumerate(zip(estimation, reference)):
                # Relative tolerance when possible, absolute otherwise.
                tol = abs(self.tolerance * ref) if ref != 0 else self.tolerance
                description = (f"{variance_label}{delta_label}"
                               f"h-statistics of order {order}, component {c}")
                with self.subTest(
                        msg=description,
                        indexSetDimension=dim,
                        statisticalOrder=order,
                        errorEstimation=isError,
                        component=c,
                ):
                    self.assertAlmostEqual(est, ref, delta=tol)
Exemple #8
0
 def test_estimation_random(self):
     """
     Randomised check of the estimations; spurious failures can occur.

     Combinations with index-set dimension 1, or error estimation of order
     above 2, are skipped. The reference is computed by the gaussian*
     helpers from self.mean, self.variance and self.numberOfSamples.
     """
     for dim, order, isError in itproduct((0, 1), (1, 2, 3, 4),
                                          (False, True)):
         if dim == 1 or (isError and order > 2):
             # Not implemented: nothing to check for this combination.
             continue

         if isError:
             reference = gaussianHStatVariance(self.variance, order,
                                               self.numberOfSamples)
         elif order == 1:
             # Order 1 is not actually an h-statistic: use the raw moment.
             reference = gaussianRawMoment(self.mean, self.variance, order)
         else:
             reference = gaussianCentralMoment(self.variance, order)

         me = MultiMomentEstimator(order=order)
         me.update(self._samples(dim))
         estimation = get_value_from_remote(me.multiValue(order, isError))

         variance_label = 'Variance of ' if isError else ''
         delta_label = 'Delta ' if dim == 1 else ''
         # One sub-test per component.
         for c, (est, ref) in enumerate(zip(estimation, reference)):
             # Relative tolerance, falling back to the absolute one when
             # the relative tolerance is zero.
             relative = abs(self.tolerance * ref)
             tol = self.tolerance if relative == 0 else relative
             description = (f"{variance_label}{delta_label}"
                            f"h-statistics of order {order}, component {c}")
             with self.subTest(
                     msg=description,
                     indexSetDimension=dim,
                     statisticalOrder=order,
                     errorEstimation=isError,
                     component=c,
             ):
                 self.assertAlmostEqual(est, ref, delta=tol)
Exemple #9
0
                path_postfix=args.path_postfix,
                n_cpus=args.n_cpus,
                save_prob_pictures=args.save_prob_pictures,
                save_item_pictures=args.save_item_pictures,
                keep_prob_pictures=args.keep_prob_pictures,
                keep_item_pictures=args.keep_item_pictures,
                batch_size=args.batch_size,
                reset_cell_on_update=args.reset_cell_on_update,
                log_path=args.log_path)
        else:
            keys, values = zip(*test_dict.items())
            for nexp in test_dict['nexp']:
                for mb in test_dict['mbatch']:
                    assert nexp % mb == 0, f"nexp({nexp}) not divisble by mbatch({mb})"
                    for td in test_dict['timedialation']:
                        assert nexp % td == 0, f"nexp ({nexp}) not divisble by timedialation({td})"
                        assert (
                            nexp // td
                        ) % mb == 0, f'master exp ({nexp}/{td}={nexp//td}) not divisible by mbatch ({mb})'

            for _ in range(NSAMPLES):
                for v in itproduct(*values):
                    run(
                        **dict(zip(keys, v))
                    )  #Run experiment with permutation of values from test_dict
        if PROFILER is not None:
            PROFILER.disable()
    finally:
        if PROFILER is not None:
            PROFILER.print_stats()
Exemple #10
0
def futureArray(array: SampleArray):
    """Replace every innermost element of a triply nested list in place.

    Each element ``array[i][j][k]`` is replaced by the result of calling
    ``returnInput_Task`` on it, visiting elements in increasing ``i``, then
    ``j``, then ``k``. The sub-lists themselves are iterated, so an empty
    ``array`` (or empty sub-lists) is handled without error and the
    sub-lists need not all have the same length.

    Args:
        array: Triply nested mutable sequence; modified in place.

    Returns:
        The same ``array`` object that was passed in.
    """
    for plane in array:
        for row in plane:
            # Only element values are reassigned; the row length is
            # unchanged, so enumerating while assigning is safe.
            for k, value in enumerate(row):
                row[k] = returnInput_Task(value)
    return array
Exemple #11
0
    def parallel_test_isParallel(self):
        """
        Tests the parallel behaviours of estimation and update of MultiMomentEstimator,
        declared by its attributes _isUpdateParallel and _isEstimationParallel. This
        behaviour is either parallel or serial. Both options are tested successively in
        sub-tests. This test only makes sense when run in a parallel framework. Otherwise, the
        sub-test on parallel behaviour will always fail and the one on serial behaviour will
        always pass.

        Parallel behaviour is detected by expecting a TypeError when the result is used
        like a plain list or number; serial behaviour is detected by checking that the
        result is a float.
        """
        isd = 1  # index set dimension
        dims = (9, 2**isd, 2)  # events, levels, components
        commonArgs = {
            "order": 1,
            "indexSetDimension": isd,
            "variableDimension": dims[2],
        }
        # Test parallel behaviour
        for is_update_parallel, is_estimation_parallel in itproduct(
            (False, True), repeat=2):
            # Samples have to be generated again every time or COMPSs fails to find them twice
            future_samples = futureArray(
                self._randomGenerator.normal(0, 1, dims).tolist())
            estimator = MultiMomentEstimator(
                isUpdateParallel=is_update_parallel,
                isEstimationParallel=is_estimation_parallel,
                **commonArgs,
            )
            with self.subTest(
                    isUpdateParallel=is_update_parallel,
                    msg=
                    f"Testing {'parallel' if is_update_parallel else 'sequential'} update",
            ):
                estimator.update(future_samples)
                if is_update_parallel:
                    with self.assertRaises(
                            TypeError,
                            msg=
                            "MultiMomentEstimator._powerSums contains lists instead of Future.",
                    ):
                        estimator._powerSums["10"][0]
                else:
                    self.assertIsInstance(estimator._powerSums["10"][0], float)
            with self.subTest(
                    isEstimationParallel=is_estimation_parallel,
                    isUpdateParallel=is_update_parallel,
                    # The estimation mode is described by the estimation flag
                    # (the message previously used the update flag twice).
                    msg=
                (f"Testing {'parallel' if is_estimation_parallel else 'sequential'} "
                 "estimation, with "
                 f"{'parallel' if is_update_parallel else 'sequential'} update"
                 ),
            ):
                v = estimator.value(1)
                if is_estimation_parallel:
                    with self.assertRaises(
                            TypeError,
                            msg=("MultiMomentEstimator.value returned "
                                 f"{type(v)} instead of Future."),
                    ):
                        v += 1
                else:
                    self.assertIsInstance(v, float)