Example #1
def test_new(n=100):
    """Run the fast JIDT Kraskov CMI estimator n times on the test data."""
    # source_1, target, opts and expected_res are assumed to be defined at
    # module level by the surrounding test script.
    calculator_name_2 = 'jidt_kraskov_fast'
    est_2 = Estimator_cmi(calculator_name_2)
    for i in range(n):
        res_2 = est_2.estimate(var1=source_1[1:], var2=target[1:],
                               conditional=target[:-1], opts=opts)
    print('New result {0:.4f} nats; expected: {1:.4f}'.format(res_2,
                                                              expected_res))
Example #2
def test_old(n=100):
    """Run the standard JIDT Kraskov CMI estimator n times on the test data."""
    # source_1, target, opts and expected_res are assumed to be defined at
    # module level by the surrounding test script.
    calculator_name_1 = 'jidt_kraskov'
    est_1 = Estimator_cmi(calculator_name_1)
    for i in range(n):
        res_1 = est_1.estimate(var1=source_1[1:], var2=target[1:],
                               conditional=target[:-1], opts=opts)
    print('Old result {0:.4f} nats; expected: {1:.4f}'.format(res_1,
                                                              expected_res))
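
Both snippets assume test scaffolding defined at module level and not shown here. A minimal sketch of what that scaffolding might look like (only the variable names source_1, target, opts and expected_res come from the snippets above; the coupling, option keys and expected value are placeholders, not taken from the original tests):

import numpy as np

np.random.seed(0)
n_samples = 1000
source_1 = np.random.randn(n_samples)
# Toy target coupled to the source at lag 1, matching the indexing
# var1=source_1[1:], var2=target[1:], conditional=target[:-1] used above.
target = np.zeros(n_samples)
target[1:] = 0.5 * source_1[:-1] + 0.1 * np.random.randn(n_samples - 1)
opts = {'kraskov_k': 4}  # placeholder estimator options
expected_res = 0.0       # placeholder expected CMI value in nats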
Example #3
class Multivariate_te(Network_analyses):
    """Set up a network analysis using multivariate transfer entropy.

    Set parameters necessary for network inference using transfer entropy (TE).
    To perform network inference call analyse_network() on an instance of the
    data class.

    Args:
        max_lag: maximum number of steps into the past to look for informative
            samples (maximum temporal search depth)
        min_lag: minimum number of steps into the past to look for informative
            samples (minimum temporal search depth)
        cmi_calculator_name: string with the name of the calculator to be used
            for TE estimation
        target: index of the target process
        source_set: list of process indices used as potential sources (default:
            all possible processes, i.e., all processes other than the target
            process)

    Attributes:
        analyse_network: perform network inference on data; has to be run
            first to write results to the other attributes
        conditional_full: samples in the full conditional set
        conditional_sources: samples in the conditional set coming from sources
        conditional_target: samples in the conditional set coming from target
        current_value: index of the current value in TE estimation
        estimator_name: estimator used for TE estimation
        max_lag: maximum temporal search depth
        min_lag: minimum temporal search depth
        pvalue_omnibus: p-value of the omnibus test
        pvalues_individual_sources: array of p-values for TE from individual
            sources to the target
        sign_omnibus: statistical significance of the overall TE
        sign_individual_sources: array of booleans, indicates statistical
            significance of TE from individual sources to the target
        source_set: list with indices of source processes
        target: index of target process
    """
    def __init__(self, max_lag, min_lag, cmi_calculator_name, target,
                 source_set=None):
        self.max_lag = max_lag
        self.min_lag = min_lag
        self.estimator_name = cmi_calculator_name
        self._cmi_estimator = Estimator_cmi(cmi_calculator_name)  # TODO should be 'calculator'
        # TODO add kwargs for the estimator
        self.source_set = source_set
        self.target = target
        self.sign_omnibus = None
        self.sign_individual_sources = None
        self.pvalue_omnibus = None
        self.pvalues_individual_sources = None
        super(Multivariate_te, self).__init__(max_lag, target)

    def analyse_network(self, data):  # this should allow for multiple targets
        """Find multivariate transfer entropy between sources and a target.

        Find multivariate transfer entropy between all source processes and the
        target process. Uses multivariate, non-uniform embedding found through
        information maximisation (see Faes, ???, and Lizier, 2012). This is
        done in four steps (see Lizier and Faes for details):
        (1) find all relevant samples in the target process's own past, by
            iteratively adding candidate samples that have significant
            conditional mutual information (CMI) with the current value
            (conditional on all samples that were added previously)
        (2) find all relevant samples in the source processes' pasts (again
            by finding all candidates with significant CMI)
        (3) prune the final conditional set by testing the CMI between each
            sample in the final set and the current value, conditional on all
            other samples in the final set
        (4) statistics on the final set of sources (test for overall transfer
            between the final conditional set and the current value, and for
            significant transfer of all individual samples in the set)
        """
        self._current_value_realisations = data.get_realisations(
                                                    analysis_setup=self,
                                                    idx=[self.current_value])
        self._check_source_set(data.n_processes)

        print('---------------------------- (1) include target candidates')
        self._include_target_candidates(data)

        print('---------------------------- (2) include source candidates')
        self._include_source_candidates(data)

        print('---------------------------- (3) pruning step for {0} '
              'candidates'.format(len(self.conditional_sources)))
        self._prune_candidates(data)

        print('---------------------------- (4) final statistics')
        self._test_final_conditional(data)

        if VERBOSE:
            print('final source samples: {0}'.format(self.conditional_sources))
            print('final target samples: {0}'.format(self.conditional_target))
        self._clean_up()
        return

    def _check_source_set(self, n_processes):
        """Set default if no source set was provided by the user."""
        if self.source_set is None:
            self.source_set = list(range(n_processes))
            self.source_set.pop(self.target)
            if VERBOSE:
                print('Testing sources {0}'.format(self.source_set))
        elif not isinstance(self.source_set, list):
            raise TypeError('Source set has to be a list.')

    def _include_target_candidates(self, data):
        """Test candidates from the target's past."""
        candidates = self._define_candidates(processes=[self.target],
                                             samples=np.arange(self.max_lag))  # TODO switch back to actual *lags'*
                                             # samples=np.arange(self.max_lag, 0, -1))
        sources_found = self._find_conditional(candidates, data)
        if not sources_found:
            print("No informative sources in the target's past - "
                  "adding point at t-1 in the target")
            idx = (self.current_value[0], self.current_value[1] - 1)
            realisations = data.get_realisations(self, [idx])
            self._append_conditional_idx(idx)
            self._append_conditional_realisations(realisations)

    def _include_source_candidates(self, data):  # TODO something's slow here
        """Test candidates in the source's past."""
        candidates = self._define_candidates(
                                         processes=self.source_set,
                                         samples=np.arange(self.max_lag -
                                                           self.min_lag + 1))
        _ = self._find_conditional(candidates, data)

    def _test_final_conditional(self, data):
        """Perform statistical test on the final conditional set."""
        if not self.conditional_sources:
            print('---------------------------- no sources found')
        else:
            print(self.conditional_full)
            [s, p] = stats.omnibus_test(self, data)
            self.sign_omnibus = s
            self.pvalue_omnibus = p
            if self.sign_omnibus:
                [s, p] = stats.max_statistic_sequential(self, data)
                self.sign_individual_sources = s
                self.pvalues_individual_sources = p

    def _define_candidates(self, processes, samples):
        """Build a list of candidates' indices.

        Args:
            processes: a list of integers representing process indices
            samples: a list of integers representing sample indices of the
                process

        Returns:
            a list of tuples, where each tuple holds the index of one
            candidate and has the form (process index, sample index)
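
        Example:
            processes=[0, 1] and samples=[0, 1] yield the candidate set
            [(0, 0), (0, 1), (1, 0), (1, 1)]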
        """
        return list(it.product(processes, samples))

    def _find_conditional(self, candidate_set, data):
        """Find informative conditioning set from a set of candidate samples.

        Loop over each candidate in the candidate set and test if it has
        significant mutual information with the current value, conditional
        on all samples that were informative in previous rounds. If this
        conditional mutual information is significant, add it to the
        conditional set.

        Args:
            candidate_set: list of tuples, where each tuple is an index with
                entries (process index, sample index)
            data: instance of Data class

        Returns:
            True if at least one significant candidate was found, False
            otherwise; significant candidates and their realisations are
            appended to the current conditional set
        """
        success = False
        while candidate_set:
            if VERBOSE:
                print('find conditionals in set {0}'.format(candidate_set))
            temp_te = np.empty(len(candidate_set))
            for i, candidate in enumerate(candidate_set):
                candidate_realisations = data.get_realisations(self,
                                                               [candidate])
                temp_te[i] = self._cmi_estimator.estimate(
                                        candidate_realisations,
                                        self._current_value_realisations,
                                        self._conditional_realisations)
            te_max_candidate = max(temp_te)
            max_candidate = candidate_set.pop(np.argmax(temp_te))  # TODO correct?
            significant = stats.max_statistic(self, data, candidate_set,
                                              te_max_candidate)
            if significant:
                success = True
                self._append_conditional_idx([max_candidate])
                self._append_conditional_realisations(
                                data.get_realisations(self, [max_candidate]))
            else:
                break

        return success

    def _prune_candidates(self, data):
        """Remove uninformative candidates from the final conditional set.

        For each sample in the final conditioning set, check if it is
        informative about the current value given all other samples in the
        final set. If a sample is not informative, it is removed from the
        final set.

        Args:
            data: instance of Data class

        Returns:
            None; spurious samples are removed from the conditional set in
            place
        """
        test_set_1 = cp.copy(self.conditional_sources)
        for i, candidate in enumerate(reversed(test_set_1)):  # TODO loop over all, take minimum (see steps 1 and 2)
            if len(test_set_1) == 1:  # TODO is this alright?
                break

            if VERBOSE:
                print('pruning candidate {0}'.format(i + 1))
            # TODO 2 options: (1) use the conditional realisations from
            # earlier, cut current candidate from the array and use the
            # rest as temp conditional; (2) stick with the current
            # implementation and always read stuff from data
            # we currently do option (1) -> profile this
            [temp_cond, temp_cand] = self._remove_realisation(
                                                self.conditional_sources,
                                                candidate)
            temp_te = self._cmi_estimator.estimate(
                                        temp_cand,
                                        self._current_value_realisations,
                                        temp_cond)
            test_set_2 = cp.copy(self.conditional_sources)  # TODO check this
            test_set_2.remove(candidate)
            significant = stats.min_statistic(self, data, test_set_2, temp_te)
            if not significant:
                self._remove_candidate(candidate)

    def _remove_realisation(self, idx_full, idx_single):
        """Remove single indexes' realisations from a set of realisations.

        Remove the realisations of a single index from a set of realisations.
        Return both the single realisation and realisations for the remaining
        set. This allows us to reuse the collected realisations when pruning
        the conditional set after candidates have been included.

        Args:
            idx_full: list of indices indicating the full set
            idx_single: index to be removed

        Returns:
            realisations_remaining: numpy array with remaining realisations
            realisations_single: numpy array with realisations for the single
                index
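
        Example:
            for idx_full of length 3 and conditional realisations of shape
            (n, 3), this returns arrays of shape (n, 2) and (n, 1)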
        """
        assert len(idx_full) > 1, ('No remaining realisations after removal '
                                   'of single realisation.')
        index_single = idx_full.index(idx_single)
        indices_full = np.array([self.conditional_full.index(idx)
                                 for idx in idx_full])
        realisations_full = self._conditional_realisations[:, indices_full]
        realisations_single = np.expand_dims(
                                            realisations_full[:, index_single],
                                            axis=1)
        realisations_remaining = utils.remove_column(realisations_full,
                                                     index_single)
        return realisations_remaining, realisations_single

    def _clean_up(self):
        """Remove temporary data at the end of the analysis."""
        self._current_value_realisations = None
        self._conditional_sources_realisations = None
        self._conditional_target_realisations = None
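
Putting the pieces together: a minimal usage sketch based only on the signatures shown above. The Data instance passed to analyse_network is assumed to exist; its construction is not part of these examples, and the calculator name is carried over from Example #2.

# Minimal usage sketch; `data` must be an instance of the Data class used
# throughout the methods above.
network_analysis = Multivariate_te(max_lag=5, min_lag=1,
                                   cmi_calculator_name='jidt_kraskov',
                                   target=0, source_set=[1, 2])
network_analysis.analyse_network(data)
print('omnibus p-value: {0}'.format(network_analysis.pvalue_omnibus))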