Example #1
    def global_position(self):
        # No need to move the origin to get the rotation: that translation is
        # implicit in the other measurements.
        # TODO: change this so it only executes once, when the local AoA is set.
        if isinf(self.local_position):
            return_value = -inf
        else:
            # The linear algebra below works, but all we actually need to do is
            # add the offset between the global and local readings; that gives
            # the correct result and is cheaper to compute.
            return_value = modulo_heading(
                self.local_position + self.sensor_properties.location.heading)

            # Previous linear-algebra implementation, kept for reference:
            #local_sensor_position = array([sqrt(2) * cos(self.local_aoa), sqrt(2) * sin(self.local_aoa)])
            #global_particle_position = dot(linalg.inv(self.sensor_properties.rotation_matrix), local_sensor_position)
            #return_value = cartesian_to_polar(global_particle_position[0][0], global_particle_position[0][1],
            #                                  response_units=AngleUnits.radians)[1]

        return return_value
    def local_position(self, arg_aoa):
        # Presumably the @local_position.setter counterpart of the property
        # used above (an assumption; the decorator is not shown in the source).
        if isinf(arg_aoa):
            self._aoa = -inf
        else:
            self._aoa = modulo_heading(arg_aoa)
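For context, here is a minimal sketch of what modulo_heading presumably does: wrap an angle in radians into [-pi, pi). The implementation below is an assumption for illustration; the project's actual helper and interval convention may differ.

from math import pi

def modulo_heading(angle):
    # Hypothetical stand-in: wrap an angle (radians) into [-pi, pi).
    return (angle + pi) % (2 * pi) - pi

# A local reading of 3*pi/2 wraps around to -pi/2.
assert abs(modulo_heading(3 * pi / 2) - (-pi / 2)) < 1e-12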
Example #4
def isinf(x):
    # Dispatch: materialize data-descriptor inputs to plain Python values
    # first, then defer to the ufunc implementation in _um.
    if isinstance(x, IDataDescriptor):
        return _um.isinf(dd_as_py(x))
    else:
        return _um.isinf(x)
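A quick usage sketch, assuming _um is NumPy's ufunc module (numpy.core.umath) and dd_as_py converts a data descriptor to a plain Python value (both are assumptions from the identifiers above); for ordinary scalars and arrays the wrapper simply behaves like numpy.isinf:

import numpy as np

print(np.isinf(float('inf')))               # True
print(np.isinf([1.0, float('-inf'), 2.0]))  # [False  True False]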
Example #5
    def get_format_func(self, elem, **options):
        missing_opt = self.check_options(**options)
        if missing_opt:
            raise Exception("Missing options: {}".format(missing_opt))

        floatmode = options['floatmode']
        precision = None if floatmode == 'unique' else options['precision']
        suppress_small = options['suppress_small']
        sign = options['sign']
        infstr = options['infstr']
        nanstr = options['nanstr']
        exp_format = False
        pad_left, pad_right = 0, 0

        # only the finite values are used to compute the number of digits
        finite = umath.isfinite(elem)
        finite_vals = elem[finite]
        nonfinite_vals = elem[~finite]

        # choose exponential mode based on the non-zero finite values:
        abs_non_zero = umath.absolute(finite_vals[finite_vals != 0])
        if len(abs_non_zero) != 0:
            max_val = np.max(abs_non_zero)
            min_val = np.min(abs_non_zero)
            with np.errstate(over='ignore'):  # division can overflow
                if max_val >= 1.e8 or (not suppress_small and
                                       (min_val < 0.0001
                                        or max_val / min_val > 1000.)):
                    exp_format = True

        # do a first pass of printing all the numbers, to determine sizes
        if len(finite_vals) == 0:
            trim, exp_size, unique = '.', -1, True
        elif exp_format:
            trim, unique = '.', True
            if floatmode == 'fixed':
                trim, unique = 'k', False
            strs = (format_float_scientific(x,
                                            precision=precision,
                                            unique=unique,
                                            trim=trim,
                                            sign=sign == '+')
                    for x in finite_vals)
            frac_strs, _, exp_strs = zip(*(s.partition('e') for s in strs))
            int_part, frac_part = zip(*(s.split('.') for s in frac_strs))
            exp_size = max(len(s) for s in exp_strs) - 1

            trim = 'k'
            precision = max(len(s) for s in frac_part)

            # this should be only 1 or 2. Can be calculated from sign.
            pad_left = max(len(s) for s in int_part)
            # pad_right is only needed for nan length calculation
            pad_right = exp_size + 2 + precision

            unique = False
        else:
            trim, unique = '.', True
            if floatmode == 'fixed':
                trim, unique = 'k', False
            strs = (format_float_positional(x,
                                            precision=precision,
                                            fractional=True,
                                            unique=unique,
                                            trim=trim,
                                            sign=sign == '+')
                    for x in finite_vals)
            int_part, frac_part = zip(*(s.split('.') for s in strs))
            pad_left = max(len(s) for s in int_part)
            pad_right = max(len(s) for s in frac_part)
            exp_size = -1

            if floatmode in ['fixed', 'maxprec_equal']:
                precision = pad_right
                unique = False
                trim = 'k'
            else:
                unique = True
                trim = '.'

        # account for sign = ' ' by adding one to pad_left
        if sign == ' ' and not any(np.signbit(finite_vals)):
            pad_left += 1

        # account for nan and inf in pad_left
        if len(nonfinite_vals) != 0:
            nanlen, inflen = 0, 0
            if np.any(umath.isinf(nonfinite_vals)):
                neginf = sign != '-' or np.any(np.isneginf(nonfinite_vals))
                inflen = len(infstr) + neginf
            if np.any(umath.isnan(elem)):
                nanlen = len(nanstr)
            offset = pad_right + 1  # +1 for decimal pt
            pad_left = max(nanlen - offset, inflen - offset, pad_left)

        def print_nonfinite(x):
            with np.errstate(invalid='ignore'):
                if umath.isnan(x):
                    ret = ('+' if sign == '+' else '') + nanstr
                else:  # isinf
                    infsgn = '-' if x < 0 else '+' if sign == '+' else ''
                    ret = infsgn + infstr
                return ' ' * (pad_left + pad_right + 1 - len(ret)) + ret

        if exp_format:

            def print_finite(x):
                return format_float_scientific(x,
                                               precision=precision,
                                               unique=unique,
                                               trim=trim,
                                               sign=sign == '+',
                                               pad_left=pad_left,
                                               exp_digits=exp_size)
        else:

            def print_finite(x):
                return format_float_positional(x,
                                               precision=precision,
                                               unique=unique,
                                               fractional=True,
                                               trim=trim,
                                               sign=sign == '+',
                                               pad_left=pad_left,
                                               pad_right=pad_right)

        def fmt(x):
            if umath.isfinite(x):
                return print_finite(x)
            else:
                return print_nonfinite(x)

        return fmt
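The formatter above ultimately delegates to NumPy's public helpers. A small standalone sketch of the two calls it builds on, np.format_float_scientific and np.format_float_positional, using padding parameters like the ones it computes:

import numpy as np

# Scientific mode: fixed fraction precision and padded exponent digits.
print(np.format_float_scientific(0.000123, precision=2, exp_digits=2))
# -> '1.23e-04'

# Positional mode: pad_left/pad_right pad with spaces on either side of the
# decimal point, so a column of such strings lines up.
print(repr(np.format_float_positional(3.5, pad_left=3, pad_right=3)))
# -> '  3.5  '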
Example #7
def isinf(x):
    # Same dispatch as in Example #4, for the DDesc descriptor interface.
    if isinstance(x, DDesc):
        return _um.isinf(ddesc_as_py(x))
    else:
        return _um.isinf(x)
Example #8
    def sample(self, num_iterations):
        """
        for num_iters:
            for each customer
                remove him from his old_table and update the table params.
                if old_table is empty:
                    remove table
                Calculate prior and likelihood for this customer sitting at each table
                sample for a table index
                if new_table is equal to old_table
                    don't have to update the parameters
                else update params of the old table.
        """
        if self.show_topics is not None:
            print("Topics after initialization")
            print(self.format_topics())
            # Compute the overall usage of topics across the training corpus
            topic_props = self.table_counts_per_doc.sum(axis=1).astype(
                np.float64)
            topic_props /= topic_props.sum()
            print("Words using topics: {}".format(", ".join(
                "{}={:.1f}%".format(i, prop)
                for i, prop in enumerate(topic_props * 100.))))
            topic_doc_props = (self.table_counts_per_doc > 0).astype(
                np.float64).sum(axis=1)
            topic_doc_props /= self.num_documents
            print("Docs using topics: {}".format(", ".join(
                "{}={:.1f}%".format(i, prop)
                for i, prop in enumerate(topic_doc_props * 100.))))

        with VoseAliasUpdater(
                self.aliases,
                self.vocab_embeddings,
                self.prior.kappa,
                self.prior.nu,
                self.table_counts,
                self.table_means,
                self.table_cholesky_ltriangular_mat,
                self.log_determinants,
                das_normalization=self.das_normalization,
        ) as alias_updater:
            for iteration in range(num_iterations):
                stats = SamplingDiagnostics()
                self.log.info("Iteration {}".format(iteration))

                alias_updater.unpause()
                pbar = get_progress_bar(len(self.corpus),
                                        title="Sampling",
                                        show_progress=self.show_progress)
                for d, doc in enumerate(pbar(self.corpus)):
                    if self.show_topics is not None and self.show_topics > 0 and d % self.show_topics == 0:
                        print("Topics after {:,} docs".format(d))
                        print(self.format_topics())

                    for w, cust_id in enumerate(doc):
                        x = self.vocab_embeddings[cust_id]

                        # Remove custId from his old_table
                        old_table_id = self.table_assignments[d][w]
                        # The value itself doesn't really matter, as only the counts are used
                        self.table_assignments[d][w] = -1
                        with self.table_counts.lock:
                            self.table_counts.np[old_table_id] -= 1
                        self.table_counts_per_doc[old_table_id, d] -= 1
                        # Update vector means etc
                        self.sum_squared_table_customers[old_table_id] -= np.outer(x, x)

                        # Topic 'old_table_id' now has one member fewer
                        # Just update params for this customer
                        self.update_table_params(old_table_id,
                                                 cust_id,
                                                 is_removed=True)

                        # Under the alias method, we only do the full likelihood computation for topics
                        # that already have a non-zero count in the current document
                        non_zero_tables = np.where(
                            self.table_counts_per_doc[:, d] > 0)[0]
                        if len(non_zero_tables) == 0:
                            # If there's only one word in a doc, there are no topics to compute the full posterior for
                            no_non_zero = True
                            # Avoid reusing a stale psum from a previous word when computing select_pr below
                            psum = 0.
                        else:
                            no_non_zero = False
                            # We only compute the posterior for these topics
                            log_priors = np.log(
                                self.table_counts_per_doc[non_zero_tables, d])
                            log_likelihoods = np.zeros(len(non_zero_tables),
                                                       dtype=np.float32)
                            for nz_table, table in enumerate(non_zero_tables):
                                log_likelihoods[nz_table] = \
                                    self.log_multivariate_tdensity(x, table)
                            log_posterior = log_priors + log_likelihoods

                            # To prevent overflow, subtract by log(p_max)
                            max_log_posterior = log_posterior.max()
                            scaled_posterior = log_posterior - max_log_posterior
                            if self.das_normalization:
                                # Not doing this now, but following what the Java impl does, however odd that seems
                                psum = np.sum(np.exp(scaled_posterior))
                            else:
                                # Java impl subtracts max before computing psum, but this seems to be wrong
                                # We still subtract first, but then multiply by the max prob afterwards
                                psum = np.exp(
                                    np.log(np.sum(np.exp(scaled_posterior))) +
                                    max_log_posterior)
                            # Now just use the scaled log posterior in the same way as in the Java impl
                            # They have a bin-search method for sampling from the cumulative dist,
                            # but we simply normalize and use Numpy to sample
                            unnormed_posterior = np.exp(scaled_posterior)
                            normed_posterior = unnormed_posterior / unnormed_posterior.sum()

                        # Don't let the alias parameters get updated in the middle of the sampling
                        self.aliases.lock.acquire_read(cust_id)
                        select_pr = psum / (
                            psum + self.alpha *
                            self.aliases.likelihood_sum.np[cust_id])

                        # MHV to draw new topic
                        # Take a number of Metropolis-Hastings samples
                        current_sample = old_table_id
                        # Calculate the true likelihood of this word under the current sample,
                        # for calculating acceptance prob
                        current_sample_log_prob = self.log_multivariate_tdensity(
                            x, current_sample)
                        for r in range(self.mh_steps):
                            # 1. Flip a coin
                            if not no_non_zero and np.random.sample() < select_pr:
                                # Choose from the exactly computed posterior dist, only allowing
                                # topics already sampled in the doc
                                temp = np.random.choice(len(non_zero_tables),
                                                        p=normed_posterior)
                                new_sample = non_zero_tables[temp]
                                stats.log_select_pr(True, select_pr)
                            else:
                                # Choose from the alias, allowing any topic but using slightly
                                # out-of-date likelihoods
                                new_sample = self.aliases.sample_vose(cust_id)
                                stats.log_select_pr(False, select_pr)

                            if new_sample != current_sample:
                                # 2. Find acceptance probability
                                new_sample_log_prob = self.log_multivariate_tdensity(
                                    x, new_sample)
                                # This can sometimes generate an overflow warning from Numpy
                                # We don't care, though: in that case acceptance > 1., so we always accept
                                with np.errstate(over="ignore"):
                                    # From my reading of:
                                    # Li et al. (2014): Reducing the sampling complexity of topic models
                                    # the acceptance probability should be as follows:
                                    acceptance = \
                                        (self.table_counts_per_doc[new_sample, d] + self.alpha) / \
                                        (self.table_counts_per_doc[current_sample, d] + self.alpha) * \
                                        np.exp(new_sample_log_prob - current_sample_log_prob) * \
                                        (self.table_counts_per_doc[current_sample, d]*np.exp(current_sample_log_prob) +
                                         self.alpha*np.exp(self.aliases.log_likelihoods.np[cust_id, current_sample])) / \
                                        (self.table_counts_per_doc[new_sample, d]*np.exp(new_sample_log_prob) +
                                         self.alpha*np.exp(self.aliases.log_likelihoods.np[cust_id, new_sample]))
                                    # The Java implementation, however, does this:
                                    #acceptance = \
                                    #    (self.table_counts_per_doc[new_table_id, d] + self.alpha) / \
                                    #    (self.table_counts_per_doc[current_sample, d] + self.alpha) * \
                                    #    np.exp(new_prob - old_prob) * \
                                    #    (self.table_counts_per_doc[current_sample, d]*old_log_prob +
                                    #     self.alpha*alias.w.np[current_sample]) / \
                                    #    (self.table_counts_per_doc[new_table_id, d]*new_log_prob +
                                    #     self.alpha*alias.w.np[new_table_id])
                                    # The difference is the Java impl doesn't exp the log likelihood in the last
                                    # fraction, i.e. it uses a log prob instead of a prob
                                # 3. Compare against uniform[0,1]
                                # If the acceptance prob > 1, we always accept: this means the new sample
                                # has a higher probability than the old
                                if (isinf(acceptance) or acceptance >= 1.
                                        or np.random.sample() < acceptance):
                                    # No need to sample if acceptance >= 1
                                    # If the acceptance prob < 1, sample whether to accept or not, such that
                                    # the more likely the new sample is compared to the old, the more likely we
                                    # are to keep it
                                    current_sample = new_sample
                                    current_sample_log_prob = new_sample_log_prob
                                    stats.log_acceptance(True, acceptance)
                                else:
                                    stats.log_acceptance(False, acceptance)
                                # NOTE: There seems to be a small error in the Java implementation here
                                # On the last MH step, it doesn't make any difference whether we accept the
                                # sample or not - we always end up using it
                        self.aliases.lock.release_read()

                        if current_sample == old_table_id:
                            stats.log_sampled_same()
                        else:
                            stats.log_sampled_different()

                        # Now have a new assignment: add its counts
                        self.table_assignments[d][w] = current_sample
                        with self.table_counts.lock:
                            self.table_counts.np[current_sample] += 1
                        self.table_counts_per_doc[current_sample, d] += 1
                        self.sum_squared_table_customers[current_sample] += np.outer(x, x)

                        self.update_table_params(current_sample, cust_id)

                # Pause the alias updater until we start the next iteration
                alias_updater.pause()

                # Output some useful stats about sampling
                if stats.acceptance_used():
                    self.log.info(
                        "Acceptance rate = {:.2f}%, mean acceptance: {:.2f} ({:,} samples draw)"
                        .format(stats.acceptance_rate() * 100.,
                                stats.mean_acceptance(),
                                stats.acceptance_samples()))
                else:
                    self.log.info("No new samples drawn")
                self.log.info(
                    "Prior select rate = {:.2f}%, mean select_pr = {:.2f}".
                    format(stats.select_pr_rate() * 100.,
                           stats.mean_select_pr()))
                self.log.info("Chose new sample: {:.2f}%".format(
                    stats.sample_change_rate() * 100.))

                if self.show_topics is not None:
                    print("Topics after iteration {}".format(iteration))
                    print(self.format_topics())
                    # Compute the overall usage of topics across the training corpus
                    topic_props = self.table_counts_per_doc.sum(axis=1).astype(
                        np.float64)
                    topic_props /= topic_props.sum()
                    print("Words using topics: {}".format(", ".join(
                        "{}={:.1f}%".format(i, prop)
                        for i, prop in enumerate(topic_props * 100.))))
                    topic_doc_props = (self.table_counts_per_doc > 0).astype(
                        np.float64).sum(axis=1)
                    topic_doc_props /= self.num_documents
                    print("Docs using topics: {}".format(", ".join(
                        "{}={:.1f}%".format(i, prop)
                        for i, prop in enumerate(topic_doc_props * 100.))))

                if self.save_path is not None:
                    self.log.info("Saving model")
                    self.save()
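As an aside, the overflow guard used in the sampler (subtracting the maximum log posterior before exponentiating) is the standard log-sum-exp trick. A minimal standalone sketch, independent of the sampler's data structures:

import numpy as np

def normalize_log_posterior(log_posterior):
    # Subtract the max so the largest exponentiated value is exactly 1:
    # np.exp can no longer overflow, and the shift cancels on normalization.
    scaled = log_posterior - log_posterior.max()
    unnormed = np.exp(scaled)
    return unnormed / unnormed.sum()

# These log probabilities would overflow a naive np.exp.
print(normalize_log_posterior(np.array([1000., 1001., 1002.])))
# -> approximately [0.090 0.245 0.665]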