Beispiel #1
0
    def _parameter_estimation(self, learning_iterator, epsilon, max_epochs):
        """Consume the learning iterator until a stopping condition is met.

        Arguments
        ---------
        learning_iterator : iterable
            yields one tuple `(objective, prev_est, cat_est, acc_est)`
            per optimization epoch

        epsilon : float
            the loop stops once the absolute change of the objective,
            averaged over the last 10 epochs, drops below this value

        max_epochs : int
            the loop stops once the epoch counter exceeds this value

        Returns
        -------
        cat_est
            third element of the last tuple taken from the iterator

        The last `prev_est` and `acc_est` are stored in `self.pi` and
        `self.theta` respectively.
        """

        if epsilon < 0.0:
            raise PyannoValueError("epsilon < 0.0")
        if max_epochs < 0:
            raise PyannoValueError("max_epochs < 0")

        logger.info('Start parameters optimization...')

        # number of epochs over which the objective change is averaged
        window = 10
        progress_fmt = "Epoch={0:6d}  obj={1:+10.4f}   diff={2:10.4f}"

        objectives = []
        diff = np.inf
        for epoch, estimates in enumerate(learning_iterator):
            objective, prev_est, cat_est, acc_est = estimates

            # the logged diff is the one computed in the previous epoch
            logger.info(progress_fmt.format(epoch, objective, diff))
            objectives.append(objective)

            # stopping condition: epoch budget exhausted
            if epoch > max_epochs:
                break

            # stopping condition: objective has flattened out
            if epoch > window:
                diff = (objectives[-1] - objectives[-1 - window]) / 10.0
                if abs(diff) < epsilon:
                    break

        logger.info('Parameters optimization finished')

        # update internal parameters with the most recent estimates
        self.pi = prev_est
        self.theta = acc_est

        return cat_est
Beispiel #2
0
def _fleiss_kappa_nannotations(nannotations):
    """Compute Fleiss' kappa from a table of annotation counts.

    This is a testable helper for fleiss_kappa.

    Arguments
    ---------
    nannotations : ndarray
        array with one row per item and one column per category; each
        entry is the number of annotations of that category for that item

    Returns
    -------
    The result of `chance_adjusted_agreement` applied to the observed
    and the chance agreement.

    Raises
    ------
    PyannoValueError
        if the rows do not all sum to the same number of annotations
    """

    # every item must have received the same number of annotations
    per_item_totals = nannotations.sum(1)
    nannotations_per_item = per_item_totals[0]
    if np.any(per_item_totals != nannotations_per_item):
        raise PyannoValueError(
            'Number of annotations per item is not constant.'
        )

    # chance agreement: sum of squared empirical category frequencies
    nitems = nannotations.shape[0]
    total_annotations = nitems * nannotations_per_item
    category_freqs = nannotations.sum(0) / total_annotations
    chance_agreement = (category_freqs**2.).sum()

    # per-item agreement, relative to the number of possible annotator pairs
    agreeing_pairs = (nannotations**2.).sum(1) - nannotations_per_item
    possible_pairs = nannotations_per_item * (nannotations_per_item - 1.)
    per_item_agreement = agreeing_pairs / possible_pairs
    observed_agreement = per_item_agreement.mean()

    return chance_adjusted_agreement(observed_agreement, chance_agreement)
Beispiel #3
0
 def _check_consistency(self, data_id, anno_container):
     """Make sure that all entries with same ID have the same annotations.
     """
     if data_id in self.database:
         previous = self.database[data_id]
         if len(previous) > 0:
             # check if the new annotations are the same as the previous
             if not np.all(previous[0].anno_container.annotations ==
                           anno_container.annotations):
                 msg = ('Conflicting annotations with same ID. Please '
                        'rename the new entry.')
                 raise PyannoValueError(msg)
Beispiel #4
0
    def _from_generator(rows_generator, missing_values, name=''):
        """Build an AnnotationsContainer from an iterable of annotation rows.

        Arguments
        ---------
        rows_generator : iterable
            yields one sequence of raw labels per row; all rows must have
            the same length

        missing_values : iterable
            raw labels that mark a missing annotation

        name : str
            passed through to the container

        Returns
        -------
        anno : AnnotationsContainer
            container built from the raw rows, the sorted labels found in
            the data, and the sorted missing values found in the data

        Raises
        ------
        PyannoValueError
            if the rows do not all have the same number of entries
        """

        def is_nan(value):
            # NaN is the only value that compares unequal to itself; this
            # also works for non-numeric labels such as strings
            return value != value

        missing_set = set(missing_values)
        labels_set = set()

        raw_annotations = []
        nannotators = None
        for n, row in enumerate(rows_generator):

            # verify that number of entries is consistent in the whole file
            if nannotators is None:
                nannotators = len(row)
            elif len(row) != nannotators:
                raise PyannoValueError(
                    'File has inconsistent number of entries '
                    'on separate lines (line {})'.format(n))

            raw_annotations.append(row)
            labels_set.update(row)

        # split the values found in the data into labels and missing values
        label_candidates = labels_set - missing_set
        missing_candidates = missing_set & labels_set

        # NaN must be filtered out *before* sorting: every ordering
        # comparison with NaN is False, so sorted() neither puts NaN first
        # nor orders the remaining values reliably while NaN is present.
        # This also covers np.nan != np.nan, which defeats the set
        # operations above when the NaN objects are distinct.
        all_labels = sorted(v for v in label_candidates if not is_nan(v))
        found_missing = sorted(v for v in missing_candidates if not is_nan(v))

        nan_seen = (len(all_labels) != len(label_candidates)
                    or len(found_missing) != len(missing_candidates))
        if nan_seen:
            # NaN always counts as a missing value, listed once and first
            found_missing.insert(0, np.nan)

        # create annotations object
        anno = AnnotationsContainer(
            raw_annotations=raw_annotations,
            labels=all_labels,
            missing_values=found_missing,
            name=name
        )

        return anno
Beispiel #5
0
def sample_from_proposal_distribution(theta, step, lower, upper):
    """Draw one proposal for `theta` inside the interval [lower, upper].

    Arguments
    ---------
    theta : float
        current parameter value

    step : float
        width of the proposal distribution over `theta`

    lower : float
        lower bound for `theta`

    upper : float
        upper bound for `theta`

    Returns
    -------
    theta_new : float
        new sample from the distribution over theta

    log_q_ratio : float
        log-ratio of probability of new value given old value
        to probability of old value given new value

    Raises
    ------
    PyannoValueError
        if `theta` lies outside [lower, upper]
    """

    out_of_bounds = theta < lower or theta > upper
    if out_of_bounds:
        raise PyannoValueError('Parameter values out or range')

    # uniform draw: jump size on a boundary, jump direction otherwise
    draw = np.random.random()

    # on the upper boundary the only possible move is downwards
    if theta == upper:
        reach = min(step, upper - lower)
        proposal = upper - draw * reach
        log_forward = -np.log(reach)
        log_backward = -np.log(2 * min(step, upper - proposal))
        return proposal, log_backward - log_forward

    # on the lower boundary the only possible move is upwards
    if theta == lower:
        reach = min(step, upper - lower)
        proposal = lower + draw * reach
        log_forward = -np.log(reach)
        log_backward = -np.log(2. * min(step, proposal - lower))
        return proposal, log_backward - log_forward

    if draw > 0.5:
        # move to the 'left': jump uniformly within the room below theta
        reach = min(step, theta - lower)
        proposal = theta - np.random.random() * reach
        log_forward = -np.log(reach)
        log_backward = -np.log(min(step, upper - proposal))
    else:
        # move to the 'right': jump uniformly within the room above theta
        reach = min(step, upper - theta)
        proposal = theta + np.random.random() * reach
        log_forward = -np.log(reach)
        log_backward = -np.log(min(step, proposal - lower))

    return proposal, log_backward - log_forward
Beispiel #6
0
 def _raise_if_incompatible(self, annotations):
     if not self.are_annotations_compatible(annotations):
         raise PyannoValueError('Annotations are incompatible with model '
                                'parameters')