예제 #1
0
    def _run(self):
        """
        Run SWAP repeatedly with differently sized sets of gold labels

        For every ``n_golds`` in ``range(*self.num_golds)`` this performs
        ``self.num_trials`` trials. Each trial resets the gold getter,
        calls ``random(n_golds)`` on it (presumably drawing that many
        golds at random -- confirm against GoldGetter), passes the
        resulting gold labels to SWAP, lets SWAP process the changes and
        records a ``Trial`` built from the trial index, the gold labels
        and SWAP's exported scores through ``self.add_trial``.
        """
        gg = GoldGetter()
        swap = self.init_swap()
        for n_golds in range(*self.num_golds):
            for n in range(self.num_trials):

                gg.reset()
                gg.random(n_golds)

                # Arguments are handed to the logger unformatted so the
                # message is only built when debug logging is enabled
                logger.debug('Running trial %d with %d golds', n, n_golds)
                logger.debug('Real n golds: %d', len(gg.golds))
                # Golds whose label is -1 are reported separately as "fake"
                fake = sum(1 for gold in gg.golds.values() if gold == -1)
                logger.debug('Fake n golds: %d', fake)

                swap.set_gold_labels(gg.golds)
                swap.process_changes()
                self.add_trial(self.Trial(n, gg.golds, swap.score_export()))
예제 #2
0
    def _run(self):
        """
        Run one SWAP trial per (controversial, consensus) combination

        ``cv`` iterates over ``range(*self.controversial)`` and ``cn``
        over ``range(*self.consensus)``; the combination where both are
        zero is skipped. For every remaining pair the gold getter is
        reset, its ``controversial`` and ``consensus`` selectors are
        called for the counts that are positive, the resulting gold
        labels are passed to SWAP, and a ``Trial`` holding ``cn``,
        ``cv``, the gold labels and SWAP's exported scores is recorded
        through ``self.add_trial``.
        """
        gold_getter = GoldGetter()
        swap = self.init_swap()

        # 1-based number of the next trial; only used in the log message
        trial_number = 1
        for cv in range(*self.controversial):
            for cn in range(*self.consensus):
                # Skip the combination where both counts are zero
                if (cv, cn) == (0, 0):
                    continue

                gold_getter.reset()
                logger.info('\nRunning trial %d with cv=%d cn=%d',
                            trial_number, cv, cn)
                trial_number += 1

                # Only positive counts are forwarded to the gold getter
                if cv > 0:
                    gold_getter.controversial(cv)
                if cn > 0:
                    gold_getter.consensus(cn)

                swap.set_gold_labels(gold_getter.golds)
                swap.process_changes()

                trial = self.Trial(
                    cn, cv, gold_getter.golds, swap.score_export())
                self.add_trial(trial)
예제 #3
0
class Control:
    """
    Pulls classifications out of the database and hands them to SWAP
    """

    def __init__(self, *args):
        """
        Set up a controller with a gold getter and no SWAP instance yet

        Parameters
        ----------
        *args
            (Deprecated) formerly ``p0`` (prior subject probability) and
            ``epsilon`` (initial user score). Supplying any positional
            argument raises.

        Raises
        ------
        DeprecationWarning
            If any positional argument is supplied
        """
        if args:
            raise DeprecationWarning('p0 and epsilon now live in config')

        # Source of the gold labels that init_swap passes on to SWAP
        self.gold_getter = GoldGetter()
        # Filled in by init_swap, or injected through setSWAP
        self.swap = None

    def run(self, amount=None):
        """
        Process the classifications returned by the database with SWAP

        Classifications are handled one at a time, in the order the
        cursor yields them, while a progress bar is shown. Afterwards,
        if ``config.back_update`` is set, SWAP is asked to process its
        pending changes.

        Parameters
        ----------
        amount : int, optional
            Expected number of classifications; used as the maximum of
            the progress bar and in the start log message. When omitted
            it is read from the ``first_classifications`` entry of the
            database's classification stats.

        .. note::
            The cursor may yield fewer classifications than ``amount``
            if not all classifications are being queried.
        """
        if amount is None:
            stats = DB().classifications.get_stats()
            amount = stats['first_classifications']

        self.init_swap()
        cursor = self.get_classifications()

        logger.info('Start: SWAP Processing %d classifications', amount)

        with progressbar.ProgressBar(max_value=amount) as bar:
            bar.update(0)
            for index, raw in enumerate(cursor):
                self._delegate(Classification.generate(raw))

                # NOTE(review): the bar receives the number of
                # classifications handled before this one, so it trails
                # the real progress by one -- confirm this is intended
                bar.update(index)

                # NOTE(review): in debug mode the loop only stops once
                # the processed count exceeds config.control.amount, so
                # for an integer amount one extra classification is
                # processed -- confirm this is intended
                processed = index + 1
                if config.control.debug and processed > config.control.amount:
                    break

        if config.back_update:
            logger.info('back_update active: processing changes')
            self.swap.process_changes()
        logger.info('done')

    def _delegate(self, cl):
        """
        Hand a single classification to SWAP

        Exists as a separate hook so subclasses can override how SWAP
        receives classifications.

        Parameters
        ----------
        cl : Classification
            Classification being delegated
        """
        self.swap.classify(cl)

    def init_swap(self):
        """
        Make sure a SWAP instance exists and give it the gold labels

        An instance already stored on this object is reused; otherwise
        a new ``SWAP()`` is created. The instance is stored on
        ``self.swap`` only after the gold labels have been set.

        Returns
        -------
        SWAP
            The SWAP instance now stored on this object
        """
        logger.debug('Initializing SWAP')
        swap = SWAP() if self.swap is None else self.swap

        swap.set_gold_labels(self.get_gold_labels())

        self.swap = swap
        return swap

    def get_gold_labels(self):
        """
        Get the gold labels currently held by the gold getter
        """
        getter = self.gold_getter
        return getter.golds

    @staticmethod
    def get_classifications():
        """
        Get the cursor containing classifications from the database

        Returns
        -------
        cursor
            Whatever ``DB().classifications.getClassifications()``
            returns; ``run`` iterates over it
        """
        classifications = DB().classifications
        return classifications.getClassifications()

    def getSWAP(self):
        """
        Get the SWAP instance being used

        Returns
        -------
        SWAP
            The stored instance, or None if none has been set
        """
        return self.swap

    def setSWAP(self, swap):
        """
        Replace the SWAP instance being used

        Parameters
        ----------
        swap : SWAP
            Instance to store on this object
        """
        self.swap = swap

    def reset(self):
        """
        Drop the SWAP instance and reset the gold getter

        Useful when running several SWAP runs one after another
        """
        self.swap = None
        self.gold_getter.reset()