def _run(self):
    """
    Run every trial of the random-golds experiment.

    Iterates over each gold count in ``range(*self.num_golds)`` and, for
    each count, over ``self.num_trials`` trial indices. Every trial resets
    the gold getter, asks it for ``n_golds`` random golds, hands the
    resulting labels to SWAP, lets SWAP process the changes, and records
    the score export through ``self.add_trial``.
    """
    getter = GoldGetter()
    swap = self.init_swap()

    for n_golds in range(*self.num_golds):
        for trial in range(self.num_trials):
            getter.reset()
            getter.random(n_golds)

            logger.debug('Running trial %d with %d golds', trial, n_golds)
            logger.debug('Real n golds: %d' % len(getter.golds))

            # Entries labelled -1 are reported separately as "fake" golds.
            fake = sum(1 for label in getter.golds.values() if label == -1)
            logger.debug('Fake n golds: %d' % fake)

            swap.set_gold_labels(getter.golds)
            swap.process_changes()

            record = self.Trial(trial, getter.golds, swap.score_export())
            self.add_trial(record)
def _run(self):
    """
    Run one trial per (controversial, consensus) combination.

    Walks the grid spanned by ``range(*self.controversial)`` and
    ``range(*self.consensus)``, skipping the combination where both counts
    are zero. For every remaining pair the gold getter is reset, asked for
    the requested controversial and consensus golds (each only when its
    count is positive), and the labels are passed to SWAP. The score export
    is then stored via ``self.add_trial``.
    """
    getter = GoldGetter()
    swap = self.init_swap()

    # Running trial counter; only used for the log message.
    trial_number = 1
    for cv in range(*self.controversial):
        for cn in range(*self.consensus):
            # Skip the combination where both counts are zero.
            if not (cv or cn):
                continue

            getter.reset()
            logger.info('\nRunning trial %d with cv=%d cn=%d',
                        trial_number, cv, cn)

            if cv > 0:
                getter.controversial(cv)
            if cn > 0:
                getter.consensus(cn)

            swap.set_gold_labels(getter.golds)
            swap.process_changes()

            record = self.Trial(cn, cv, getter.golds, swap.score_export())
            self.add_trial(record)
            trial_number += 1
class Control:
    """
    Gets classifications from database and feeds them to SWAP
    """

    def __init__(self, *args):
        """
        Initialize control

        Args:
            p0:      (Deprecated) prior subject probability
            epsilon: (Deprecated) initial user score

        Raises:
            DeprecationWarning: if any positional argument is supplied;
                p0 and epsilon are no longer accepted here.
        """
        if args:
            raise DeprecationWarning('p0 and epsilon now live in config')

        # Source of the gold labels handed to SWAP in init_swap()
        self.gold_getter = GoldGetter()
        # Created lazily by init_swap(), or injected with setSWAP()
        self.swap = None

    def run(self, amount=None):
        """
        Process all classifications in DB with SWAP

        .. note::
            Iterates through the classification cursor and processes
            each classification one at a time in the order returned
            by the db. Logs status and shows a progress bar.

        Parameters
        ----------
        amount : int, optional
            Expected number of classifications; used as the maximum of
            the progress bar and in the start log message. When omitted,
            the ``first_classifications`` entry of the classification
            stats is used.
        """
        if amount is None:
            stats = DB().classifications.get_stats()
            amount = stats['first_classifications']

        self.init_swap()

        # get classifications
        cursor = self.get_classifications()

        logger.info('Start: SWAP Processing %d classifications', amount)

        count = 0
        with progressbar.ProgressBar(max_value=amount) as bar:
            bar.update(count)
            # Note that the cursor may yield a different number of
            # classifications than `amount`
            for cl in cursor:
                # process classification in swap
                cl = Classification.generate(cl)
                self._delegate(cl)

                # Count first so the bar shows the number of
                # classifications actually processed, and clamp so a
                # cursor longer than `amount` cannot push the bar past
                # its maximum.
                count += 1
                bar.update(min(count, amount))

                # Debug mode: stop once more than config.control.amount
                # classifications have been processed.
                # NOTE(review): this lets amount + 1 items through;
                # confirm whether `>=` was intended.
                if config.control.debug and count > config.control.amount:
                    break

        if config.back_update:
            logger.info('back_update active: processing changes')
            self.swap.process_changes()

        logger.info('done')

    def _delegate(self, cl):
        """
        Passes classification to SWAP

        Purpose is to allow subclasses to override how SWAP receives
        classifications

        Parameters
        ----------
        cl : Classification
            Classification being delegated
        """
        self.swap.classify(cl)

    def init_swap(self):
        """
        Create a new SWAP instance if none is set yet (otherwise reuse
        the current one), and pass it the appropriate gold labels.

        Returns
        -------
        SWAP
            The SWAP instance now stored on this control
        """
        logger.debug('Initializing SWAP')
        swap = SWAP() if self.swap is None else self.swap

        swap.set_gold_labels(self.get_gold_labels())

        self.swap = swap
        return swap

    def get_gold_labels(self):
        """
        Get the set of gold labels being used for this run
        """
        return self.gold_getter.golds

    @staticmethod
    def get_classifications():
        """
        Get the cursor containing classifications from db

        Returns
        -------
        swap.db.Cursor
            Cursor with classifications
        """
        return DB().classifications.getClassifications()

    def getSWAP(self):
        """
        Get the SWAP instance being used

        Returns
        -------
        SWAP
            SWAP
        """
        return self.swap

    def setSWAP(self, swap):
        """
        Set the SWAP object
        """
        self.swap = swap

    def reset(self):
        """
        Reset the gold getter and SWAP instances.

        Useful when running multiple subsequent instances of SWAP
        """
        self.swap = None
        self.gold_getter.reset()