Example #1
0
 def add_seeds(self):
     '''Turn every seed into a document and hand it to mongodb.cache.

     A seed is split on tabs; its fields are stored under the keys
     arg1, arg2, ... and the document additionally gets it=0 and a
     reliability score of 1.0.  Documents are cached for
     self.db / self.boot_i.
     '''
     self.logger.debug('add_seeds: %d %s' % (len(self.seeds), self.seeds))
     for seed in self.seeds:
         self.logger.debug('seed: %s' % seed)
         # number the tab-separated fields starting at 1
         doc = {}
         for pos, value in enumerate(seed.split('\t'), 1):
             doc['arg%d' % pos] = value
         doc.update(it=0, score=1.0)
         mongodb.cache(self.db, self.boot_i, doc)
Example #2
0
 def add_seeds(self):
     '''Cache all seeds as iteration-0 documents with score 1.0.

     Each seed string is split on tab characters; the resulting fields
     are keyed arg1, arg2, ... in the document that is passed to
     mongodb.cache together with self.db and self.boot_i.
     '''
     self.logger.debug('add_seeds: %d %s' % (len(self.seeds), self.seeds))
     for line in self.seeds:
         self.logger.debug('seed: %s' % line)
         fields = line.split('\t')
         # keys are 1-based: arg1 for the first field, arg2 for the next...
         keys = ['arg%d' % (i + 1) for i in range(len(fields))]
         doc = dict(zip(keys, fields), it=0, score=1.0)
         mongodb.cache(self.db, self.boot_i, doc)
Example #3
0
    def iterate_p(self, mutexes=None):
        '''Perform one pattern iteration of bootstrapping.

        Reads the instances promoted in iteration ``self.it - 1``, looks up
        the patterns matching them, filters those through
        ``mutex_filter_p`` and hands the first ``self.n`` entries of the
        scorer's ranking to ``mongodb.cache`` for ``self.boot_p``.
        Afterwards indices on ``it`` and ``rel`` are created on that
        collection.

        mutexes -- optional value passed on to ``mutex_filter_p``; when
                   omitted, a fresh empty list is used for each call.
        '''
        # None instead of [] as default: a list default would be one
        # object shared between all calls of this method.
        if mutexes is None:
            mutexes = []

        if not getattr(self, 'connection', None):
            self.init_connection()

        self.logger.info(' ### BOOTSTRAPPING PATTERN ITERATION: %d ###',
                         self.it)

        # read promoted instances of last bootstrapping iteration
        self.logger.info('getting promoted instances...')
        instances = self.get_I(self.it - 1)
        self.logger.info('I: %d', len(instances))
        self.logger.info('getting promoted instances: done.')

        # find matching patterns
        self.logger.info('getting matching patterns...')
        candidates = self.I2P(instances)
        patterns = self.mutex_filter_p(instances, candidates, mutexes)
        self.logger.info('getting matching patterns: done.')

        # rank patterns by reliability score
        self.logger.info('ranking patterns ...')
        ranking = self.scorer.rank_patterns(instances, patterns, self.it)
        self.logger.info('ranking patterns: done.')

        # save top n for the boot_p collection
        self.logger.info('saving top %d patterns...', self.n)
        for ranked in ranking[:self.n]:
            self.logger.info('r: %s', ranked)
            mongodb.cache(self.db, self.boot_p, ranked)
        self.logger.info('saving top %d patterns: done.', self.n)

        # create_index replaces ensure_index, which was deprecated in
        # PyMongo 3.0 and removed in 4.0; creating an index that already
        # exists is a no-op.
        self.logger.info('ensuring indices ...')
        # index for iteration number
        self.db[self.boot_p].create_index([
            ('it', pymongo.DESCENDING),
        ])
        # index for <REL>
        self.db[self.boot_p].create_index([
            ('rel', pymongo.ASCENDING),
        ])
        self.logger.info('ensuring indices: done.')
Example #4
0
    def iterate_i(self, mutexes=None):
        '''Perform one instance iteration of bootstrapping.

        Reads the patterns promoted in iteration ``self.it``, looks up the
        instances matching them, filters those through ``mutex_filter_i``
        and hands the first ``self.n`` entries of the scorer's ranking to
        ``mongodb.cache`` for ``self.boot_i``.  Afterwards an index on
        ``it`` and a compound index over ``self.args`` are created on that
        collection.

        mutexes -- optional value passed on to ``mutex_filter_i``; when
                   omitted, a fresh empty list is used for each call.
        '''
        # None instead of [] as default: a list default would be one
        # object shared between all calls of this method.
        if mutexes is None:
            mutexes = []

        if not getattr(self, 'connection', None):
            self.init_connection()

        self.logger.info(' ### BOOTSTRAPPING INSTANCE ITERATION: %d ###',
                         self.it)

        # read promoted patterns of last bootstrapping iteration
        self.logger.info('getting promoted patterns...')
        patterns = self.get_P(self.it)
        self.logger.info('P: %d', len(patterns))
        self.logger.info('getting promoted patterns: done.')

        # find matching instances
        self.logger.info('getting matching instances...')
        candidates = self.P2I(patterns)
        instances = self.mutex_filter_i(candidates, patterns, mutexes)
        self.logger.info('getting matching instances: done.')

        # rank instances by reliability score
        self.logger.info('ranking instances ...')
        ranking = self.scorer.rank_instances(instances, patterns, self.it)
        self.logger.info('ranking instances: done.')

        # save top n for the boot_i collection
        self.logger.info('saving top %d instances...', self.n)
        for ranked in ranking[:self.n]:
            self.logger.info('r: %s', ranked)
            mongodb.cache(self.db, self.boot_i, ranked)
        self.logger.info('saving top %d instances: done.', self.n)

        # create_index replaces ensure_index, which was deprecated in
        # PyMongo 3.0 and removed in 4.0; creating an index that already
        # exists is a no-op.
        self.logger.info('ensuring indices ...')
        # index for iteration number
        self.db[self.boot_i].create_index([
            ('it', pymongo.DESCENDING),
        ])
        # index for <ARGJ,...,ARGN>
        self.db[self.boot_i].create_index([(arg, pymongo.ASCENDING)
                                           for arg in self.args])
        self.logger.info('ensuring indices: done.')
Example #5
0
    def iterate_i(self, mutexes=None):
        '''Run a single bootstrapping iteration that promotes instances.

        The patterns promoted in iteration ``self.it`` are fetched, mapped
        to their matching instances and filtered by ``mutex_filter_i``.
        The scorer ranks the result and the first ``self.n`` entries are
        passed to ``mongodb.cache`` for ``self.boot_i``.  Finally an index
        on ``it`` and a compound index over ``self.args`` are created on
        that collection.

        mutexes -- optional value forwarded to ``mutex_filter_i``; a new
                   empty list is used when it is not given.
        '''
        # A list literal as default would be shared across calls, so the
        # default is None and the empty list is built here.
        if mutexes is None:
            mutexes = []

        if not getattr(self, 'connection', None):
            self.init_connection()

        self.logger.info(' ### BOOTSTRAPPING INSTANCE ITERATION: %d ###',
                         self.it)

        # read promoted patterns of last bootstrapping iteration
        self.logger.info('getting promoted patterns...')
        patterns = self.get_P(self.it)
        self.logger.info('P: %d', len(patterns))
        self.logger.info('getting promoted patterns: done.')

        # find matching instances
        self.logger.info('getting matching instances...')
        matched = self.P2I(patterns)
        instances = self.mutex_filter_i(matched, patterns, mutexes)
        self.logger.info('getting matching instances: done.')

        # rank instances by reliability score
        self.logger.info('ranking instances ...')
        ranked = self.scorer.rank_instances(instances, patterns, self.it)
        self.logger.info('ranking instances: done.')

        # save top n for the boot_i collection
        self.logger.info('saving top %d instances...', self.n)
        for entry in ranked[:self.n]:
            self.logger.info('r: %s', entry)
            mongodb.cache(self.db, self.boot_i, entry)
        self.logger.info('saving top %d instances: done.', self.n)

        # ensure_index is deprecated since PyMongo 3.0 and gone in 4.0;
        # create_index is a no-op when the index already exists.
        self.logger.info('ensuring indices ...')
        # index for iteration number
        self.db[self.boot_i].create_index([('it', pymongo.DESCENDING)])
        # index for <ARGJ,...,ARGN>
        self.db[self.boot_i].create_index(
            [(arg, pymongo.ASCENDING) for arg in self.args]
        )
        self.logger.info('ensuring indices: done.')
Example #6
0
    def iterate_p(self, mutexes=None):
        '''Run a single bootstrapping iteration that promotes patterns.

        The instances promoted in iteration ``self.it - 1`` are fetched,
        mapped to their matching patterns and filtered by
        ``mutex_filter_p``.  The scorer ranks the result and the first
        ``self.n`` entries are passed to ``mongodb.cache`` for
        ``self.boot_p``.  Finally indices on ``it`` and ``rel`` are created
        on that collection.

        mutexes -- optional value forwarded to ``mutex_filter_p``; a new
                   empty list is used when it is not given.
        '''
        # A list literal as default would be shared across calls, so the
        # default is None and the empty list is built here.
        if mutexes is None:
            mutexes = []

        if not getattr(self, 'connection', None):
            self.init_connection()

        self.logger.info(' ### BOOTSTRAPPING PATTERN ITERATION: %d ###',
                         self.it)

        # read promoted instances of last bootstrapping iteration
        self.logger.info('getting promoted instances...')
        instances = self.get_I(self.it - 1)
        self.logger.info('I: %d', len(instances))
        self.logger.info('getting promoted instances: done.')

        # find matching patterns
        self.logger.info('getting matching patterns...')
        matched = self.I2P(instances)
        patterns = self.mutex_filter_p(instances, matched, mutexes)
        self.logger.info('getting matching patterns: done.')

        # rank patterns by reliability score
        self.logger.info('ranking patterns ...')
        ranked = self.scorer.rank_patterns(instances, patterns, self.it)
        self.logger.info('ranking patterns: done.')

        # save top n for the boot_p collection
        self.logger.info('saving top %d patterns...', self.n)
        for entry in ranked[:self.n]:
            self.logger.info('r: %s', entry)
            mongodb.cache(self.db, self.boot_p, entry)
        self.logger.info('saving top %d patterns: done.', self.n)

        # ensure_index is deprecated since PyMongo 3.0 and gone in 4.0;
        # create_index is a no-op when the index already exists.
        self.logger.info('ensuring indices ...')
        # index for iteration number
        self.db[self.boot_p].create_index([('it', pymongo.DESCENDING)])
        # index for <REL>
        self.db[self.boot_p].create_index([('rel', pymongo.ASCENDING)])
        self.logger.info('ensuring indices: done.')