def main():
    '''Dump the it/score/rel fields of a MongoDB collection to stdout as CSV.

    Command line: [options] database collection

    Options:
      -o/--host  mongodb host machine name (default: localhost)
      -p/--port  mongodb host machine port number (default: 27017)

    Exactly two positional arguments are required; otherwise the help text
    is printed and the process exits with status 1.  Every record yielded
    by mongodb.fast_find(db, collection) is written as one CSV row; keys
    other than it, score and rel are ignored.
    '''
    from optparse import OptionParser
    usage = '''%prog [options] [database] [collection]'''
    parser = OptionParser(usage=usage)
    parser.add_option(
        '-o', '--host', dest='host', default='localhost',
        help='''mongodb host machine name. default: localhost''')
    parser.add_option(
        '-p', '--port', dest='port', type=int, default=27017,
        help='''mongodb host machine port number. default: 27017''')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive use, is missing under `python -S`, and closes stdin.
        sys.exit(1)
    db_name, coll = args
    # pymongo.Connection was removed in PyMongo 3; use MongoClient when the
    # installed driver offers it and fall back to Connection otherwise.
    client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
    connection = client_cls(options.host, options.port)
    db = connection[db_name]
    writer = csv.DictWriter(sys.stdout, ('it', 'score', 'rel'),
                            extrasaction='ignore')
    for record in mongodb.fast_find(db, coll):
        writer.writerow(record)
def get_P(self, it, query=None):
    '''Retrieve the patterns that match query for iteration it.

    it    -- iteration to select.
    query -- optional dict of additional query conditions.  It is copied
             and never modified; any 'it' key it contains is overridden.

    When self.keep is true, records from every iteration up to and
    including it are selected ({'$lte': it}); otherwise only records of
    iteration it.

    Returns a list holding the 'rel' value of each record that
    mongodb.fast_find yields from the self.boot_p collection.
    '''
    # Copy first so the 'it' assignment below touches neither the caller's
    # dict nor a default object shared between calls.
    query = {} if query is None else dict(query)
    query['it'] = {'$lte': it} if self.keep else it
    records = mongodb.fast_find(self.db, self.boot_p, query, fields=['rel'])
    return [r['rel'] for r in records]
def get_P(self, it, query=None):
    '''Retrieve the patterns that match query for iteration it.

    it    -- iteration to select.
    query -- optional dict of additional query conditions.  It is copied
             and never modified; any 'it' key it contains is overridden.

    When self.keep is true, records from every iteration up to and
    including it are selected ({'$lte': it}); otherwise only records of
    iteration it.

    Returns a list holding the 'rel' value of each record that
    mongodb.fast_find yields from the self.boot_p collection.
    '''
    # Copy first so the 'it' assignment below touches neither the caller's
    # dict nor a default object shared between calls.
    query = {} if query is None else dict(query)
    if self.keep:
        query['it'] = {'$lte': it}
    else:
        query['it'] = it
    return [
        r['rel']
        for r in mongodb.fast_find(
            self.db, self.boot_p, query, fields=['rel'])
    ]
def get_I(self, it, query=None):
    '''Retrieve the instances that match query for iteration it.

    it    -- iteration to select.
    query -- optional dict of additional query conditions.  It is copied
             and never modified; any 'it' key it contains is overridden.

    When self.keep is true, records from every iteration up to and
    including it are selected ({'$lte': it}); otherwise only records of
    iteration it.

    Returns a list of tuples, one per record that mongodb.fast_find yields
    from the self.boot_i collection.  Each tuple holds the values of the
    record's keys that start with 'arg', ordered by sorted key.
    '''
    # Copy first so the 'it' assignment below touches neither the caller's
    # dict nor a default object shared between calls.
    query = {} if query is None else dict(query)
    if self.keep:
        query['it'] = {'$lte': it}
    else:
        query['it'] = it
    return [
        tuple([v for k, v in sorted(r.items()) if k.startswith('arg')])
        for r in mongodb.fast_find(
            self.db, self.boot_i, query, fields=self.args)
    ]
def get_I(self, it, query=None):
    '''Retrieve the instances that match query for iteration it.

    it    -- iteration to select.
    query -- optional dict of additional query conditions.  It is copied
             and never modified; any 'it' key it contains is overridden.

    When self.keep is true, records from every iteration up to and
    including it are selected ({'$lte': it}); otherwise only records of
    iteration it.

    Returns a list of tuples, one per record that mongodb.fast_find yields
    from the self.boot_i collection.  Each tuple holds the values of the
    record's keys that start with 'arg', ordered by sorted key.
    '''
    # Copy first so the 'it' assignment below touches neither the caller's
    # dict nor a default object shared between calls.
    query = {} if query is None else dict(query)
    query['it'] = {'$lte': it} if self.keep else it
    records = mongodb.fast_find(
        self.db, self.boot_i, query, fields=self.args)
    return [
        tuple([v for k, v in sorted(r.items()) if k.startswith('arg')])
        for r in records
    ]
def I2P(self, I):
    '''Collect the patterns matched by the promoted instances in I that
    are not already stored in the self.boot_p collection.

    For every instance in I the self.matrix collection is queried through
    mongodb.fast_find; the 'rel' value of a returned record is kept only
    when self.boot_p has no document with that 'rel'.

    Logs the number of patterns before and after de-duplication and
    returns the distinct patterns as a sorted tuple.
    '''
    found = []
    for instance in I:
        query = mongodb.make_query(i=instance, p=None)
        for record in mongodb.fast_find(
                self.db, self.matrix, query, fields=['rel']):
            # Skip patterns that an earlier iteration already stored.
            if self.db[self.boot_p].find_one({'rel': record['rel']}):
                continue
            found.append(record['rel'])
    distinct = tuple(sorted(set(found)))
    self.logger.info('P: %d => %d' % (len(found), len(distinct)))
    return distinct
def I2P(self, I):
    '''Collect the patterns matched by the promoted instances in I that
    are not already stored in the self.boot_p collection.

    For every instance in I the self.matrix collection is queried through
    mongodb.fast_find; the 'rel' value of a returned record is kept only
    when self.boot_p has no document with that 'rel'.

    Logs the number of patterns before and after de-duplication and
    returns the distinct patterns as a sorted tuple.
    '''
    candidates = []
    for inst in I:
        rows = mongodb.fast_find(
            self.db, self.matrix,
            mongodb.make_query(i=inst, p=None),
            fields=['rel'])
        for row in rows:
            # Keep only patterns with no existing boot_p document.
            if not self.db[self.boot_p].find_one({'rel': row['rel']}):
                candidates.append(row['rel'])
    unique = tuple(sorted(set(candidates)))
    self.logger.info('P: %d => %d' % (len(candidates), len(unique)))
    return unique
def P2I(self, P):
    '''Collect the instances matched by the promoted patterns in P that
    are not already stored in the self.boot_i collection.

    For every pattern in P the self.matrix collection is queried through
    mongodb.fast_find.  From each returned record the keys starting with
    'arg' are taken in sorted key order; the record is kept, as a tuple of
    those values, only when self.boot_i has no document matching the same
    arg key/value pairs.

    Logs the number of instances before and after de-duplication and
    returns the distinct instances as a sorted tuple.
    '''
    found = []
    for pattern in P:
        query = mongodb.make_query(i=None, p=pattern)
        for record in mongodb.fast_find(
                self.db, self.matrix, query, fields=self.args):
            arg_items = [(k, v) for k, v in sorted(record.items())
                         if k.startswith('arg')]
            # Skip instances that an earlier iteration already stored.
            if self.db[self.boot_i].find_one(dict(arg_items)):
                continue
            found.append(tuple([v for _, v in arg_items]))
    distinct = tuple(sorted(set(found)))
    self.logger.info('I: %d => %d' % (len(found), len(distinct)))
    return distinct
def P2I(self, P):
    '''Collect the instances matched by the promoted patterns in P that
    are not already stored in the self.boot_i collection.

    For every pattern in P the self.matrix collection is queried through
    mongodb.fast_find.  From each returned record the keys starting with
    'arg' are taken in sorted key order; the record is kept, as a tuple of
    those values, only when self.boot_i has no document matching the same
    arg key/value pairs.

    Logs the number of instances before and after de-duplication and
    returns the distinct instances as a sorted tuple.
    '''
    candidates = []
    for pat in P:
        rows = mongodb.fast_find(
            self.db, self.matrix,
            mongodb.make_query(i=None, p=pat),
            fields=self.args)
        for row in rows:
            pairs = [(key, value) for key, value in sorted(row.items())
                     if key.startswith('arg')]
            # Keep only instances with no existing boot_i document.
            if not self.db[self.boot_i].find_one(dict(pairs)):
                candidates.append(tuple([value for _, value in pairs]))
    unique = tuple(sorted(set(candidates)))
    self.logger.info('I: %d => %d' % (len(candidates), len(unique)))
    return unique
def main():
    '''Dump the it/score/arg1..arg3 fields of a MongoDB collection to
    stdout as CSV.

    Command line: [options] database collection

    Options:
      -o/--host  mongodb host machine name (default: localhost)
      -p/--port  mongodb host machine port number (default: 27017)

    Exactly two positional arguments are required; otherwise the help text
    is printed and the process exits with status 1.  Every record yielded
    by mongodb.fast_find(db, collection) is written as one CSV row; keys
    other than it, score, arg1, arg2 and arg3 are ignored.
    '''
    from optparse import OptionParser
    usage = '''%prog [options] [database] [collection]'''
    parser = OptionParser(usage=usage)
    parser.add_option(
        '-o', '--host', dest='host', default='localhost',
        help='''mongodb host machine name. default: localhost''')
    parser.add_option(
        '-p', '--port', dest='port', type=int, default=27017,
        help='''mongodb host machine port number. default: 27017''')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        # sys.exit instead of the site-provided exit(): the latter is meant
        # for interactive use, is missing under `python -S`, and closes stdin.
        sys.exit(1)
    db_name, coll = args
    # pymongo.Connection was removed in PyMongo 3; use MongoClient when the
    # installed driver offers it and fall back to Connection otherwise.
    client_cls = getattr(pymongo, 'MongoClient', None) or pymongo.Connection
    connection = client_cls(options.host, options.port)
    db = connection[db_name]
    esp_i_writer = csv.DictWriter(
        sys.stdout, ('it', 'score', 'arg1', 'arg2', 'arg3'),
        extrasaction='ignore')
    for record in mongodb.fast_find(db, coll):
        esp_i_writer.writerow(record)
def make_pmi_ip(self):
    '''Populate the collection named by self._pmi_ip (<matrix>_pmi_ip) with
    instance*pattern Pointwise Mutual Information scores and return its
    name.

    For every record of self.matrix one document is saved containing, in
    this order: 'rel', the fields listed in self.argv, and 'dpmi',
    'discount', 'pmi' taken from self.discounted_pmi(instance, pattern).
    Progress is reported on stderr every 10000 records.  Afterwards the
    indices are ensured through ensure_indices plus a descending compound
    index on (dpmi, pmi).
    '''
    # sys.stderr.write replaces the Python-2-only `print >>` statement; the
    # emitted text is the same and the code also runs on Python 3.
    sys.stderr.write(
        '%s: calculating instance*pattern PMI...\n' % self.fullname)
    xs = mongodb.fast_find(self.db, self.matrix, batch=self.batch)
    for n, x in enumerate(xs, 1):
        p = x['rel']
        i = [x[a] for a in self.argv]
        # Build the ordered field list with extend() rather than list + zip:
        # zip() returns an iterator on Python 3 and cannot be concatenated.
        fields = [('rel', p)]
        fields.extend(zip(self.argv, i))
        # presumably discounted_pmi returns (dpmi, discount, pmi) in this
        # order -- confirm against its definition.
        fields.extend(
            zip(('dpmi', 'discount', 'pmi'), self.discounted_pmi(i, p)))
        self.db[self._pmi_ip].save(SON(fields))
        if n % 10000 == 0:
            sys.stderr.write('# %8d PMI scores calculated\n' % n)
    sys.stderr.write(
        '%s: calculating instance*pattern PMI: done.\n' % self.fullname)
    ensure_indices(self.db, self._pmi_ip)
    self.db[self._pmi_ip].ensure_index(
        [('dpmi', pymongo.DESCENDING), ('pmi', pymongo.DESCENDING)])
    # The docstring has always promised the collection name; deliver it.
    return self._pmi_ip