コード例 #1
0
def main():
    '''Dump a MongoDB collection to standard output as CSV.

    Command line: ``[options] database collection``.

    Every document yielded by ``mongodb.fast_find`` becomes one CSV row made
    of its ``it``, ``score`` and ``rel`` fields; other keys are ignored and
    no header row is written.

    Prints the usage text and exits with status 1 unless exactly two
    positional arguments are supplied.
    '''
    from optparse import OptionParser
    usage = '''%prog [options] [database] [collection]'''
    parser = OptionParser(usage=usage)
    parser.add_option('-o',
                      '--host',
                      dest='host',
                      default='localhost',
                      help='''mongodb host machine name. default: localhost''')
    parser.add_option(
        '-p',
        '--port',
        dest='port',
        type=int,
        default=27017,
        help='''mongodb host machine port number. default: 27017''')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        # sys.exit() is the supported way to terminate a script; the bare
        # exit() helper is injected by the ``site`` module and is not
        # guaranteed to exist (e.g. ``python -S`` or frozen applications).
        sys.exit(1)
    db_, coll = args
    # NOTE(review): pymongo.Connection was removed in PyMongo 3.0 in favour of
    # MongoClient -- confirm the pinned pymongo version before switching.
    connection = pymongo.Connection(options.host, options.port)
    db = connection[db_]
    # extrasaction='ignore' drops document keys that are not CSV columns.
    esp_i_writer = csv.DictWriter(sys.stdout, ('it', 'score', 'rel'),
                                  extrasaction='ignore')
    for r in mongodb.fast_find(db, coll):
        esp_i_writer.writerow(r)
コード例 #2
0
ファイル: bootstrapper.py プロジェクト: aurora1625/web-ka
 def get_P(self, it, query=None):
     '''Retrieve the patterns matching ``query`` for iteration ``it``.

     it    -- iteration number to select on.
     query -- optional dict of extra query conditions; it is not modified.

     When ``self.keep`` is true, documents whose ``it`` is less than or
     equal to ``it`` are selected; otherwise only those whose ``it`` equals
     it.  Returns the list of ``rel`` values of the documents yielded by
     ``mongodb.fast_find`` on the ``self.boot_p`` collection.
     '''
     # Work on a private copy: the previous ``query={}`` default was mutated
     # in place, which leaked the 'it' condition into the shared default
     # dict and into any dict supplied by the caller.
     query = {} if query is None else dict(query)
     if self.keep:
         query['it'] = {'$lte': it}
     else:
         query['it'] = it
     return [r['rel']
             for r in mongodb.fast_find(
                 self.db, self.boot_p, query, fields=['rel'])]
コード例 #3
0
ファイル: bootstrapper.py プロジェクト: quintia/web-ka
 def get_P(self, it, query=None):
     '''Retrieve the patterns matching ``query`` for iteration ``it``.

     it    -- iteration number to select on.
     query -- optional dict of extra query conditions; it is not modified.

     When ``self.keep`` is true, documents whose ``it`` is less than or
     equal to ``it`` are selected; otherwise only those whose ``it`` equals
     it.  Returns the list of ``rel`` values of the documents yielded by
     ``mongodb.fast_find`` on the ``self.boot_p`` collection.
     '''
     # Copy before adding the 'it' condition: a mutable ``{}`` default is
     # shared between calls, and writing into it (or into the caller's own
     # dict) is an unintended side effect.
     query = {} if query is None else dict(query)
     if self.keep:
         query['it'] = {'$lte': it}
     else:
         query['it'] = it
     return [
         r['rel'] for r in mongodb.fast_find(
             self.db, self.boot_p, query, fields=['rel'])
     ]
コード例 #4
0
ファイル: bootstrapper.py プロジェクト: quintia/web-ka
 def get_I(self, it, query=None):
     '''Retrieve the instances matching ``query`` for iteration ``it``.

     it    -- iteration number to select on.
     query -- optional dict of extra query conditions; it is not modified.

     When ``self.keep`` is true, documents whose ``it`` is less than or
     equal to ``it`` are selected; otherwise only those whose ``it`` equals
     it.  Each document yielded by ``mongodb.fast_find`` on the
     ``self.boot_i`` collection becomes a tuple of the values whose keys
     start with ``'arg'``, ordered by key name.  Returns the list of those
     tuples.
     '''
     # Copy before adding the 'it' condition: a mutable ``{}`` default is
     # shared between calls, and writing into it (or into the caller's own
     # dict) is an unintended side effect.
     query = {} if query is None else dict(query)
     if self.keep:
         query['it'] = {'$lte': it}
     else:
         query['it'] = it
     return [
         tuple([v for k, v in sorted(r.items()) if k.startswith('arg')])
         for r in mongodb.fast_find(
             self.db, self.boot_i, query, fields=self.args)
     ]
コード例 #5
0
ファイル: bootstrapper.py プロジェクト: aurora1625/web-ka
 def get_I(self, it, query=None):
     '''Retrieve the instances matching ``query`` for iteration ``it``.

     it    -- iteration number to select on.
     query -- optional dict of extra query conditions; it is not modified.

     When ``self.keep`` is true, documents whose ``it`` is less than or
     equal to ``it`` are selected; otherwise only those whose ``it`` equals
     it.  Each document yielded by ``mongodb.fast_find`` on the
     ``self.boot_i`` collection becomes a tuple of the values whose keys
     start with ``'arg'``, ordered by key name.  Returns the list of those
     tuples.
     '''
     # Work on a private copy: the previous ``query={}`` default was mutated
     # in place, which leaked the 'it' condition into the shared default
     # dict and into any dict supplied by the caller.
     query = {} if query is None else dict(query)
     if self.keep:
         query['it'] = {'$lte': it}
     else:
         query['it'] = it
     return [tuple([v
                    for k, v in sorted(r.items())
                    if k.startswith('arg')])
             for r in mongodb.fast_find(
                 self.db, self.boot_i, query, fields=self.args)]
コード例 #6
0
ファイル: bootstrapper.py プロジェクト: aurora1625/web-ka
 def I2P(self, I):
     '''Collect the patterns matched by the promoted instances in I,
     skipping every pattern that already has a document in the
     self.boot_p collection.

     Returns a sorted tuple of the distinct pattern values and logs the
     number of hits before and after de-duplication.'''
     hits = []
     for instance in I:
         rows = mongodb.fast_find(
             self.db, self.matrix,
             mongodb.make_query(i=instance, p=None), fields=['rel'])
         for row in rows:
             # A truthy find_one() result means the pattern is already stored.
             if self.db[self.boot_p].find_one({'rel': row['rel']}):
                 continue
             hits.append(row['rel'])
     distinct = tuple(sorted(set(hits)))
     self.logger.info('P: %d => %d' % (len(hits), len(distinct)))
     return distinct
コード例 #7
0
ファイル: bootstrapper.py プロジェクト: quintia/web-ka
 def I2P(self, I):
     '''Gather patterns co-occurring with the promoted instances in I
     that have no document yet in the self.boot_p collection.

     The result is a sorted tuple without duplicates; the raw and
     de-duplicated counts are written to self.logger.'''
     collected = []
     for inst in I:
         query = mongodb.make_query(i=inst, p=None)
         for doc in mongodb.fast_find(self.db, self.matrix, query,
                                      fields=['rel']):
             already_known = self.db[self.boot_p].find_one(
                 {'rel': doc['rel']})
             if not already_known:
                 collected.append(doc['rel'])
     unique_sorted = tuple(sorted(set(collected)))
     self.logger.info('P: %d => %d' % (len(collected), len(unique_sorted)))
     return unique_sorted
コード例 #8
0
ファイル: bootstrapper.py プロジェクト: quintia/web-ka
 def P2I(self, P):
     '''Gather instances co-occurring with the promoted patterns in P
     that have no matching document yet in the self.boot_i collection.

     An instance is the tuple of a row's 'arg*' values ordered by key
     name.  The result is a sorted tuple without duplicates; the raw and
     de-duplicated counts are written to self.logger.'''
     collected = []
     for pattern in P:
         query = mongodb.make_query(i=None, p=pattern)
         for doc in mongodb.fast_find(self.db, self.matrix, query,
                                      fields=self.args):
             arg_items = [(key, value)
                          for key, value in sorted(doc.items())
                          if key.startswith('arg')]
             # The same key/value pairs double as the lookup filter.
             if self.db[self.boot_i].find_one(dict(arg_items)):
                 continue
             collected.append(tuple([value for _, value in arg_items]))
     unique_sorted = tuple(sorted(set(collected)))
     self.logger.info('I: %d => %d' % (len(collected), len(unique_sorted)))
     return unique_sorted
コード例 #9
0
ファイル: bootstrapper.py プロジェクト: aurora1625/web-ka
 def P2I(self, P):
     '''Collect the instances matched by the promoted patterns in P,
     skipping every instance that already has a matching document in the
     self.boot_i collection.

     Each instance is a tuple of the row's 'arg*' values in key order.
     Returns a sorted tuple of the distinct instances and logs the number
     of hits before and after de-duplication.'''
     hits = []
     for pattern in P:
         rows = mongodb.fast_find(
             self.db, self.matrix,
             mongodb.make_query(i=None, p=pattern), fields=self.args)
         for row in rows:
             arg_pairs = [(name, val)
                          for name, val in sorted(row.items())
                          if name.startswith('arg')]
             # A truthy find_one() result means the instance is already stored.
             if not self.db[self.boot_i].find_one(dict(arg_pairs)):
                 hits.append(tuple([val for _, val in arg_pairs]))
     distinct = tuple(sorted(set(hits)))
     self.logger.info('I: %d => %d' % (len(hits), len(distinct)))
     return distinct
コード例 #10
0
ファイル: instances2csv.py プロジェクト: aurora1625/web-ka
def main():
    '''Dump a MongoDB collection of instances to standard output as CSV.

    Command line: ``[options] database collection``.

    Every document yielded by ``mongodb.fast_find`` becomes one CSV row made
    of its ``it``, ``score``, ``arg1``, ``arg2`` and ``arg3`` fields; other
    keys are ignored and no header row is written.

    Prints the usage text and exits with status 1 unless exactly two
    positional arguments are supplied.
    '''
    from optparse import OptionParser
    usage = '''%prog [options] [database] [collection]'''
    parser = OptionParser(usage=usage)
    parser.add_option('-o', '--host', dest='host', default='localhost',
                      help='''mongodb host machine name. default: localhost''')
    parser.add_option('-p', '--port', dest='port', type=int, default=27017,
                      help='''mongodb host machine port number. default: 27017''')
    options, args = parser.parse_args()
    if len(args) != 2:
        parser.print_help()
        # sys.exit() is the supported way to terminate a script; the bare
        # exit() helper is injected by the ``site`` module and is not
        # guaranteed to exist (e.g. ``python -S`` or frozen applications).
        sys.exit(1)
    db_, coll = args
    # NOTE(review): pymongo.Connection was removed in PyMongo 3.0 in favour of
    # MongoClient -- confirm the pinned pymongo version before switching.
    connection = pymongo.Connection(options.host, options.port)
    db = connection[db_]
    # extrasaction='ignore' drops document keys that are not CSV columns.
    esp_i_writer = csv.DictWriter(
        sys.stdout,
        ('it', 'score', 'arg1', 'arg2', 'arg3'),
        extrasaction='ignore')
    for r in mongodb.fast_find(db, coll):
        esp_i_writer.writerow(r)
コード例 #11
0
ファイル: matrix2pmi.py プロジェクト: quintia/web-ka
 def make_pmi_ip(self):
     '''Populate the collection named by self._pmi_ip with one document per
     row of the self.matrix collection, holding the row's pattern, its
     argument values and the instance*pattern PMI scores.

     Progress is reported on stderr every 10000 rows.

     NOTE(review): the original docstring said this method "returns its
     name" (the <matrix>_pmi_ip collection), but no return statement is
     visible in this excerpt -- confirm against the full source.
     '''
     # Python 2 print-chevron syntax: the message goes to stderr.
     print >>sys.stderr, '%s: calculating instance*pattern PMI...' % self.fullname
     xs = mongodb.fast_find(self.db, self.matrix, batch=self.batch)
     # Count from 1 so that n is the number of rows processed so far.
     for n,x in enumerate(xs, 1):
         p = x['rel']
         rel = [('rel', p), ]
         # Instance = the row's values for each field named in self.argv.
         i = [x[a] for a in self.argv]
         args = zip(self.argv, i)
         # The values returned by self.discounted_pmi are labelled, in order,
         # 'dpmi', 'discount' and 'pmi'.
         pmi = zip(('dpmi', 'discount', 'pmi'), self.discounted_pmi(i,p))
         # The concatenation below relies on zip() returning lists (Python 2).
         # presumably SON is bson's ordered mapping, keeping rel/args/pmi
         # field order -- TODO confirm the import.
         y = SON(rel+args+pmi)
         self.db[self._pmi_ip].save(y)
         if n%10000 == 0:
             print >>sys.stderr, '# %8d PMI scores calculated' % n
     print >>sys.stderr, '%s: calculating instance*pattern PMI: done.' % self.fullname
     # ensure_indices is defined outside this excerpt; presumably it creates
     # the standard indexes for the collection -- verify.
     ensure_indices(self.db, self._pmi_ip)
     # Compound index, descending on dpmi and then on pmi.
     self.db[self._pmi_ip].ensure_index(
         [('dpmi', pymongo.DESCENDING), 
          ('pmi', pymongo.DESCENDING), ]
         )