Ejemplo n.º 1
0
 def __init__(self, threshold=0.75, force=False):
   """Set up the baseline feature dictionary and thresholds, using disk caches.

   Both the feature dictionary (self.dict) and the threshold list
   (self.threshs) are pickled to files under the dataset directory.  A
   cache file is loaded when it exists and `force` is false; otherwise the
   value is rebuilt and written back to its cache file.

   Args:
     threshold: stored on self.threshold; it is also part of the threshold
       cache file name, so each threshold value gets its own cache file.
     force: when true, ignore existing cache files and rebuild both values.
   """
   self.dict_file = os.path.join(csrec_paths.get_dataset_dir(),'baseline_feat_dict')
   self.threshold = threshold
   sq = get_sqler()
   self.cursor = sq.db.cursor()
   self.features = ['priority', 'priority2', 'vouched', 'references_count', 
     'friend_link_count', 'references_to_count']
   # File modes are left exactly as before so existing cache files stay
   # readable; the `with` blocks make sure every handle is closed (and, for
   # the dumps, flushed) as soon as the pickle call returns.
   if os.path.exists(self.dict_file) and not force:
     print('load user baseline dictionary from %s' % self.dict_file)
     with open(self.dict_file, 'r') as dict_in:
       self.dict = cPickle.load(dict_in)
   else:
     self.dict = {}
     # presumably fills self.dict in place -- defined outside this view
     self.create_dictionary()
     with open(self.dict_file, 'w') as dict_out:
       cPickle.dump(self.dict, dict_out)

   self.thresh_file = os.path.join(csrec_paths.get_dataset_dir(),'baseline_feat_threshs_'+str(self.threshold))
   if os.path.exists(self.thresh_file) and not force:
     with open(self.thresh_file, 'r') as thresh_in:
       self.threshs = cPickle.load(thresh_in)
   else:
     self.threshs = []
     # presumably fills self.threshs in place -- defined outside this view
     self.create_threshs()
     with open(self.thresh_file, 'w') as thresh_out:
       cPickle.dump(self.threshs, thresh_out)
Ejemplo n.º 2
0
 def init_db(self):
   """Store the sqler from get_sqler(), its db attribute and a cursor on self."""
   self.sqler = handle = get_sqler()
   self.sq = conn = handle.db
   self.cursor = conn.cursor()
Ejemplo n.º 3
0
        t -= time.time()
        #print 'Res for this request: ', len(results)
        for res in results:
          #print 'convert req_id %d'%res[0]
          pkl_dump = res[1]  
          r = cPickle.loads(pkl_dump)            
          self.outer_products[res[0]] = r
        req_len_cnter = 0
        first_elem = True
    
    t_out -= time.time()
    print '\t creating outer prods took %f secs'%-t_out
          
if __name__ == '__main__':
  # Script entry point: read request ids from the couchrequest table
  # (offset 0, at most 500000 rows) and time the outer-product creation.
  sq = get_sqler()
  cursor = sq.db.cursor()
  cursor.execute("Select id from couchrequest limit 0,500000")
  # Each fetched row is indexed at position 0 and converted to int.
  req_ids = [int(row[0]) for row in cursor.fetchall()]
  opg = OuterProducGetter(4000)
  t_start = time.time()
  opg.create_outer_prods_from_req_ids(req_ids)
  # t holds start minus end (a negative duration); it is negated when printed.
  t = t_start - time.time()
  print('the whole reading took %f' % -t)