def __init__(self, threshold=0.75, force=False):
    """Load the cached baseline feature data, rebuilding it when needed.

    Two pickle caches inside ``csrec_paths.get_dataset_dir()`` back this
    object:

    * ``baseline_feat_dict`` holds ``self.dict``;
    * ``baseline_feat_threshs_<threshold>`` holds ``self.threshs``.

    A cache is loaded when its file exists and ``force`` is false.
    Otherwise the attribute is reset to an empty container, filled by
    ``create_dictionary()`` / ``create_threshs()`` and pickled back to
    the cache file.

    Args:
        threshold: Stored as ``self.threshold``; its ``str()`` form is
            appended to the threshold cache file name.
        force: When true, ignore existing cache files and rebuild both
            the dictionary and the thresholds.
    """
    self.dict_file = os.path.join(csrec_paths.get_dataset_dir(),
                                  'baseline_feat_dict')
    self.threshold = threshold

    sq = get_sqler()
    self.cursor = sq.db.cursor()

    self.features = ['priority', 'priority2', 'vouched',
                     'references_count', 'friend_link_count',
                     'references_to_count']

    # File modes ('r' / 'w') are kept as they were so caches written by
    # earlier runs are opened exactly as before; the only change is that
    # every handle is now closed deterministically by a ``with`` block.
    if os.path.exists(self.dict_file) and not force:
        print('load user baseline dictionary from %s' % self.dict_file)
        with open(self.dict_file, 'r') as cache_file:
            self.dict = cPickle.load(cache_file)
    else:
        self.dict = {}
        self.create_dictionary()
        # Open the cache only after the build succeeded, so a failing
        # build never leaves an empty cache file behind.
        with open(self.dict_file, 'w') as cache_file:
            cPickle.dump(self.dict, cache_file)

    self.thresh_file = os.path.join(
        csrec_paths.get_dataset_dir(),
        'baseline_feat_threshs_' + str(self.threshold))

    if os.path.exists(self.thresh_file) and not force:
        with open(self.thresh_file, 'r') as cache_file:
            self.threshs = cPickle.load(cache_file)
    else:
        self.threshs = []
        self.create_threshs()
        with open(self.thresh_file, 'w') as cache_file:
            cPickle.dump(self.threshs, cache_file)
def init_db(self):
    """Attach a database helper, its connection and a cursor to this object.

    Sets ``self.sqler`` to the object returned by ``get_sqler()``,
    ``self.sq`` to that object's ``db`` attribute and ``self.cursor`` to
    a cursor obtained from ``self.sq``.
    """
    sqler = get_sqler()
    self.sqler = sqler

    connection = sqler.db
    self.sq = connection

    self.cursor = connection.cursor()
t -= time.time() #print 'Res for this request: ', len(results) for res in results: #print 'convert req_id %d'%res[0] pkl_dump = res[1] r = cPickle.loads(pkl_dump) self.outer_products[res[0]] = r req_len_cnter = 0 first_elem = True t_out -= time.time() print '\t creating outer prods took %f secs'%-t_out
if __name__ == '__main__':
    # Script entry point: read request ids from the ``couchrequest``
    # table, hand them to an OuterProducGetter and report the time the
    # call took.
    sq = get_sqler()
    cursor = sq.db.cursor()

    # Each fetched row carries the id in its first column.
    cursor.execute("Select id from couchrequest limit 0,500000")
    req_ids = [int(row[0]) for row in cursor.fetchall()]

    opg = OuterProducGetter(4000)

    started = time.time()
    opg.create_outer_prods_from_req_ids(req_ids)
    # start - end is the negated duration; it is negated again when
    # formatted so the printed value is the elapsed number of seconds.
    negated_elapsed = started - time.time()
    print('the whole reading took %f' % -negated_elapsed)