def main():
    """Populate the 'RXset' store with the set of code prefixes found in 'RX'.

    Every key of the read-only 'RX' store is visited in sorted order.  A
    record longer than 9 characters is split on '|'; for each piece the text
    before the first ':' is taken, and the prefixes longer than 4 characters
    are written to 'RXset' under the same key as a set.  Shorter records are
    only counted as empty.

    The number of keys is printed up front, and a progress line (keys seen,
    empty records, smoothed empty ratio) is printed every 10000 keys.
    """
    source = bd.BeeStringDict('RX', keysize=9, readonly=1)
    target = bd.BeeStringDict('RXset', keysize=9, readonly=0)

    n_empty = 0
    ordered_keys = sorted(source.keys())
    print(len(ordered_keys))

    for seen, key in enumerate(ordered_keys):
        record = source.get(key, '')
        if len(record) <= 9:
            n_empty += 1
        else:
            prefixes = (entry.partition(':')[0] for entry in record.split('|'))
            target[key] = {prefix for prefix in prefixes if len(prefix) > 4}
            # NOTE(review): a commit is issued after every stored key.
            target.commit()

        if seen % 10000 == 0:
            # +0.5 on both sides keeps the very first ratio (0 keys seen)
            # from dividing by zero.
            ratio = (n_empty + 0.5) / (seen + 0.5)
            print('%s %s %s' % (
                rich_string_wrap(num2comma2(seen), 'y', 0, 'k', 0),
                rich_string_wrap(num2comma2(n_empty), 'r', 0, 'k', 0),
                '%4.3f ' % ratio,
            ))
def process_pat_worker(in_queue, out_queue, plock, process_function, db_list, **kwargs):
    """Consume work items from ``in_queue`` and push results to ``out_queue``.

    Args:
        in_queue: queue yielding ``(patline, index, tot_pat)`` tuples,
            terminated by a ``None`` sentinel.
        out_queue: queue receiving every non-``None`` result, followed by a
            single ``None`` once this worker has drained its input.
        plock: lock (``acquire``/``release``) serialising console output.
        process_function: callable invoked as
            ``process_function(patline, **kwargs)``.
        db_list: mapping of database name to a ``(path_prefix, keysize)``
            pair; each entry is opened read-only and passed to
            ``process_function`` as a keyword argument named after the
            database.
        **kwargs: extra keyword arguments forwarded to ``process_function``.
    """
    # Handles are opened here, inside the worker, from db_list rather than
    # being hard-coded.
    for db in db_list:
        kwargs[db] = bd.BeeStringDict(db_list[db][0] + db,
                                      keysize=db_list[db][1],
                                      readonly=True)

    # iter(callable, sentinel) keeps calling in_queue.get() until it
    # returns None.
    for (patline, index, tot_pat) in iter(in_queue.get, None):
        if index % 50000 == 0:
            plock.acquire()
            try:
                print('AT {:1.2f} %\n'.format(index / float(tot_pat) * 100))
                sys.stdout.flush()
            finally:
                # Always release, otherwise a failure while reporting would
                # leave every other worker blocked on the lock.
                plock.release()

        result = process_function(patline, **kwargs)
        if result is not None:
            out_queue.put(result)

    # Tell the consumer that this worker is finished.
    out_queue.put(None)

    plock.acquire()
    try:
        print('child: is in q empty? ' + str(in_queue.empty()))
    finally:
        plock.release()