class DBTestCase(unittest.TestCase): def setUp(self): self.db = DBHelper() self.samples =self.db.get_results_by_clms(columns='*',table= 'samples',database=DB_test,isdict=True) def tearDown(self): self.db = None def test_get_max_group_id(self): res =self.db.get_max_group_id('refined_info_2','xyzb') res_2 =self.db.get_max_group_id('info_template','xyzb') print res print res_2 def test_batch_insert(self): entrys =self.db.get_results_by_clms(columns='*',table= 'refined_info',database=DB_1,isdict=True)[:12056] table = 'info_tmp1' self.db.recr_table(table,DB_1,'info_template',DB_1); self.db.batch_insert(entrys,table,DB_1) def test_batch_get(self): entrys =self.db.get_results_by_clms(columns='*',table= 'refined_info',database=DB_1,isdict=True)[:2] for key, value in entrys[1].items(): print key,value,type(value)
def test_diff_tables(self): dc = datachecker() db_helper = DBHelper() new_table = 'extracted_full_info_today' old_table = 'info_tmp' old_db = DB_test new_db = DB_1 db_helper.recr_table(old_table,old_db,'info_template',DB_1) entrys =dc.diff_tables(table_old=old_table,table_new=new_table,db_old=old_db,db_new=new_db) db_helper.batch_insert(entrys,table=old_table,db=old_db)
class scheduler():
    """Schedule the data flow.

    One pass: pre-process (deduplicate), diff the new table against the
    old one, insert the delta, then either round-trip the data through
    DMG or read it back directly; finally sort, serialize, word-segment
    and index the entries.
    """

    def __init__(self):
        self.db = DBHelper()
        self.dc = datachecker()
        self.sb = submitter()
        self.dd = data_drawer()
        self.sg = segger()
        self.id = indexer()
        self.pc = prechecker()
        self.dmg_on = DMG_ON
        self.test_on = TEST_ON
        # Table/database names can be overridden from the command line:
        #   argv[1] = old table, argv[2] = old db,
        #   argv[3] = new table, argv[4] = new db
        # BUG FIX: the original guard was `len(sys.argv) < 4`, so an argv
        # of exactly 4 elements (only 3 user arguments) fell into the
        # else-branch and crashed on sys.argv[4] with IndexError.  All
        # four user arguments are required, hence `< 5`.
        if len(sys.argv) < 5:
            self.table_old = "refined_info"
            self.db_old = DB_1
            self.table_new = "extracted_info"
            self.db_new = DB_1
        else:
            self.table_old = sys.argv[1]
            self.db_old = sys.argv[2]
            self.table_new = sys.argv[3]
            self.db_new = sys.argv[4]

    def data_flow(self):
        """Run one full pass of the pipeline (see class docstring)."""
        threads = []
        self.pc.pre_process()  # remove duplicates
        # Entries present in the new table but missing from the old one.
        entrys = self.dc.diff_tables(self.table_old, self.table_new,
                                     self.db_old, self.db_new)
        # Insert the newly arrived entries into the database.
        self.db.batch_insert(entrys, self.table_old, self.db_old)
        if self.test_on:
            # Mirror the insert into the test database in the background.
            t = threading.Thread(target=self.db.batch_insert,
                                 args=(entrys, self.table_old, DB_test))
            t.start()
            threads.append(t)
        if self.dmg_on:
            # Submit the updates to dmg.
            self.sb.deal_submit_req(entrys)
            # Fetch the full data set back from dmg.
            # BUG FIX: `dd` was referenced as a bare (undefined) name in
            # the original; the data_drawer lives on self.dd.
            entrys = self.dd.get_entrys()
            self.db.recr_table(TB_DMG, DB_1, TB_TEMPLATE, DB_1)
            t = threading.Thread(target=self.db.batch_insert,
                                 args=(entrys, TB_DMG, DB_1))
            t.start()
            threads.append(t)
        else:
            entrys = self.db.get_results_by_clms(columns='*',
                                                 table=self.table_old,
                                                 database=self.db_old,
                                                 isdict=True)
        # Newest entries first.
        entrys = sorted(entrys, key=self.sort_func, reverse=True)
        # Serialize entries to protobuffers.
        entrys = self.sg.serilize_entrys(entrys)
        # Remote call: word segmentation.
        entrys = self.sg.word_seg_list(entrys)
        # Remote call: indexing.
        self.id.index_list_string(entrys)
        # Wait until all background inserts have finished.
        for t in threads:
            t.join()

    def sort_func(self, entry):
        """Sort key for an entry: its timestamp, or 0 when no time is set.

        Meetings (info_type == 1) sort by meeting time; everything else
        sorts by release date.
        """
        info_type = entry.get(INFO_TYPE)
        if info_type == 1:
            time = entry.get(MEETING_TIME)
        else:
            time = entry.get(RELEASE_DATE)
        if time is None:
            return 0
        return get_timestamp(time)