def reopen(ids): print 're-open:',ids try: conn.sql("insert into to_reopen(ids) values( "+str(ids) +")" ) conn.commit() except: print 'Eror reopen:' log.exception("")
def reopen2(ids): print 're-open:',ids try: conn.sql("update fcb_users set indexed='N' where ID=\'"+str(ids) +'\'' ) conn.commit() except: print 'Eror reopen:' log.exception("")
def reopen(ids): a_lock.acquire() print 're-open:',ids try: conn.sql("update fcb_users set indexed='N' where ID=\'"+str(ids) +'\'' ) conn.commit() except: print 'Eror reopen:' log.exception("") a_lock.release()
def c_reopen():
    """Drain up to 500 queued re-open requests from the to_reopen table.

    Each queued user is re-flagged via reopen2(), then the queue rows are
    deleted. Serialized with a_lock.
    """
    a_lock.acquire()
    try:
        cur = conn.sql("select i,ids from to_reopen where rowno <= 500")
        dels = []
        for row in cur:
            dels.append(row[0])
            reopen2(row[1])
        conn.commit()
        for key in dels:
            # BUG FIX: the original referenced the undefined name 'ic'
            # (NameError on first iteration); it must delete by the
            # collected queue key. Also parameterized.
            conn.sqlX("delete from to_reopen where i=?", [key])
        conn.commit()
    finally:
        # Release even if a query fails, so other workers are not deadlocked.
        a_lock.release()
def clean_all():
    """Reset the web_cache3 index: mark every row for the user as
    not-indexed and empty all auxiliary index tables, committing once.

    (The original declared `global usr2` and bound each cursor to an
    unused local; both removed — they had no effect.)
    """
    statements = [
        "update web_cache3 set indexed='N' where USR='******' ",
        " delete from WEB_CACHE3_IDX2__2 ",
        " delete from WEB_CACHE3_IDX ",
        " delete from WEB_CACHE3_IDX2 ",
    ]
    for sq in statements:
        conn.sql(sq)
    conn.commit()
def post_cmd(arr, usr, u_nm): a_lock.acquire() try: print 'insert rows.id:', len(arr) for its in arr: [a, u_id, u_name] = its insere_usr('', u_id, u_name) if True: conn.sql("update fcb_users set indexed='S' where i= " + str(usr)) conn.commit() print 'Close usr:', u_nm except: pass a_lock.release()
def run_layout_parser(lay_names,dts,usr,extdt):
    """Fetch and exec() the stored behaviour code for each layout name.

    lay_names: layout/object names to look up in DATA_BEHAVIOUR_CODE_PY.
    dts, extdt: exposed to the executed snippet as sr_int_cmd_param and
    ex_Data respectively.

    NOTE(review): results accumulate in codes_Result but the function never
    returns it — confirm whether a trailing `return codes_Result` is missing.
    """
    #====================================================================================================
    # Run the stored code snippets (original comment: "chamar os codigos").
    codes_Result=[]
    for dt in lay_names:
        print 'Layout-Code',dt
        #=========================
        resultSet = conn.sql ("select CODE from DATA_BEHAVIOUR_CODE_PY where OBJETO='"+dt+"' and USERNAME='******' order by i")
        for results in resultSet:
            typ=get_typ(dt,usr)
            orig_code=results[0]
            o=clean_s(results[0])
            #o=results[0]
            code=o
            print 'Code type:',typ
            if typ == 1:
                # Executable type: append a driver line so the snippet's
                # run() result is captured into retorno_srt.
                code+=' \n\nretorno_srt=run(sr_int_cmd_param,ex_Data)'
            else:
                # Non-executable: just run the code, nothing is captured.
                pass
            #==================================
            try:
                sr_int_cmd_param=dts
                ex_Data=extdt
                # SECURITY NOTE(review): exec() of DB-stored code — this
                # must only ever run trusted, operator-authored snippets.
                exec(code, locals(), locals())
                pass
            except Exception,e:
                log.exception('[Layout(p) Exec Error]Stack execution---------------------------------')
                print '[code]','\n',code,'\n\n\n','}'
                #log.info(code)
            #====
            if typ == 1:
                # Collect the snippet's return (lines -> [endereco, dados]).
                # NOTE(review): if exec() raised before the driver line ran,
                # retorno_srt may be undefined here (NameError) — confirm.
                if retorno_srt != None:
                    codes_Result.append( retorno_srt )
def get_typ(obj, usr2):
    """Return the TYP value for behaviour object *obj* (0 if no row)."""
    rows = conn.sql(
        "select TYP from DATA_BEHAVIOUR_PY where OBJETO='" + obj
        + "' and USERNAME='******' order by i")
    found = 0
    # Last matching row wins, as in the ORDER BY i scan.
    for row in rows:
        found = row[0]
    return found
def get_rows(startc): cur = conn.sql( "select user_name,id,u_name,i,indexed from fcb_users where i>= " + str(startc) + " and rowno <= 50000 order by i") dels = [] count = 1 cs = 0 global atu_reg global geral_cnt for re in cur: user_name = re[0] id = re[1] u_name = re[2] if u_name == None: u_name = '' i = re[3] indexed = re[4] cs = i if count % 500 == 0: print 'read.row:', count atu_reg = 'read.row:' + str(count) + ',total:' + str(geral_cnt) #print {'user_name': user_name },{'id':id},{'u_name':u_name} tab2.insert(str(id), { 'user_name': user_name, 'id': id, 'u_name': u_name, 'indexed': indexed }) #if count >5 : return 0 count += 1 geral_cnt += 1 return cs
def get_dist_u_next(): a_lock.acquire() isd=[] try: cursor = conn.sql("SELECT distinct ID,i from fcb_users where indexed='N' and rowno < 2 ") for results in cursor: ids=results[0] i=results[1] isd=[ids,i] conn.sql("update fcb_users set indexed='S' where i= "+str(i)) conn.commit() print 'Close usr(1):',ids break except: pass a_lock.release() return isd
def get_purposes(usr):
    """Return the purpose (trigger) names configured for the search user,
    each of which groups its own sources and goals.

    Shadowed by a later redefinition of get_purposes() in this file.
    """
    cursor = conn.sql(
        "SELECT DT FROM knowledge_manager where username='******' and typ=4 and dt<>'language' order by i ")
    return [row[0] for row in cursor]
def get_purposes(usr):
    """Return the purpose (trigger) names for the user.

    Redefines the earlier get_purposes(); only the DT column is consumed,
    although layout_onto is also selected.
    """
    rows = conn.sql(
        "SELECT DT,layout_onto FROM knowledge_manager where username='******' and typ=4 and dt<>'language' order by i")
    names = []
    for row in rows:
        names.append(row[0])
    return names
# NOTE(review): this definition has been corrupted by credential redaction —
# '******' has replaced spans of real code, leaving unbalanced string
# literals. It is not syntactically valid Python and cannot be safely
# reconstructed from this view; preserved verbatim below.
# Apparent intent: loop up to 100000 times, fetch pending users via
# get_dist_u(), process each, and flag it indexed='S' in fcb_users.
# Also shadowed by a later redefinition of index_subs() in this file.
def index_subs(): c=0 while c < 100000 : [usrs,fc]=get_dist_u() c2=0 for u in usrs: print 'Process usr:'******'Close usr:'******'S' where i= "+str(fc[c2])) conn.commit() err=False except: pass c2+=1 c+=1
def process_py():
    """Rebuild the tb_py / tb_py_code stores from the behaviour tables."""
    tb_py.truncate()
    tb_py_code.truncate()
    #
    meta = conn.sql("select OBJETO,USERNAME,TYP from DATA_BEHAVIOUR_PY")
    for row in meta:
        objeto, username, typ = row[0], row[1], row[2]
        tb_py.insert(objeto, {
            "USERNAME": username,
            "OBJETO": objeto,
            "TYP": str(typ),
        })
    codes = conn.sql("select OBJETO,USERNAME,CODE from DATA_BEHAVIOUR_CODE_PY ")
    for row in codes:
        objeto, username = row[0], row[1]
        source = row[2].read()  # CODE column is a LOB; materialize it
        tb_py_code.insert(objeto, {
            "USERNAME": username,
            "OBJETO": objeto,
            "CODE": source,
        })
def init_nums_i(): try: sq = "select count(*) from web_cache3 where USR='******' and (indexed='S') " cursor = conn.sql(sq) for results in cursor: I = results[0] print 'Rows:', I except Exception, e: print 'Error:', e
# NOTE(review): this redefinition of index_subs() has also been corrupted by
# credential redaction ('******' replaced spans of real code, leaving
# unbalanced literals); it is not valid Python and cannot be reconstructed
# from this view. Preserved verbatim below.
# Apparent intent: threaded variant — spawn one worker per pending user,
# poll the thread-control objects until all report finished, bulk-insert the
# collected rows via insere_usr(), then flag each source user indexed='S'.
def index_subs(): c=0 while c < 1 : [usrs,fc]=get_dist_u() ths=[] for u in usrs: print 'Process usr:'******'',ths[len(ths)-1] ) ) #== except:pass ind_col=0 while True: print 'wait for pages...',len(ths)-ind_col fnds_t=False ind_col=0 for ths1 in ths: if not ths1.finished:fnds_t=True if ths1.finished: ind_col+=1 if fnds_t: time.sleep(10) continue else: break #============================= print 'insert rows.id:',len(to_ins) for its in to_ins: [a,u_id,u_name]=its insere_usr('',u_id,u_name) time.sleep( 2 ) indc2=0 for fcs in fc: print 'Close usr:'******'S' where i= "+str(fcs)) conn.commit() indc2+=1 c+=1 to_ins=[]
def get_dist_u_next2():
    """Return up to 5 pending [ID, i] pairs from fcb_users (indexed='N').

    Shadowed by a later redefinition in this file; unlike that one, this
    variant does not mark the rows as claimed.
    """
    pending = []
    cursor = conn.sql(
        "SELECT distinct ID,i from fcb_users where indexed='N' and rowno <= 5 ")
    for row in cursor:
        pending.append([row[0], row[1]])
    return pending
def get_dist_u_next2(): a_lock.acquire() isd=[] closes=[] try: cursor = conn.sql("SELECT distinct ID,i from fcb_users where indexed='N' and rowno <= 1 ") isd=[] for results in cursor: ids=results[0] i=results[1] isd.append([ids,i]) closes.append([i,ids]) #============== for [cl,u_nm] in closes: conn.sql("update fcb_users set indexed='S' where i= "+str(cl)) print 'Close usr(2):',u_nm conn.commit() #== except Exception, err2: print 'Error collect:',err2
def get_db_pages(usr2): resultSet = conn.sql("select pg from know_pages where USERNAME='******' order by i") typ = [] for results in resultSet: ts = results[0].read() if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim( ts) != '\r': typ.append(ts) print 'Reuse pgs:', len(typ) return typ
def get_dist_u():
    """Return [ids, keys]: parallel lists of user IDs and row keys for up
    to 49 pending (indexed='N') fcb_users rows."""
    user_ids = []
    row_keys = []
    cursor = conn.sql(
        "SELECT distinct ID,i from fcb_users where indexed='N' and rowno < 50 ")
    for row in cursor:
        user_ids.append(row[0])
        row_keys.append(row[1])
    return [user_ids, row_keys]
def process_sentences(start_c, usr): resultSet = conn.sql( "SELECT USERNAME,TERMO,TRIGGER_AS FROM clipping_info where USERNAME='******' ") # 50 rows por vez r1 = [] for results in resultSet: username = results[0] termo = results[1] trigger_as = results[2] r1.append([username, termo, trigger_as]) purps = get_purposes( usr ) # purposes-> layouts definidos dentro dos facts dos ractionlines escalados to_run_c = [] print 'Process load layout...' for r in r1: [username, termo, trigger_as] = r #=== print 'Process termo:', termo all_layouts = [] for pur_p in purps: print 'Start purpose-load-layout:', pur_p, '--------------------------------------------------------------' layouts_f = get_layouts(usr, pur_p) layouts_f2 = get_layouts2(usr, pur_p) onto_basis2 = [] for onto_basisk in layouts_f: l2 = Identify.prepare_layout(usr, onto_basisk) onto_basis2.append(l2) onto_basis22 = [] for onto_basisk in layouts_f2: l2 = Identify.prepare_layout(usr, onto_basisk) #print 'Prepare layout(2):',onto_basisk,'->',l2.fzs onto_basis22.append(l2) all_layouts.append([onto_basis2, onto_basis22, pur_p]) print 'End purpose:', pur_p, '--------------------------------------------------------------' # print 'Start process page:---' process_termo(termo, username, pur_p, start_c, '', all_layouts) #========================= if len(entry_doc) > 0: return
# NOTE(review): this redefinition of process_sentences() has been corrupted
# by credential redaction — '******' replaced spans of real code (including
# the execution of sqlcc and the construction of r1), leaving unbalanced
# literals. Not valid Python; cannot be reconstructed from this view.
# Preserved verbatim below.
# Apparent intent: for each clipping term of USERN, spawn run_th() worker
# threads (at most 5 in flight), polling thread_cntl.finished flags and
# sleeping between polls; drain all workers again at the end.
def process_sentences(USERN, pth): print 'User:'******':', len(USERN) USERN2 = '\'' + USERN + '\'' sqlcc = ' select USERNAME,TERMO from clipping_info where USERNAME= '******'Term.Len:', len(r1) for r in r1: [username, termo] = r if True: pur_p = termo if finds >= 5: ids = len(ths) ids1 = 0 #======================================== while ids1 < ids: if not ths[ids1].finished: ids1 = 0 time.sleep(5) print 'Wait for....' continue ids1 += 1 ths = [] else: th = thread_cntl() ths.append(th) thread.start_new_thread(run_th, (username, termo, pur_p, th, pth)) finds += 1 if True: ids = len(ths) ids1 = 0 #======================================== while ids1 < ids: if not ths[ids1].finished: ids1 = 0 time.sleep(5) print 'Wait for....' continue ids1 += 1 ths = []
def get_db_pages(usr2): def remote_f(): print 'Getting remote-pages...' return proxy.get_pages('',usr2) if RemoteL: return remote_f() resultSet = conn.sql ("select pg,title,story,i from web_cache where USERNAME='******' order by i") #resultSet = conn.sql ("select pg,title,story,i from web_cache where USERNAME='******' and i = 96 order by i") typ=[] for results in resultSet: ts=results[0].read() ids=results[1] if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r': typ.append([ts,ids]) if len(typ) > 100: break print 'Reuse pgs:',len(typ) return typ
def clean_marcador():
    """Clear the page-signature table, then drop the per-user temporary
    cache rows (everything not tagged SYSTEM), committing after each."""
    conn.sql("delete from web_cache_sign")
    conn.commit()
    # Users' temporary rows: collected first, then bulk-cleaned here.
    conn.sql(
        "delete from web_cache where termo <> 'SYSTEM' and purpose <> 'SYSTEM'")
    conn.commit()
def get_by_keyword( is2 ):  # original: search keywords to extract 'samples' that calibrate/train the fuzzy processor
    """Extract intent-keyword sample rows from web_cache, moving each
    consumed row's (I, DOC_ID) into PROC_DS so reprocessing skips it.

    is2: comma-separated list of web_cache primary keys, interpolated
    directly into an IN(...) clause — SECURITY NOTE(review): must be built
    only from trusted numeric ids.
    Returns the matched rows as lists of column values; on any error logs,
    rolls back, and returns [].
    Shadowed by a later no-argument get_by_keyword() in this file.
    """
    isd = []
    try:
        cursor = conn.sql(
            "SELECT PG,PROCESSED,TERMO,USR,PURPOSE,URL_ICON,URL_PICTURE,ID_USR,NAME_USR,STORY,TITLE,DOC_ID,TP,PHONE,STREET,CITY,COUNTRY,ZIP,LATITUDE,LONGITUDE,TPS,URL,i from web_cache where i in(" + is2 + ") ")
        for results in cursor:
            I = results[22]
            #============================
            #print 'Print pg:',I
            PG = results[0]
            PROCESSED = results[1]
            TERMO = results[2]
            USR = results[3]
            PURPOSE = results[4]
            URL_ICON = results[5]
            URL_PICTURE = results[6]
            ID_USR = results[7]
            NAME_USR = results[8]
            STORY = results[9]
            TITLE = results[10]
            DOC_ID = results[11]
            TP = results[12]
            PHONE = results[13]
            STREET = results[14]
            CITY = results[15]
            COUNTRY = results[16]
            ZIP = results[17]
            LATITUDE = results[18]
            LONGITUDE = results[19]
            TPS = results[20]
            URL = results[21]
            #==========
            # Materialize LOB columns; NULL becomes ''.
            if PG != None:
                PG = PG.read()
            else:
                PG = ''
            if URL_ICON != None:
                URL_ICON = URL_ICON.read()
            else:
                URL_ICON = ''
            if URL_PICTURE != None:
                URL_PICTURE = URL_PICTURE.read()
            else:
                URL_PICTURE = ''
            if STORY != None:
                STORY = STORY.read()
            else:
                STORY = ''
            if TITLE != None:
                TITLE = TITLE.read()
            else:
                TITLE = ''
            if URL != None:
                URL = URL.read()
            else:
                URL = ''
            words = tokeniz(PG)
            fnd = False
            fnd2 = False
            # fnd2 marks pages to EXCLUDE: friendship/like/bare-link noise.
            if 'are now friends' in PG:
                fnd2 = True
            elif 'is now friends with' in PG:
                fnd2 = True
            elif PG[:7] == 'http://':
                fnd2 = True
            elif 'likes' in PG:
                fnd2 = True
            elif '{like}' in PG:
                fnd2 = True
            #===
            # fnd marks Portuguese intent stems (want / need / could /
            # like / thinking / buy / acquire / get / find / recommend).
            for w in words:
                if 'quer' in w:
                    fnd = True
                elif 'precis' in w:
                    fnd = True
                elif 'poderia' in w:
                    fnd = True
                elif 'pode' in w:
                    fnd = True
                elif 'podi' in w:
                    fnd = True
                elif 'gostar' in w:
                    fnd = True
                elif 'pensand' in w:
                    fnd = True
                elif 'comprar' in w:
                    fnd = True
                elif 'adquirir' in w:
                    fnd = True
                elif 'pens' in w:
                    fnd = True
                elif 'pegar' in w:
                    fnd = True
                elif 'encontr' in w:
                    fnd = True
                elif 'indicar' in w:
                    fnd = True
            #================================
            if umisc.trim(PG) == '':
                fnd = False
            if fnd and not fnd2:
                isd.append([
                    PG, PROCESSED, TERMO, USR, PURPOSE, URL_ICON,
                    URL_PICTURE, ID_USR, NAME_USR, STORY, TITLE, DOC_ID,
                    TP, PHONE, STREET, CITY, COUNTRY, ZIP, LATITUDE,
                    LONGITUDE, TPS, URL
                ])
            # Delete the item, keeping only (I, DOC_ID) in the processed
            # table so the reprocessing pass no longer considers these
            # documents. NOTE(review): reconstructed at loop level (every
            # fetched row is consumed, matched or not) — confirm it is not
            # meant to apply only to matched rows.
            conn.sqlX('insert into PROC_DS (ID,DOC_ID) values(?,?)', [I, DOC_ID])
            conn.sqlX('delete from web_cache where I=?', [I])
    except:
        log.exception("")
        conn.rollback()
        return []
    conn.commit()
    return isd
def get_by_keyword( ): # busca algumas palavras chave para extrair os 'samples', amostras de codigo para calibrar e treinar o processador fuzzy isd = [] try: cursor = conn.sql( "SELECT PG,PROCESSED,TERMO,USR,PURPOSE,URL_ICON,URL_PICTURE,ID_USR,NAME_USR,STORY,TITLE,DOC_ID,TP,PHONE,STREET,CITY,COUNTRY,ZIP,LATITUDE,LONGITUDE,TPS,URL,i from web_cache where rowno <= 50 " ) for results in cursor: I = results[22] #============================ print 'Print pg:', I PG = results[0] PROCESSED = results[1] TERMO = results[2] USR = results[3] PURPOSE = results[4] URL_ICON = results[5] URL_PICTURE = results[6] ID_USR = results[7] NAME_USR = results[8] STORY = results[9] TITLE = results[10] DOC_ID = results[11] TP = results[12] PHONE = results[13] STREET = results[14] CITY = results[15] COUNTRY = results[16] ZIP = results[17] LATITUDE = results[18] LONGITUDE = results[19] TPS = results[20] URL = results[21] #========== if PG != None: PG = PG.read() else: PG = '' if URL_ICON != None: URL_ICON = URL_ICON.read() else: URL_ICON = '' if URL_PICTURE != None: URL_PICTURE = URL_PICTURE.read() else: URL_PICTURE = '' if STORY != None: STORY = STORY.read() else: STORY = '' if TITLE != None: TITLE = TITLE.read() else: TITLE = '' if URL != None: URL = URL.read() else: URL = '' words = tokeniz(PG) fnd = False for w in words: if 'quer' in w: fnd = True elif 'precis' in w: fnd = True elif 'poderia' in w: fnd = True elif 'pode' in w: fnd = True elif 'podi' in w: fnd = True elif 'gostar' in w: fnd = True elif 'pensand' in w: fnd = True elif 'comprar' in w: fnd = True elif 'adquirir' in w: fnd = True #================================ if fnd: ids.append([ PG, PROCESSED, TERM, USR, PURPOSE, URL_ICON, URL_PICTURE, ID_USR, NAME_USR, STORY, TITLE, DOC_ID, TP, PHONE, STREET, CITY, COUNTRY, ZIP, LATITUDE, LONGITUDE, TPS, URL ]) except: log.exception("") return isd
def get_fuzzy(name, user):
    '''Load the fuzzy-rule definitions for layout *name* of *user*.

    For each fuzzy_store row (ordered by sq desc) this gathers its
    referers, parses its defs / return-tuples / prefixes / suffixes from
    their comma-separated (backslash-escapable) encodings, and returns a
    list of entries:
      [fzname, sents, mandatory, referer, force_position, arround,
       sinap_result, direction, f_an]
    where sents pairs each DEF with its prefix/suffix lists.
    '''
    self_name = name
    self_id = user
    affinity = self_name
    print 'Process-Lay:', affinity, self_id
    sql1 = "SELECT fzname,force_position,mandatory,direction,an FROM fuzzy_store where layout_onto='" + affinity + "' and username='******' order by sq desc "
    resultSet = conn.sql(sql1)
    aresults = []
    for results in resultSet:
        fzname = results[0]
        print 'Get-FZ:', fzname
        # Boolean flags accept Y or S (sim) markers.
        force_position = (umisc.trim(results[1]).upper() == "Y" or umisc.trim(results[1]).upper() == "S")
        mandatory = (umisc.trim(results[2]).upper() == "Y" or umisc.trim(results[2]).upper() == "S")
        direction = umisc.trim(results[3]).upper()
        f_an = umisc.trim(results[4]).upper()
        #===--------------------------------------------------
        referer = []
        resultSet22 = conn.sql("SELECT refer FROM fz_store_refer where fz='" + fzname + "' and username='******' ")
        for results22 in resultSet22:
            refs = results22[0]
            referer.append(refs)
        #===--------------------------------------------------
        sqlr = "SELECT trim(defs),trim(sin_ret),trim(vl_ret),trim( special_direct ) FROM fz_store_defs where fz='" + fzname + "' and username='******' "
        #print sqlr
        resultSet2 = conn.sql(sqlr)
        arround = []
        DEFS = []
        sinap_result = []
        for results2 in resultSet2:
            returns = []
            vl_ret1 = results2[2]
            special_direct = results2[3]
            if special_direct == None:
                special_direct = ''
            if vl_ret1 == None:
                vl_ret1 = ''
            # vl_ret format: [topico,sub,sinapse][topico,sub,sinapse]...
            # First pass: split the bracketed groups into `tuples`.
            tuples = []
            tmp = ''
            for s in vl_ret1:
                if s == '[':
                    tmp = ''
                elif s == ']':
                    tuples.append(tmp)
                    tmp = ''
                else:
                    tmp += s
            # Second pass: split each group on commas into top/sub/sin.
            # NOTE(review): `tmp` is reused from the pass above without
            # being reset before this loop — confirm that is intended.
            for tup in tuples:
                top = ''
                sub = ''
                sin = ''
                for s in tup:
                    if s == ',':
                        if top == '':
                            top = tmp
                            tmp = ''
                        elif sub == '':
                            sub = tmp
                            tmp = ''
                        else:
                            sin = tmp
                            tmp = ''
                    else:
                        tmp += s
                if umisc.trim(tmp) != '':
                    sin = tmp
                if umisc.trim(top) != '':
                    returns.append([top, sub, sin, special_direct])
            #print returns,'...'
            #----
            # defs: comma-separated, with backslash-escaped commas.
            defs1 = results2[0]
            ps = []
            if len(defs1) > 0:
                tmp = ''
                ind = 0
                for d in defs1:
                    if d == ',':
                        # Split only on unescaped commas (previous char
                        # not a backslash).
                        if defs1[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        elif d != '\\':
                            tmp += d
                    elif d != '\\':
                        tmp += d
                    ind += 1
                if umisc.trim(tmp) != '':
                    ps.append(tmp)
            else:
                ps.append('')
            DEFS.append([ps, returns])
            sin_ret = results2[1]
            if umisc.trim(sin_ret) != '':
                sinap_result.append(sin_ret)
        #===--------------------------------------------------
        resultSet2 = conn.sql(
            "SELECT trim(pref) FROM fz_store_pref where fz='" + fzname + "' and username='******' ")  # 50 rows at a time
        PREF = []
        for results2 in resultSet2:
            #----
            # Same comma-split as defs.
            # NOTE(review): `ind` is initialized but never incremented in
            # this loop (and the suffix loop below), so the escape check
            # always inspects pref[-1] — confirm whether that is a bug.
            pref = results2[0]
            ps = []
            if len(pref) > 0:
                tmp = ''
                ind = 0
                for d in pref:
                    if d == ',':
                        if pref[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        else:
                            tmp += d
                    else:
                        tmp += d
                if tmp != '':
                    ps.append(tmp)
            else:
                ps.append('')
            PREF.append(ps)
        #===--------------------------------------------------
        sqlr = "SELECT trim(sufix) FROM fz_store_sufix where fz='" + fzname + "' and username='******' "
        resultSet2 = conn.sql(sqlr)
        SUFX = []
        for results2 in resultSet2:
            #----
            sufix = results2[0]
            ps = []
            if len(sufix) > 0:
                tmp = ''
                ind = 0
                for d in sufix:
                    if d == ',':
                        if sufix[ind - 1] != '\\':
                            ps.append(tmp)
                            tmp = ''
                        else:
                            tmp += d
                    else:
                        tmp += d
                if tmp != '':
                    ps.append(tmp)
            else:
                ps.append('')
            SUFX.append(ps)
        #===--------------------------------------------------
        # Pair each DEF with its positional prefix/suffix.
        ind = 0
        sents = []
        for cDF in DEFS:
            PR = ['']
            SF = [['']]
            if ind < len(PREF):
                PR = PREF[ind]
            if ind < len(SUFX):
                # NOTE(review): assigns the whole SUFX list, not SUFX[ind],
                # unlike the PREF branch — confirm whether intended.
                SF = SUFX
            DEF = cDF
            sent = [PR, DEF, SF]
            sents.append(sent)
            ind += 1
        aresults.append([
            fzname, sents, mandatory, referer, force_position, arround,
            sinap_result, direction, f_an
        ])
    return aresults
def process_wb():
    """Copy every WEB_CACHE row into the 'web' store.

    LOB columns are materialized with .read(); NULLs are normalized to ''
    or to the masked/default placeholders used below. The module-level
    itemsc counter is reset and tracks rows inserted.
    """
    global itemsc
    web.truncate()
    #
    r1 = conn.sql(
        "select I,PG,PROCESSED,TERMO,USR,PURPOSE,URL_ICON,URL_PICTURE,ID_USR,NAME_USR,STORY,TITLE,DOC_ID,\
TP,PHONE,STREET,CITY,COUNTRY,ZIP,LATITUDE,LONGITUDE,TPS,URL from WEB_CACHE")
    itemsc = 0
    for re in r1:
        I = str(re[0])
        PG = re[1].read()  # page body LOB (assumed non-NULL — TODO confirm)
        PROCESSED = str(re[2])
        TERMO = str(re[3])
        USR = str(re[4])
        PURPOSE = re[5]
        if re[6] != None:
            URL_ICON = re[6].read()
        else:
            URL_ICON = ''
        if re[7] != None:
            URL_PICTURE = re[7].read()
        else:
            URL_PICTURE = ''
        if re[8] == None:
            ID_USR = '******'
        else:
            ID_USR = str(re[8])
        NAME_USR = re[9]
        if NAME_USR == None:
            NAME_USR = '******'
        if re[10] != None:
            STORY = re[10].read()
        else:
            STORY = ''
        if re[11] != None:
            TITLE = re[11].read()
        else:
            TITLE = ''
        DOC_ID = str(re[12])
        #DOC_ID=str(I)
        TP = re[13]
        if TP == None:
            TP = 'status'  # default entry type
        PHONE = re[14]
        if PHONE == None:
            PHONE = ''
        STREET = re[15]
        if STREET == None:
            STREET = ''
        CITY = re[16]
        if CITY == None:
            CITY = ''
        COUNTRY = re[17]
        if COUNTRY == None:
            COUNTRY = ''
        ZIP = re[18]
        if ZIP == None:
            ZIP = ''
        LATITUDE = re[19]
        if LATITUDE == None:
            LATITUDE = ''
        LONGITUDE = re[20]
        if LONGITUDE == None:
            LONGITUDE = ''
        TPS = re[21]
        if TPS == None:
            TPS = 'W'  # default source tag
        URL = re[22]
        #====================
        if URL == None:
            URL = ''
        else:
            URL = URL.read()
        # Defensive re-checks (already normalized above for the LOB paths).
        if URL_ICON == None:
            URL_ICON = ''
        if URL_PICTURE == None:
            URL_PICTURE = ''
        if USR == None:
            USR = '******'
        if ZIP == None:
            ZIP = ''
        #====================
        params = {
            'CITY': CITY,
            'COUNTRY': COUNTRY,
            'DOC_ID': DOC_ID,
            'ID_USR': ID_USR,
            'LATITUDE': LATITUDE,
            'LONGITUDE': LONGITUDE,
            'NAME_USR': NAME_USR,
            'PG': PG,
            'PHONE': PHONE,
            'PROCESSED': PROCESSED,
            'PURPOSE': PURPOSE,
            'STORY': STORY,
            'STREET': STREET,
            'TERM': TERMO,
            'TITLE': TITLE,
            'TP': TP,
            'TPS': TPS,
            'URL': URL,
            'URL_ICON': URL_ICON,
            'URL_PICTURE': URL_PICTURE,
            'USR': USR,
            'ZIP': ZIP
        }
        web.insert(str(I), params)
        itemsc += 1