import urllib
import urllib2


def entry_process(data_parse, usuario, purpose):
    # Build both layout bases for this user/purpose, then run the fuzzy
    # pre-processing over the parsed input data.
    rets = []
    t_threads = [thread_cntl()]

    onto_basis2 = []
    for onto_basisk in get_layouts(usuario, purpose):
        onto_basis2.append(Identify.prepare_layout(usuario, onto_basisk))

    onto_basis22 = []
    for onto_basisk in get_layouts2(usuario, purpose):
        onto_basis22.append(Identify.prepare_layout(usuario, onto_basisk))

    # Assumption: the original passed the builtin id here; the user id
    # (usuario) is presumably what pre_process_data2 expects, matching
    # process_termo() below, where id = usr is passed in this position.
    onto = Identify.pre_process_data2(onto_basis2, onto_basis22, data_parse,
                                      purpose, usuario, t_threads[-1], [])
    for s in onto:
        rets.append(s)
    return rets
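# --- Hedged sketch (assumption, not part of the original code base). ---
# thread_cntl, get_layouts, get_layouts2 and Identify are defined elsewhere
# in this module. The only behavior this section relies on from thread_cntl
# is a `finished` flag (pg_open() sets th.finished = True when done), so a
# minimal stand-in for exercising this section could be:
try:
    thread_cntl
except NameError:
    class thread_cntl(object):
        def __init__(self):
            # Completion flag polled by whoever dispatched the work.
            self.finished = False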
def process_page(all_ps, id, purpose, pgs, finish, th, pg_index_rs,
                 all_size_pg, job_index, addresses, result_onto_tree_er,
                 c_ontos, usr, termo, uid_S):
    # Pre-process every fetched page against each prepared ontology pair,
    # then post the resulting layers.
    l_p_ant = None
    result_onto_tree_er = []
    # Fraction of pages already handled (float division; the original
    # integer division truncated this to 0).
    progress = (pg_index_rs * 1.0) / all_size_pg

    for lines_doc2_ in all_ps:
        print 'Start page:', pg_index_rs, ' of total:', all_size_pg
        endereco_url = lines_doc2_[0]   # page address
        lines_doc2 = lines_doc2_[1]     # page content lines
        uid_DS = uid_S
        print 'UID:', uid_DS, '-----------------------'

        # ============= fuzzy parse =============
        t_threads = []
        ret_ps = []
        indice_linha = 0
        inds2p = 0

        # The original per-sentence loop over lines_doc2 is disabled:
        # the whole page is pre-processed in a single pass.
        indice_linha += 1
        addresses.append(endereco_url)
        print 'Preprocessdata in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(lines_doc2)
        if inds2p > 100:
            pro = (indice_linha * 1.0) / len(lines_doc2)
            print 'TraceQI:', pro
            inds2p = 0
        inds2p += 1

        # Run the pre-processing once per prepared ontology pair
        # (note: the loop variable purpose shadows the parameter).
        for [onto_basis2, onto_basis22, purpose] in c_ontos:
            ret_ps.append([])
            t_threads.append(thread_cntl())
            print 'Start Identify->pre_process_data():', purpose
            ret_ps[-1] = Identify.pre_process_data2(
                onto_basis2, onto_basis22, lines_doc2, purpose, id,
                t_threads[-1], [])
            print 'End Identify->pre_process_data():'

        # ret_ps holds the page's layers
        result_onto_tree_er.append([ret_ps, endereco_url, uid_DS])

    print 'DBG:', result_onto_tree_er

    # ============= post the results =============
    indc = 0
    extdt = []
    for sb in result_onto_tree_er:
        [ret_psk, endereco_url, uid_DS] = sb
        indc += 1
        # Post each layer of the page.
        indc2 = 0
        for [lays, purpose] in ret_psk:
            for lay in lays:
                indc2 += 1
                # Each layer is a sentence.
                try:
                    post_object_by_data3(lay, uid_DS, usr, termo, [],
                                         purpose, indc2, l_p_ant, extdt)
                except Exception, errc:
                    log.exception('Error on post object:')
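# --- Hedged sketch (assumption, not part of the original code base). ---
# process_page() above reads each page task as [0] -> URL and [1] -> content
# lines, while pg_open() further below builds tasks via Task_C(address,
# lines_doc). A minimal Task_C consistent with both usages could be:
try:
    Task_C
except NameError:
    class Task_C(object):
        def __init__(self, address, lines):
            self.address = address
            self.lines = lines

        def __getitem__(self, i):
            # Index like a pair: [0] -> address, [1] -> lines.
            return (self.address, self.lines)[i]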
def process_termo(termo, usr, purp, start_c, path_j):
    # Build the ontology layouts for this user/purpose.
    layouts_f = get_layouts(usr, purp)
    layouts_f2 = get_layouts2(usr, purp)
    t_threads = [thread_cntl()]
    ret_ps = [[]]
    lines_doc2 = termo
    id = usr

    onto_basis2 = []
    for onto_basisk in layouts_f:
        onto_basis2.append(Identify.prepare_layout(id, onto_basisk))

    onto_basis22 = []
    for onto_basisk in layouts_f2:
        onto_basis22.append(Identify.prepare_layout(id, onto_basisk))

    ret_ps[-1] = Identify.pre_process_data2(onto_basis2, onto_basis22,
                                            lines_doc2, purp, id,
                                            t_threads[-1], [])

    # Walk the pre-processed layers and collect search terms:
    # IDENTIFICADOR/action topics feed objs_search, REALID/REALID2 topics
    # feed objs_search2. Bare punctuation tokens are skipped.
    objs_search2 = []
    complements = []
    objs_search = []
    purposes = get_purpsz(usr, purp)
    print 'purposes:[', purposes, ']'
    punct = ['.', ':', '\'', '"', '?']
    for lays in ret_ps:
        for lay in lays:
            for top in lay.topicos:
                for dts in top.dt:
                    if dts.upper() == "IDENTIFICADOR" or dts.lower() in ['action']:
                        for s in top.sinapses:
                            for dts2 in s.nr.dt:
                                if dts2 not in punct:
                                    objs_search.append(dts2)
                                    print 'Identify:', dts2
                    if dts.upper() == "REALID" or dts.upper() == "REALID2":
                        for s in top.sinapses:
                            for dts2 in s.nr.dt:
                                if dts2 not in punct:
                                    objs_search2.append(dts2)
                                    print 'RealID:', dts2

    print 'Objs:Search:', objs_search

    if len(objs_search2) > 0 or len(objs_search) > 0:
        opener = urllib2.build_opener()

        def pg_open(addresss, th, pages, pgind, ind_emit, start_c, total_p):
            # Fetch the text of each address through the get_Text.php
            # endpoint and queue the result as a page task.
            try:
                ind = 0
                inds = 0
                acumul = 0
                print 'Process init open page:', len(addresss)
                total_p = total_p / len(addresss)
                for address in addresss:
                    try:
                        ind += 1
                        inds += 1
                        lines_doc = []
                        if address != 'debug-url':
                            # Emit a progress trace roughly every third page.
                            if inds > 2:
                                print 'traceq i:', total_p
                                if traceQ(acumul, usr):
                                    acumul = 0
                                inds = 0
                            else:
                                acumul += total_p
                            # Quote the address but restore the scheme colon.
                            address = urllib.quote(address)
                            address = address.replace('%3A', ':')
                            url = 'http://www.mind-net.com/get_Text.php?q=' + address
                            print 'Open page:', url
                            # The empty data argument makes this a POST.
                            content = opener.open(url, '').read()
                            # Collapse the page into a single line.
                            lines_doc.append(content.replace('\n', ' '))
                        else:
                            # Debug path: entry_doc is presumably a
                            # module-level fixture defined elsewhere.
                            for line_deb in entry_doc:
                                lines_doc.append(line_deb)
                        # Assumption: the original passed an undefined pg_add
                        # here; the page address appears to be intended.
                        pages.append(Task_C(address, lines_doc))
                        print 'Get content for page:', pgind, ' was finished.Len:', len(lines_doc)
                        pgind += 1
                    except Exception, e:
                        print 'Error PG_OPEN_I', e, '..'
                th.finished = True
            except Exception, e:
                print 'Error PG_OPEN', e, '..'
                th.finished = True
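# --- Usage sketch (hypothetical values, not from the original code base). ---
# pg_open() is defined but not dispatched in this section; given the
# th.finished flag it is presumably run on a worker thread over the collected
# search terms. A call into this pipeline would look roughly like:
#
#     term_lines = ['find articles about DNS resolution']
#     process_termo(term_lines, 'user-1', 'search', 0, '/tmp/jobs')
#
# where 'user-1', 'search' and '/tmp/jobs' are illustrative placeholders for
# the user id, purpose and job path.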