Example #1
def entry_process(data_parse, usuario, purpose):
    rets = []
    t_threads = []
    t_threads.append(thread_cntl())

    # Prepare both layout sets for this user and purpose.
    onto_basis2 = []
    for onto_basisk in get_layouts(usuario, purpose):
        onto_basis2.append(Identify.prepare_layout(usuario, onto_basisk))

    onto_basis22 = []
    for onto_basisk in get_layouts2(usuario, purpose):
        onto_basis22.append(Identify.prepare_layout(usuario, onto_basisk))

    # The user is passed as the id expected by pre_process_data2.
    onto = Identify.pre_process_data2(onto_basis2, onto_basis22, data_parse,
                                      purpose, usuario, t_threads[-1], [])
    rets.extend(onto)
    return rets
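
A minimal usage sketch for entry_process, assuming the surrounding module provides get_layouts, get_layouts2, thread_cntl and Identify; the user name, purpose string and input lines below are hypothetical placeholders:

lines = ['first sentence of the document.', 'second sentence.']
layers = entry_process(lines, 'some_user', 'search')  # placeholder arguments
for lay in layers:
    print 'Extracted layer:', lay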
def process_page(all_ps, id, purpose, pgs, finish, th, pg_index_rs,
                 all_size_pg, job_index, addresses, result_onto_tree_er,
                 c_ontos, usr, termo, uid_S):
    l_p_ant = None
    # Rebuilt locally: one [ret_ps, url, uid] entry per processed page.
    result_onto_tree_er = []
    progress = (pg_index_rs * 1.0) / all_size_pg  # fraction of pages done

    for lines_doc2_ in all_ps:
        print 'Start page:', pg_index_rs, ' of total:', all_size_pg
        endereco_url = lines_doc2_[0]
        lines_doc2 = lines_doc2_[1]
        uid_DS = uid_S
        print 'UID:', uid_DS, '-----------------------'

        # ===== fuzzy parse: run every prepared ontology over the page =====
        t_threads = []
        ret_ps = []
        indice_linha = 1
        inds2p = 0
        addresses.append(endereco_url)

        print 'Preprocessdata in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(lines_doc2)
        if inds2p > 100:
            pro = (indice_linha * 1.0) / len(lines_doc2)
            print 'TraceQI:', pro
            inds2p = 0
        inds2p += 1

        for [onto_basis2, onto_basis22, purpose] in c_ontos:
            t_threads.append(thread_cntl())
            print 'Start Identify->pre_process_data():', purpose
            ret_ps.append(Identify.pre_process_data2(
                onto_basis2, onto_basis22, lines_doc2,
                purpose, id, t_threads[-1], []))
            print 'End Identify->pre_process_data():'
        # ret_ps now holds the page's layers.
        result_onto_tree_er.append([ret_ps, endereco_url, uid_DS])
        print 'DBG:', result_onto_tree_er

    # ===== post every extracted layer =====
    indc = 0
    extdt = []
    for sb in result_onto_tree_er:
        [ret_psk, endereco_url, uid_DS] = sb
        indc += 1
        indc2 = 0
        for [lays, purpose] in ret_psk:
            for lay in lays:
                indc2 += 1  # each layer is one sentence
                try:
                    post_object_by_data3(lay, uid_DS, usr, termo, [],
                                         purpose, indc2, l_p_ant, extdt)
                except Exception, errc:
                    log.exception('Error on post object:')
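
A hedged sketch of driving process_page, assuming the same module-level helpers; every value below (the page list, the prepared c_ontos triples, the user and term strings) is a placeholder, and the [url, lines] pair shape follows the unpacking done inside the function:

pages = [['http://example.com/a', ['one line of page text.']]]
c_ontos = [[onto_basis2, onto_basis22, 'search']]  # built via Identify.prepare_layout
tree, addrs = [], []
process_page(pages, 'some_user', 'search', [], False, thread_cntl(),
             1, len(pages), 0, addrs, tree, c_ontos,
             'some_user', 'query term', 'uid-1')
# tree now holds [ret_ps, url, uid] entries, one per page.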
Example #3
import urllib    # used below to quote page addresses
import urllib2   # used below to fetch page content


def process_termo(termo, usr, purp, start_c, path_j):
    # assemble the ontology
    layouts_f = get_layouts(usr, purp)
    layouts_f2 = get_layouts2(usr, purp)
    t_threads = []
    t_threads.append(thread_cntl())
    ret_ps = []

    lines_doc2 = termo
    id = usr

    onto_basis2 = []
    for onto_basisk in layouts_f:
        onto_basis2.append(Identify.prepare_layout(id, onto_basisk))

    onto_basis22 = []
    for onto_basisk in layouts_f2:
        onto_basis22.append(Identify.prepare_layout(id, onto_basisk))

    ret_ps.append(Identify.pre_process_data2(onto_basis2, onto_basis22,
                                             lines_doc2, purp, id,
                                             t_threads[-1], []))

    objs_search2 = []
    complements = []
    objs_search = []
    purposes = get_purpsz(usr, purp)
    print 'purposes:[', purposes, ']'

    # Collect search terms: topics tagged IDENTIFICADOR/action feed
    # objs_search, topics tagged REALID/REALID2 feed objs_search2;
    # bare punctuation tokens are skipped.
    for lays in ret_ps:
        for lay in lays:
            for top in lay.topicos:
                for dts in top.dt:
                    if dts.upper() == "IDENTIFICADOR" or dts.lower() in ['action']:
                        for s in top.sinapses:
                            for dts2 in s.nr.dt:
                                if dts2 not in ['.', ':', '\'', '"', '?']:
                                    objs_search.append(dts2)
                                    print 'Identify:', dts2
                    if dts.upper() == "REALID" or dts.upper() == "REALID2":
                        for s in top.sinapses:
                            for dts2 in s.nr.dt:
                                if dts2 not in ['.', ':', '\'', '"', '?']:
                                    objs_search2.append(dts2)
                                    print 'RealID:', dts2

    print 'Objs:Search:', objs_search
    if len(objs_search2) > 0 or len(objs_search) > 0:
        opener = urllib2.build_opener()

        def pg_open(addresss, th, pages, pgind, ind_emit, start_c, total_p):
            try:
                ind = 0
                inds = 0
                acumul = 0
                print 'Process init open page:', len(addresss)
                total_p = total_p / len(addresss)
                for address in addresss:
                    try:
                        ind += 1
                        inds += 1
                        lines_doc = []
                        if address != 'debug-url':
                            # Emit a trace point every few pages.
                            if inds > 2:
                                print 'traceq i:', total_p
                                if traceQ(acumul, usr):
                                    acumul = 0
                                inds = 0
                            else:
                                acumul += total_p
                            # Fetch the page text through the get_Text proxy.
                            address = urllib.quote(address)
                            address = address.replace('%3A', ':')
                            url = 'http://www.mind-net.com/get_Text.php?q=' + address
                            print 'Open page:', url
                            content = opener.open(url, '').read()
                            lines_doc.append(content.replace('\n', ' '))
                            pages.append(Task_C(pg_add, lines_doc))
                            print 'Get content for page:', pgind, ' was finished. Len:', len(lines_doc)
                            pgind += 1
                        else:
                            # Debug path: read from the module-level entry_doc fixture.
                            for line_deb in entry_doc:
                                lines_doc.append(line_deb)
                            pages.append(Task_C(pg_add, lines_doc))
                            print 'Get content for page:', pgind, ' was finished. Len:', len(lines_doc)
                            pgind += 1
                    except Exception, e:
                        print 'Error PG_OPEN_I', e, '..'

                th.finished = True
            except Exception, e:
                print 'Error PG_OPEN', e, '..'
                th.finished = True
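
The fetch step inside pg_open reduces to one HTTP round trip against the get_Text.php endpoint. A standalone sketch of that step, assuming the mind-net.com endpoint is reachable (the target page address is a placeholder):

import urllib
import urllib2

opener = urllib2.build_opener()
address = urllib.quote('http://example.com/page').replace('%3A', ':')
content = opener.open('http://www.mind-net.com/get_Text.php?q=' + address, '').read()
print content.replace('\n', ' ')[:120]  # fetched text, flattened to one line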