Beispiel #1
0
def runc_layer(layer):
    objects=[]
    ref_opcode=''
    # Relaction-oper-opcode opcode original dos qualificadores se nao especificado
    #=====================================================
    def get_o_ob():
        for s in objects:
            if s.name==ob: return s
        return None
    #=====================================================

    if True:
       print 'RTS-REF(1):------------'
       r=layer
       for s in r.topicos:
         print 'Topico:',s.dt
         print 'SNS:++++++++++++++++++'
         for s1 in s.sinapses:
           print s1.nr.dt
         print '++++++++++++++++++'
       print 'RTS-REF(1)(END):------------'

    obj_foco=[]
    #print 'N tops:' , len(layer.topicos)
    for t in layer.topicos:
        tp=t
        nr_t=t.dt[0]
        if nr_t == '': continue
        if 'referencial.source' in t.dt :
         sn_dt=''
         for sn in tp.sinapses:
          if 'indicador' not in sn.nr.dt:
           for s1 in sn.nr.dt:
            sn_dt+=(s1+' ')
           
         if umisc.trim(sn_dt) != '$$id$$' and umisc.trim(sn_dt) != '':
          print 'referencial.source:',sn_dt
          if len(obj_foco) > 0:
           lay=obj_foco[0]
           lay.name+=(sn_dt)
          else:
           lay=mdNeural.mdLayer()
           lay.name=sn_dt
           #====
           objects.append(lay)
           obj_foco.append(lay)
    rel1=False
    for t in layer.topicos:
       topico_rsf=t
       #==============================================
       for dt_top in topico_rsf.dt:
        if dt_top in [ 'referencial'] or ref_opcode in [ 'refer']:
             for ob1 in obj_foco:
              ob1.set_topico_nr(topico_rsf)
             break
    print 'Obj.foco.refer:',obj_foco
    return obj_foco
Beispiel #2
0
def get_db_pages(usr2):
    resultSet = conn.sql("select pg from know_pages where USERNAME='******' order by i")
    typ = []
    for results in resultSet:
        ts = results[0].read()
        if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(
                ts) != '\r':
            typ.append(ts)
    print 'Reuse pgs:', len(typ)
    return typ
Beispiel #3
0
        def pg_open(addresss, th, pages, pgind, ind_emit, start_c):
            print 'Start read page:', addresss
            try:
                for address in addresss:
                    lines_doc = []
                    links_k2 = []
                    if address != 'debug-url':
                        #======================
                        #opener = urllib2.build_opener()
                        address = urllib.quote(address)
                        #url='http://www.mind-net.com/get_Text.php?q='+address
                        pg_add = address
                        #content = opener.open(url, '' ).read()
                        content = call_text(address)
                        tmpd = ''
                        for d in content:
                            if d == '\n':
                                tmpd = umisc.trim(tmpd)
                                lines_doc.append(tmpd)
                                tmpd = ''
                            else:
                                tmpd += d
                        #======================
                        #opener = urllib2.build_opener()
                        #url='http://www.mind-net.com/get_links.php?q='+address
                        #content = opener.open(url, '' ).read()
                        content = call_links(address)
                        tmpd = ''
                        for d in content:
                            if d == '\n':
                                tmpd = umisc.trim(tmpd)
                                links_k2.append(tmpd)
                                tmpd = ''
                            else:
                                tmpd += d
                        #============
                        pages.append(Task_C(pg_add, lines_doc, links_k2))
                        print 'Get content for page:', pgind, ' was finished.Len:', len(
                            lines_doc), ' links count:', len(links_k2)
                        pgind += 1
                    else:
                        for line_deb in entry_doc:
                            lines_doc.append(line_deb)
                        pages.append(Task_C(pg_add, lines_doc, links_k2))
                        print 'Get content for page:', pgind, ' was finished.Len:', len(
                            lines_doc)
                        pgind += 1

                th.finished = True
            except Exception, er:
                print er, '................'
                th.finished = True
Beispiel #4
0
def post_object_by_data3p(layer, cenario, usr, termo, foco, posted_objs, senti,
                          l_p_ant):
    if layer.name == '': return

    def get_top_level(obj, foc, usr, termo_s):
        rts = []
        resultSet = conn.sqlX(
            "SELECT lev,id_top FROM  SEMANTIC_OBJECT_DT3 where OBJECT = ? and TOPICO= ? and USERNAME = ? and UID= ?  order by LEV ",
            ([obj, foc, usr, termo_s]))
        for results in resultSet:
            i = results[0]
            id_top = results[1]
            rts.append([i, id_top])
        return rts

    #=======================
    nameo = layer.name
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        if l_p_ant != None:
            nameo = l_p_ant.name
        if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
            return

    fnd_tops = False

    l_p_ant = layer

    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'

    if not fnd_tops: return

    print 'Post-obj:[', nameo, ']'
    no_post_o = False
    for [s, st] in posted_objs:
        if s == nameo and st == senti:
            no_post_o = True
    posted_objs.append([nameo, senti])
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    if not no_post_o:
        sql1 = "insert into SEMANTIC_OBJECT3(username,objeto,cenar,senti) values(?,?,?,?)"
        try:
            conn.sqlX(sql1, ([usr, nameo, cenario, senti]))
        except Exception, err:
            print 'Erro ao post(OBJECT):', err
Beispiel #5
0
def load_pages_know(usr):
    """Fill ``entry_doc`` with training pages from the database or a fallback file.

    When ``get_db_pages(usr)`` returns pages, each becomes a
    ``[Task_C('debug', page), index]`` entry (index starting at 1).
    Otherwise the training text file is read: lines are accumulated until a
    blank line, at which point the paragraph is appended to ``entry_doc`` and
    stored through ``post_db_page``.

    Args:
        usr: user name handed to ``get_db_pages`` and ``post_db_page``.
    """
    rts = get_db_pages(usr)
    if len(rts) > 0:
        for cnt, r in enumerate(rts, 1):
            entry_doc.append([Task_C('debug', r), cnt])
        return

    cnt = 0
    lsn = ''
    # ``with`` closes the handle even if posting a paragraph fails; every
    # backslash is escaped explicitly (same runtime path as before).
    with open("c:\\wamp\\www\\Neural\\tst_training.txt") as training:
        for line in training:
            s = umisc.trim(line)
            if s == '' or s == '\n':
                # A blank line closes the current paragraph.
                cnt += 1
                lsn = lsn.replace('\r', ' ').replace('\n', ' ')
                entry_doc.append([Task_C('debug', lsn), cnt])
                post_db_page(usr, lsn)
                lsn = ''
            else:
                lsn += (' ' + line)
    # Text after the last blank line is not flushed; the original left that
    # step commented out, so the behaviour is kept.
Beispiel #6
0
def run_cmd():
    try:
        Pyro.core.initClient()
        objectName = 'layoutBean'
        hostname = '79.143.185.3'
        port = '28'
        print 'Creating proxy for object', objectName, ' on ', hostname + ':' + port
        if port:
            URI = 'PYROLOC://' + hostname + ':' + port + '/' + objectName
        else:
            URI = 'PYROLOC://' + hostname + '/' + objectName
        print 'The URI is', URI
        proxy = Pyro.core.getProxyForURI(URI)
        #==
        list = proxy.process_cmd('get_page', '', '')
        print 'List.getted!!'
        list = umisc.trim(list)
        if len(list) < 5:
            return
        params = list
        cmd = 'python entry_SemaIndexerStage1.py ' + params
        print cmd
        os.system(cmd)
    except:
        log.exception("ERROR==============")
        time.sleep(2)
Beispiel #7
0
def sep_multiple_data_unkwon(dt):
    """Return the tail of ``dt`` starting at its last article ('o', 'a', 'os', 'as').

    ``dt`` is tokenised with ``parse_t``; tokens are taken from the end back
    to (and including) the first article met.  When no article is present,
    all tokens are kept.  The tokens are joined with spaces and trimmed.
    """
    artigos = ('o', 'a', 'os', 'as')
    tail = []
    for token in reversed(parse_t(dt)):
        tail.append(token)
        if token in artigos:
            break
    tail.reverse()
    return umisc.trim(''.join([' ' + token for token in tail]))
Beispiel #8
0
 def pg_open(address,th,page,pgind,ind_emit,start_c):
  if ind_emit > 0 : pgind=ind_emit
  if start_c > 0 : 
   #print 'Get content for page:',pgind,'\n'
   print 'Get content for page:',pgind
  else:
   print 'Get content for page:',pgind
  
  try:
   opener = urllib2.build_opener()
   address=urllib.quote(address)
   url='http://www.mind-net.com/get_Text.php?q='+address
   content = opener.open(url, '' ).read()
   lines_doc=[]
   tmpd=''
   for d in content:
    if d == '\n':
      tmpd=umisc.trim(tmpd)
      if tmpd.find('http://') > -1:
       ''' '''
      else:
       lines_doc.append(tmpd)
      tmpd=''
    else:
      tmpd+=d    
   page.dt1=pg_add
   page.dt2=lines_doc
   th.finished=True
  except :
   th.finished=True
  print 'Get content for page:',pgind,' was finished.Len:',len(lines_doc)
Beispiel #9
0
def post_links(endereco,termo,usr,purp):
 try:
   sql1="insert into WEB_CACHE_LINKS (URL,TERMO,PURPOSE,USR,PROCESSED) values(%s,%s,%s,%s,'N')"
   if umisc.trim(endereco) != '':
    cursorpostl.execute (sql1,(MySQLdb.escape_string(endereco),MySQLdb.escape_string(termo),purp,usr))
 except:
  pass
Beispiel #10
0
 def pg_open(address,th,page,pgind,ind_emit,start_c):
  if ind_emit > 0 : pgind=ind_emit
  if start_c > 0 : 
   #print 'Get content for page:',pgind,'\n'
   print 'Get content for page:',pgind
  else:
   print 'Get content for page:',pgind
  
  try:
   lines_doc=[]
   if address != 'debug-url':
    opener = urllib2.build_opener()
    address=urllib.quote(address)
    url='http://www.mind-net.com/get_Text.php?q='+address
    content = opener.open(url, '' ).read()
    tmpd=''
    for d in content:
     if d == '\n':
      tmpd=umisc.trim(tmpd)
      if tmpd.find('http://') > -1:
       ''' '''
      else:
       lines_doc.append(tmpd)
      tmpd=''
     else:
      tmpd+=d    
   else:
    for line_deb in entry_doc:
     lines_doc.append(line_deb)
   page.dt1=pg_add
   page.dt2=lines_doc
   th.finished=True
  except Exception,er :
   print er,'................'
   th.finished=True
Beispiel #11
0
def get_db_pages(usr2,pg_ex,connc):
  
  def fecha_pagina(uid):
    print 'Close MSG:',uid
    s=w_cache3.find({'doc_id':uid})
    s[u'PROCESSED']='S'
    w_cache3.update({'_id':s['_id']},s)
 
  
  def remote_f():
        print 'Getting remote-pages...'
        return  proxy.get_pages('',usr2)
  
  if RemoteL:
    return remote_f()
  
  print 'PG_EX',pg_ex,len(pg_ex)  
  
  pgs_exs= pg_ex.split(',')
 
  #resultSet = connc.sql ("select pg,i,title from web_cache3 where USR='******' and i in( "+pg_ex+" ) order by i") 
   
  resultSet=[]   
  for p1 in pgs_exs:
        rg=None
        for d in w_cache3.find({'doc_id':p1}):
         rg=d
        if rg != None: 
         try:
          rtc=str(rg[u'pg'])
          rtc2=str(rg[u'title'])        
          resultSet.append( [ rtc1.encode('latin-1'),p1,rtc2.encode('latin-1') ] )
         except:
          print 'Error.get.pg:',rg
          log.exception("")
          try:
           resultSet.append( [ rg[u'PG'].encode('latin-1'),p1,rg[u'TITLE'].encode('latin-1') ] )
          except:
           print 'Error.get.pg(2):',rg
           log.exception("")
  
          
  typ=[]
  print 'Collect pg:',pg_ex
  #===============================================  
  #typ.append(['O perfil da empresa no Twitter foi criado em 20 de Fevereiro de 2008.',35835 ])
  #return typ
  #================================================
  for [ts,ids,ids2] in resultSet:
    if ids2 == None: ids2= ''
    if umisc.trim(ids2) != '':
            ts=(ids2+': '+ts)
    #if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
    typ.append([ts,ids])    
    if re_post and not fs_teste:
     fecha_pagina(ids)
    print 'Read page',ids #,ts
  print 'Reuse pgs:',len(typ)
  return  typ 
Beispiel #12
0
def post_cn(its):
 f=open("/Neural/txt_logs","w")
 print 'POST.LEN:',len(its) 
 for [PG,PROCESSED,TERM,USR,PURPOSE,URL_ICON,URL_PICTURE,ID_USR,NAME_USR,STORY,TITLE,DOC_ID,TP,PHONE,STREET,CITY,COUNTRY,ZIP,LATITUDE,LONGITUDE,TPS,URL] in its:  
      c1=str(PG)
      if umisc.trim(c1) != '':      
       f.write( str(PG)+'|'+str(PROCESSED)+'|'+str(TERM)+'|'+str(USR)+'|'+str(PURPOSE)+'|'+str(URL_ICON)+'|'+str(URL_PICTURE)+'|'+str(ID_USR)+'|'+str(NAME_USR)+'|'+str(STORY)+'|'+str(TITLE)+'|'+str(DOC_ID)+'|'+str(TP)+'|'+str(PHONE)+'|'+str(STREET)+'|'+str(CITY)+'|'+str(COUNTRY)+'|'+str(ZIP)+'|'+str(LATITUDE)+'|'+str(LONGITUDE)+'|'+str(TPS)+'|'+str(URL)+'|'+'\n'   )
 f.close()     
Beispiel #13
0
def get_aliases_ob( ):    
  str_ret=[]
  for ir in layer_processes.lrs:
      print 'get_aliases_ob()->ir(1):',ir
      #if ir != None: print 'lr:',ir.name
      for topico in ir.topicos:
       if len(topico.dt) > 0 :
        topicodt=topico.dt
        if 'identificador' in topicodt or 'realid' in topicodt or 'realid2' in topicodt  or 'object' in topicodt: 
         dtk=''
         for p in topico.sinapses:
          for dts1 in p.nr.dt:                
           dtk+=' '+umisc.trim(dts1)
         if umisc.trim(dtk) != '': 
           print 'Collect.element:',dtk
           str_ret.append(umisc.trim(dtk) )
  return str_ret
Beispiel #14
0
def post_pagina(endereco,conteudo_i,termo,usr,purp):
 try:
   conteudo=''
   for l in conteudo_i:
    conteudo+=(l+'\n')
   if umisc.trim(conteudo) != '':
    sql1="insert into WEB_CACHE (URL,PG,TERMO,PURPOSE,USR,SEMA_RESUME) values(%s,%s,%s,%s,%s,'')"
    cursorpostp.execute (sql1,(MySQLdb.escape_string(endereco),MySQLdb.escape_string(conteudo),MySQLdb.escape_string(termo),purp,usr))
 except :
   pass
Beispiel #15
0
def post_object_by_data_es(layer,usr): 
 # Posts a layer: clears the stored object named after the trimmed layer
 # name, writes topic/synapse rows, then writes one relation row per link
 # and recurses into each linked layer.
 nameo=layer.name
 nameo=umisc.trim(nameo)
 clear_obj(usr,nameo)
 # NOTE(review): the next line is truncated in this copy (the text after
 # ', usr:' was replaced by '******'); the statements that follow belong to
 # a nested helper (presumably post_nr) whose header and setup of tp_Dt,
 # uid and level are missing here - restore them from the original file.
 print 'Post LR:',nameo,',len:',len(nameo),', usr:'******''
   # Concatenate the topic's dt items into its name.
   for d in tp.dt:
    tp_Dt+=d
   tp_name=tp_Dt
   for sn in tp.sinapses:
    # Concatenate the dt items of the synapse's neuron.
    sn_dt=''
    for s1 in sn.nr.dt:
     sn_dt+=s1
    sql1="insert into "+mdTb.table_dt+"(username,\"UID\",dt,topico,LEV,SIN,ID_TOP) values(?,?,?,?,?,'Composicao',1)"
    try:
     conn.sqlX (sql1,([usr,uid,sn_dt,tp_Dt,level]))
    except:
     # Insert failures are printed and logged; processing continues.
     print 'Erro ao post:',nameo,tp_Dt,sn_dt
     log.exception("--------------------------")
    #==========
    # Recurse into the synapse's neuron one level deeper.
    post_nr(usr,sn.nr,level+1)
  #==========    ===============================================
 
   
  post_nr(usr,tp)
 
 #===============================================
 # One relation row per link, followed by a recursive post of the linked layer.
 for lnk in layer.links:
  sqlc='insert into  '+mdTb.table_relaction+'(OBJ_ORIG,OBJ_DEST,OPCODE,USERNAME,FOCO,FOCO_D,\"UID\") values(?,?,?,?,?,?,?)' 
  #====================   
  def get_nr_dts1(nrs):
   # Concatenates the dt items of each neuron, with a ',' after each neuron.
   d=''
   for nr in nrs:
    for n in nr.dt:
     d+=n
    d+=','
   return d
  #====================   
  foco_o=get_nr_dts1(lnk.foco_o)
  foco_d=get_nr_dts1(lnk.foco_d)
  conn.sqlX (sqlc,([nameo,lnk.lr.name,lnk.opcode,usr,foco_o,foco_d,0]))
  #===============
  post_object_by_data_es(lnk.lr,usr)   
Beispiel #16
0
def get_db_pages(usr2):

  def remote_f():
        print 'Getting remote-pages...'
        return  proxy.get_pages('',usr2)
  
  if RemoteL:
    return remote_f()
  
  
  resultSet = conn.sql ("select pg,title,story,i from web_cache where USERNAME='******' order by i") 
  #resultSet = conn.sql ("select pg,title,story,i from web_cache where USERNAME='******' and i = 96 order by i") 
  typ=[]
  for results in resultSet:
    ts=results[0].read()
    ids=results[1]
    if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
     typ.append([ts,ids])    
    if len(typ) > 100: break     
  print 'Reuse pgs:',len(typ)
  return  typ 
def get_aliases_ob():
    str_ret = []
    for ir in layer_processes.lrs:
        print 'get_aliases_ob()->ir(1):', ir
        for topico in ir.topicos:
            if len(topico.dt) > 0:
                topicodt = topico.dt
                if 'identificador' in topicodt or 'realid' in topicodt or 'realid2' in topicodt or 'object' in topicodt:
                    tmpcs = ''
                    for p in topico.sinapses:
                        for dts1 in p.nr.dt:
                            tmpcs += ' ' + dts1
                    str_ret.append(umisc.trim(tmpcs))
    return str_ret
Beispiel #18
0
def post_object_by_data_es(layer, usr):
    # Posts one object row for ``layer`` into ``tb_object``, keyed by
    # "<name> <cenario>", when the layer has at least one topic.
    cenario = 0
    senti = 0
    #def post_object_by_data3p( , , ,,, ,):
    # NOTE(review): the next line is corrupted in this copy ('******' replaced
    # part of the statement) and ``nameo`` is read before any visible
    # assignment - restore the missing code from the original file.
    print 'Post LR:', nameo, ',len:', len(nameo), ', usr:'******'Post layer:', nameo

    fnd_tops = False

    geral_uuid = cenario
    # Debug dump; also records whether the layer has any topic.
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'

    if not fnd_tops: return
    # The row key is the object name followed by the scenario number.
    ky1 = nameo + ' ' + str(cenario)
    nameo = ky1
    print 'Post-obj:[', nameo, ']'
    #==========
    #if not no_post_o and len(layer.topicos)>0:
    print 'Insert-OBJ:'
    #==
    # NOTE(review): ``no_post_o`` is not assigned anywhere in the visible
    # part of this function - confirm where it comes from.
    if not no_post_o:
        #sql1="insert into SEMANTIC_OBJECT3(username,objeto,cenar,senti) values(?,?,?,?)"
        try:
            cols = {
                "username": usr,
                "objeto": ky1,
                "cenar": str(cenario),
                "sento": str(senti)
            }
            tb_object.insert(ky1, cols)
            #conn.sqlX (sql1,([usr,nameo,cenario,senti]))
        except Exception, err:
            # Insert failures are printed and otherwise ignored.
            print 'Erro ao post(OBJECT):', err
Beispiel #19
0
 def process_page(lines_doc2,id,purpose,pgs,finish,th):
  ln_o=''
  try:
    l2=Identify.prepare_layout(id,purpose)
    if True :
     for s in lines_doc2:
      ln_o+=' '+s
     if umisc.trim(ln_o) == '':
      finish.finished=True
      return 
     ir=Identify.process_data(l2,ln_o,onto_basis,purpose,id,th) 
     if ir[0] != None :
      result_onto_tree_er.append(ir[0])
     if ir[1] != None:
      result_onto_tree_bpm.append(ir[1])   
    finish.finished=True
  except:
    finish.finished=True    
  print 'Thread ',pgs,' was finished.','Len:',len(ln_o),' process:',start_c/10
Beispiel #20
0
def collect_vitrines():
    """Number every row of ``to_posting2`` and hand its product to ``gerate_insert_client``.

    For each row the product is looked up in ``cache_products``, the row gets
    a sequential ``id_post`` and is written back, and the matching
    ``web_cache1`` row (with the product's ``lomadee_url`` copied in) is
    passed on together with the first entry of ``from_id`` that starts with
    two digits.

    Returns:
        ``products_client``.
    """
    import pycassa
    from pycassa.pool import ConnectionPool
    from pycassa.columnfamily import ColumnFamily
    from pycassa import index
    digits = '0123456789'
    pool2 = ConnectionPool('MINDNET', ['79.143.185.3:9160'], timeout=100000)
    tab2 = pycassa.ColumnFamily(pool2, 'cache_products')
    pub = pycassa.ColumnFamily(pool2, 'to_posting2')
    wb2 = pycassa.ColumnFamily(pool2, 'web_cache1')  # place to index

    for counter, (ky, col) in enumerate(pub.get_range(), 1):
        prods = col['id_product'].decode('hex')
        rc = tab2.get(prods)

        # ``from_id`` holds a list rendered as text; strip the list syntax.
        raw_ids = col['from_id'].replace('\'', '').replace('[', '').replace(']', '')
        from_id = ''
        for candidate in raw_ids.split(','):
            candidate = umisc.trim(candidate)
            if (len(candidate) > 1 and candidate[0] in digits
                    and candidate[1] in digits):
                from_id = candidate
                break

        col['id_post'] = str(counter)
        url_lomadee = rc['lomadee_url']
        pub.insert(ky, col)
        prod = rc['cod_p']
        rccc = wb2.get(prods.encode('hex'))
        rccc['lomadee_url'] = url_lomadee
        gerate_insert_client(from_id, prod, rccc)
    return products_client
Beispiel #21
0
def process_p(arrpg1,dm):
 print 'pages ok. init process ...'
 thst=[]
 indc=0
 for pg in arrpg1:
   indc+=1
   [u,uip]=pg
   if umisc.trim(u)=='': continue
   thst.append(thread_cntl())
   params=get_feeds(u,0,'',None,[],uip,u)
   #==========================
   t = Thread(target=run_cmd, args=(params,thst[len(thst)-1]))
   t.start()
  
 
 while True:
  fndac=False
  for t in thst:
   if not t.finished: fndac=True
  time.sleep(1)
  if not fndac: break  
Beispiel #22
0
        def pg_open(addresss, th, pages, pgind, ind_emit, start_c):
            try:
                for address in addresss:
                    lines_doc = []
                    if address != 'debug-url':
                        opener = urllib2.build_opener()
                        address = urllib.quote(address)
                        print 'Open page:', address
                        url = 'http://www.mind-net.com/get_Text.php?q=' + address
                        content = opener.open(url, '').read()
                        tmpd = ''
                        for d in content:
                            if d == '\n':
                                tmpd = umisc.trim(tmpd)
                                if tmpd.find('http://') > -1:
                                 ''' '''
                                else:
                                    lines_doc.append(tmpd)
                                tmpd = ''
                            else:
                                tmpd += d
                        #============
                        pages.append(Task_C(pg_add, lines_doc))
                        print 'Get content for page:', pgind, ' was finished.Len:', len(
                            lines_doc)
                        pgind += 1
                    else:
                        for line_deb in entry_doc:
                            lines_doc.append(line_deb)
                        pages.append(Task_C(pg_add, lines_doc))
                        print 'Get content for page:', pgind, ' was finished.Len:', len(
                            lines_doc)
                        pgind += 1

                th.finished = True
            except Exception, er:
                print er, '................'
                th.finished = True
Beispiel #23
0
    def __init__(self, prefixo, sufixo, define, returnsa):
        """Store the rule parts, normalised through ``latinupper``.

        Args:
            prefixo: iterable of prefix items.
            sufixo: iterable of suffix groups, each an iterable of items.
            define: iterable of definition items; items that are blank after
                trimming are dropped.
            returnsa: stored unchanged as ``self.returns``.
        """
        prefix = [latinupper(item) for item in prefixo]
        defs = [latinupper(item) for item in define if umisc.trim(item) != ""]
        sfx = [[latinupper(item) for item in group] for group in sufixo]

        self.prefixo = prefix
        self.define = defs
        self.sufixo = sfx
        self.returns = returnsa
Beispiel #24
0
 def get_filter_in_objs(obj, sinapses_consid1, dfin1, canditates, firp):
     """Return the stored objects matching every non-blank entry of ``min_purposes``.

     Each purpose is parsed with ``parse_each_topico2`` and validated with
     ``mdTb.valida_topico31_2``; an empty validation result ends the search
     with ``[]``.  Matches are appended to ``canditates`` (in place) when
     ``firp`` is true or the match is already listed; ``firp`` is cleared
     after the first purpose processed.

     Returns:
         One ``[[key, object]]`` entry per distinct candidate, the object
         being read with ``mg.tb_get``.
     """
     for purpose in min_purposes:
         if umisc.trim(purpose) == '':
             continue
         parsed = parse_each_topico2(purpose, dfin1)
         matches = mdTb.valida_topico31_2(parsed, usr, obj, True, '', 0, 0,
                                          sinapses_consid1)
         if len(matches) == 0:
             return []
         for match in matches:
             if match in canditates or firp:
                 canditates.append(match)
         firp = False

     # Order-preserving removal of duplicates.
     unique = []
     for candidate in canditates:
         if candidate not in unique:
             unique.append(candidate)

     return [[[candidate, mg.tb_get([tb_object31], candidate)]]
             for candidate in unique]
def get_by_keyword(
    is2
):  # busca algumas palavras chave para extrair os 'samples', amostras de codigo para calibrar e treinar o processador fuzzy
    isd = []
    cnc = False
    for ch in is2:
        try:
            cnc = False
            try:
                rows = w_cache.get(ch)
            except:
                cnc = True
            if cnc: continue
            if True:
                #============================
                print 'Print pg:', ch
                #=================
                PG = rows[u'pg']
                PROCESSED = rows[u'processed']
                TERMO = rows[u'termo']
                USR = rows[u'usr']
                PURPOSE = rows[u'purpose']
                URL_ICON = rows[u'url_icon']
                URL_PICTURE = rows[u'url_picture']
                ID_USR = float(rows[u'id_usr'])
                NAME_USR = rows[u'name_usr']
                STORY = rows[u'story']
                TITLE = rows[u'title']
                DOC_ID = rows[u'doc_id']
                TP = rows[u'tp']
                PHONE = ''
                STREET = ''
                CITY = ''
                COUNTRY = ''
                ZIP = ''
                LATITUDE = ''
                LONGITUDE = ''
                TPS = rows['tps']
                URL = rows['url']
                #==========
                if PG != None:
                    pass
                else:
                    PG = ''
                if URL_ICON != None:
                    pass
                else:
                    URL_ICON = ''
                if URL_PICTURE != None:
                    pass
                else:
                    URL_PICTURE = ''
                if STORY != None:
                    pass
                else:
                    STORY = ''
                if TITLE != None:
                    pass
                else:
                    TITLE = ''
                if URL != None:
                    pass
                else:
                    URL = ''

                words = tokeniz(PG)
                fnd = False
                fnd2 = False
                if 'are now friends' in PG:
                    fnd2 = True
                elif 'is now friends with' in PG:
                    fnd2 = True
                elif PG[:7] == 'http://':
                    fnd2 = True
                elif 'likes' in PG:
                    fnd2 = True
                elif '{like}' in PG:
                    fnd2 = True
                #===
                for w in words:
                    if 'quer' in w:
                        fnd = True
                    elif 'precis' in w:
                        fnd = True
                    elif 'poderia' in w:
                        fnd = True
                    elif 'pode' in w:
                        fnd = True
                    elif 'podi' in w:
                        fnd = True
                    elif 'gostar' in w:
                        fnd = True
                    elif 'pensand' in w:
                        fnd = True
                    elif 'comprar' in w:
                        fnd = True
                    elif 'adquirir' in w:
                        fnd = True
                    elif 'pens' in w:
                        fnd = True
                    elif 'pegar' in w:
                        fnd = True
                    elif 'encontr' in w:
                        fnd = True
                    elif 'indicar' in w:
                        fnd = True
                #================================
                if umisc.trim(PG) == '':
                    fnd = False
                if fnd and not fnd2:
                    isd.append([
                        PG, PROCESSED, TERMO, USR, PURPOSE, URL_ICON,
                        URL_PICTURE, ID_USR, NAME_USR, STORY, TITLE, DOC_ID,
                        TP, PHONE, STREET, CITY, COUNTRY, ZIP, LATITUDE,
                        LONGITUDE, TPS, URL
                    ])
                    #apagar o item, passando p tabela processados somente os I,DOC_ID para o processo de reprocessamento nao considerar mais esses documentos
                #===================================================================
                #I=0
                #conn.sqlX('insert into PROC_DS (ID,DOC_ID) values(?,?)',[I,ch])
                proc_ds.insert(ch, {'ch': ch})
                #======================================================================
                w_cache.remove(ch)
        except:
            log.exception("")
            conn.rollback()
            return []
    conn.commit()
    return isd
Beispiel #26
0
def get_ontology_ponderate(
        aliases, min_purposes, max_purposes, usr, dfin,
        sinapses_consid):  # min_purposes=mandatory, max_purposes=max ideal
    """Return the rated objects that satisfy the mandatory purposes.

    Candidates are collected from ``min_purposes`` (through
    ``mdTb.valida_topico31`` when ``aliases`` is None/blank, otherwise through
    ``mdTb.valida_topico31_2`` restricted to ``aliases``).  Each candidate's
    ``objeto`` is then loaded and rated with ``get_object_by_data29`` against
    ``max_purposes``, with ``mdTb.Zeus_Mode`` switched off for that call.

    Args:
        aliases: object to restrict the search to, or None/blank for all.
        min_purposes: mandatory purposes; blank entries are ignored.
        max_purposes: purposes used for rating.
        usr: user name forwarded to the lookups.
        dfin: forwarded to ``parse_each_topico2``.
        sinapses_consid: forwarded to ``mdTb.valida_topico31_2``.

    Returns:
        A sorted list of ``[rating, object]`` pairs; ``[]`` as soon as one
        mandatory purpose has no match.
    """
    def dedupe(items):
        # Order-preserving removal of duplicates (works for unhashable items).
        unique = []
        for item in items:
            if item not in unique:
                unique.append(item)
        return unique

    def get_filter_in_objs(obj, sinapses_consid1, dfin1, canditates, firp):
        # Candidates of ``obj`` matching the mandatory purposes, each wrapped
        # as [[key, stored object]].
        for mn in min_purposes:
            if umisc.trim(mn) == '':
                continue
            mn = parse_each_topico2(mn, dfin1)
            c2 = mdTb.valida_topico31_2(mn, usr, obj, True, '', 0, 0,
                                        sinapses_consid1)
            if len(c2) == 0:
                return []
            for c1 in c2:
                if c1 in canditates or firp:
                    canditates.append(c1)
            firp = False
        return [[[c, mg.tb_get([tb_object31], c)]] for c in dedupe(canditates)]

    rts = []
    resultSet2 = []
    if aliases is None or umisc.trim(aliases) == '':
        canditates = []
        firp = True
        for mn in min_purposes:
            if umisc.trim(mn) == '':
                continue
            mn = parse_each_topico2(mn, dfin)
            c2 = mdTb.valida_topico31(mn, usr, None)
            if len(c2) == 0:
                return []
            for c1 in c2:
                if c1 in canditates or firp:
                    canditates.append(c1)
            firp = False
        for c in dedupe(canditates):
            try:
                stored = mg.tb_get([tb_object31], c)
            except Exception:
                try:
                    stored = mg.tb_get([tb_object1], c)
                except Exception:
                    # Temporary cache tb_object: no weighting is expected
                    # there.  The candidate is skipped; the old code went on
                    # with an unbound (or previous) result here.
                    continue
            resultSet2.append([[[c, stored]]])
    else:
        resultSet2.append(
            get_filter_in_objs(aliases, sinapses_consid, dfin, [], True))

    for resultSet23 in resultSet2:
        for resultSet in resultSet23:
            for key, results in resultSet:
                i = results[u'objeto']
                ant_z = mdTb.Zeus_Mode
                mdTb.Zeus_Mode = False
                try:
                    [obj_principal,
                     ratting] = get_object_by_data29(i, usr, max_purposes)
                    obj_principal.get_links('')
                finally:
                    # Restore the global mode even when the rating fails.
                    mdTb.Zeus_Mode = ant_z
                rts.append([ratting, obj_principal])
    # A single sort at the end gives the same order as sorting per append.
    rts.sort()
    return rts
def post_object_by_data3p(layer, cenario, usr, termo, foco, posted_objs, senti,
                          l_p_ant):
    if layer.name == '': layer.name = 'undef'

    def get_top_level(obj, foc, usr, termo_s):
        """Return ``[lev, id_top]`` pairs read from ``tb_object_dt``.

        Rows are selected through an index clause built from four
        expressions: OBJECT == obj, TOPICO == foc, USERNAME == usr and
        UID == termo_s.  The clause is created with ``count=1000000``.
        """
        filters = [
            index.create_index_expression(column_name='OBJECT', value=obj),
            index.create_index_expression(column_name='TOPICO', value=foc),
            index.create_index_expression(column_name='USERNAME', value=usr),
            index.create_index_expression(column_name='UID', value=termo_s),
        ]
        clause = index.create_index_clause(filters, count=1000000)
        # Each result is a (row key, columns) pair; only the columns are used.
        return [[columns[u'lev'], columns[u'id_top']]
                for _row_key, columns in tb_object_dt.get_indexed_slices(clause)]

    #=======================
    nameo = layer.name
    print 'Post layer:', nameo
    if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
        if l_p_ant != None:
            nameo = l_p_ant.name
        if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n':
            return

    fnd_tops = False

    l_p_ant = layer
    geral_uuid = cenario
    print 'POST:LR:', nameo
    print '++------------------------------------------'
    for s in layer.topicos:
        print 'DT:', s.dt
        fnd_tops = True
        for d in s.sinapses:
            print d.nr.dt
    print '++------------------------------------------'

    if not fnd_tops: return
    ky1 = nameo + ' ' + str(cenario)
    nameo = ky1
    #
    #nameo = filter(lambda x: x in string.printable, nameo)
    nameo = nameo.replace('\n', '')
    nameo = nameo.replace('\t', '')
    nameo = nameo.replace('.', '')
    nameo = nameo.strip('\n')
    nameo = nameo.strip('\r')
    nameo = nameo.strip('\t')
    nameo = nameo.replace('  ', ' ')
    nameo = nameo.replace('   ', ' ')
    nameo = nameo.replace('    ', ' ')
    nameo = nameo.replace('     ', ' ')
    nameo = nameo.replace('      ', ' ')
    nameo = nameo.replace('       ', ' ')
    nameo = nameo.replace('        ', ' ')
    nameo = nameo.replace('         ', ' ')
    ky1 = nameo
    #
    print 'Post-obj:[', nameo, ']'
    # procurar nas tabs se ja existe e carregar os topicos q tem lá antes de postar os novos
    try:
        print 'get.object.reuse:'
        import mdTb
        layer_ant = mdTb.get_object_by_data(nameo, nameo)
        print 'get.object.reuse.return:', layer_ant
        if layer_ant != None:
            print 'reuse layer(', len(layer.topicos), '):', nameo,
            #layer.dump_layer()
            tps_ant_p = layer.topicos
            layer.topicos = []
            for stop in layer_ant.topicos:
                if len(stop.dt) > 0:
                    if stop.dt[0].lower() == 'identificador': continue
                layer.topicos.append(stop)
            for atop in tps_ant_p:
                layer.topicos.append(atop)
            print 'result:', len(layer.topicos)
            #layer.dump_layer()
    except:
        #log.exception("Error.get.object")
        pass
    #
    no_post_o = False
    for [s, st] in posted_objs:
        if s == nameo and st == senti:
            no_post_o = True
    posted_objs.append([nameo, senti])

    #==========
    #if not no_post_o and len(layer.topicos)>0:
    def post_alldt(arr):
        """Insert every ``[key, columns]`` pair of ``arr`` into ``tb_object_dt``.

        All inserts are queued on a single batch whose ``queue_size`` equals
        ``len(arr)``, and the batch is sent once at the end.  Keys are
        converted with ``str()`` before insertion.
        """
        pending = tb_object_dt.batch(queue_size=len(arr))
        for row_key, columns in arr:
            pending.insert(str(row_key), columns)
        pending.send()

    #
    def post_nr(uid, cnt, arr1, usr, tp, level=1, id_top=1, just_sin=False):
        try:
            if not just_sin:
                tp_Dt = ''
                try:
                    for d in tp.dt:
                        if type(d) == type([]):
                            tp_Dt += d[0]
                        else:
                            tp_Dt += d
                except Exception, e:
                    print 'Err:-nr.post(2):', tp.dt, '->', e
                tp_name = tp_Dt
                if len(tp.sinapses) == 0:
                    ##UID,topico,LEV,sin,dt,id_top,username
                    kyl1 = uid + '|' + str(cnt.value())
                    it = {
                        "UID": uid,
                        "topico": tp_Dt,
                        "LEV": "1",
                        "sin": '',
                        "datach": '',
                        "id_top": str(id_top),
                        "username": usr,
                        "cnt": str(cnt.value())
                    }
                    arr1.append([kyl1, it])
                    cnt.inc()
            else:
def get_db_pages(usr2, pg_ex, connc):
    def remote_f():
        print 'Getting remote-pages...'
        return proxy.get_pages('', usr2)

    if RemoteL:
        return remote_f()

    print 'PG_EX', pg_ex, len(pg_ex)

    pgs_exs = pg_ex.split(',')

    #resultSet = connc.sql ("select pg,i,title from web_cache3 where USR='******' and i in( "+pg_ex+" ) order by i")

    resultSet = []
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    # se implementar testes,adicionar no resultSet
    #resultSet=['','']
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    #==========================================================================================================================================
    if len(resultSet) == 0:
        for p1 in pgs_exs:
            rg = w_cache3.get(p1)
            try:
                pg = rg['PG']
                #
                pg = pg.replace('\n', '')
                pg = pg.replace('\t', '')
                pg = pg.strip('\n')
                pg = pg.strip('\r')
                pg = pg.strip('\t')
                pg = pg.replace('  ', ' ')
                pg = pg.replace('   ', ' ')
                pg = pg.replace('    ', ' ')
                pg = pg.replace('     ', ' ')
                pg = pg.replace('      ', ' ')
                pg = pg.replace('       ', ' ')
                pg = pg.replace('        ', ' ')
                pg = pg.replace('         ', ' ')
                pagina = pg
                #
                pg = rg[u'TITLE']
                pg = pg.replace('\n', '')
                pg = pg.replace('\t', '')
                pg = pg.replace('.', ' ')
                pg = pg.strip('\n')
                pg = pg.strip('\r')
                pg = pg.strip('\t')
                pg = pg.replace('  ', ' ')
                pg = pg.replace('   ', ' ')
                pg = pg.replace('    ', ' ')
                pg = pg.replace('     ', ' ')
                pg = pg.replace('      ', ' ')
                pg = pg.replace('       ', ' ')
                pg = pg.replace('        ', ' ')
                pg = pg.replace('         ', ' ')
                titulo = pg
                resultSet.append([pagina, p1, titulo])
            except:
                pg = rg['pg']
                #
                pg = pg.replace('\n', '')
                pg = pg.replace('\t', '')
                pg = pg.strip('\n')
                pg = pg.strip('\r')
                pg = pg.strip('\t')
                pg = pg.replace('  ', ' ')
                pg = pg.replace('   ', ' ')
                pg = pg.replace('    ', ' ')
                pg = pg.replace('     ', ' ')
                pg = pg.replace('      ', ' ')
                pg = pg.replace('       ', ' ')
                pg = pg.replace('        ', ' ')
                pg = pg.replace('         ', ' ')
                pagina = pg
                #
                pg = rg[u'title']
                pg = pg.replace('\n', '')
                pg = pg.replace('\t', '')
                pg = pg.strip('\n')
                pg = pg.strip('\r')
                pg = pg.strip('\t')
                pg = pg.replace('  ', ' ')
                pg = pg.replace('   ', ' ')
                pg = pg.replace('    ', ' ')
                pg = pg.replace('     ', ' ')
                pg = pg.replace('      ', ' ')
                pg = pg.replace('       ', ' ')
                pg = pg.replace('        ', ' ')
                pg = pg.replace('         ', ' ')
                titulo = pg
                resultSet.append([pagina, p1, titulo])

    typ = []
    print 'Collect pg:', pg_ex
    #===============================================
    #typ.append(['O perfil da empresa no Twitter foi criado em 20 de Fevereiro de 2008.',35835 ])
    #return typ
    #================================================
    for [ts, ids, ids2] in resultSet:
        if ids2 == None: ids2 = ''
        if umisc.trim(ids2) != '':
            ts = (ids2 + ': ' + ts)
        #if umisc.trim(ts) != '' and umisc.trim(ts) != '\n' and umisc.trim(ts) != '\r':
        typ.append([ts, ids])

        print 'Read page', ids  #,ts
    print 'Reuse pgs:', len(typ)
    return typ
                 tp_Dt += d
     except:
         print 'Err:-nr.post(2):', tp.dt
 #=================
 for sn in tp.sinapses:
     sn_dt = ''
     try:
         for s1 in sn.nr.dt:
             if type(s1) == type([]):
                 sn_dt += s1[0]
             else:
                 sn_dt += s1
     except:
         print 'Err:-nr.post:', sn.nr.dt
     #sql1="insert into SEMANTIC_OBJECT_DT3(UID,dt,topico,LEV,sin,id_top,username) values(?,?,?,?,?,?,?)"
     if umisc.trim(sn.opcode) == '':
         sn.opcode = 'Relaction-oper-opcode'
     #====================================================
     #====================================================
     kyl1 = uid + '|' + str(cnt.value())
     it = {
         "UID": uid,
         "topico": tp_Dt,
         "LEV": str(level),
         "sin": sn.opcode,
         "datach": sn_dt,
         "id_top": str(id_top),
         "username": usr,
         "cnt": str(cnt.value())
     }
     arr1.append([kyl1, it])
Beispiel #30
0
def post_object_by_data3p(layer,cenario,usr,termo,foco,posted_objs,senti,l_p_ant): 
 if layer.name == '' : layer.name ='undef'
 def get_top_level(obj,foc,usr,termo_s):
   rts=[]
   cl1 = index.create_index_expression(column_name='OBJECT', value=obj)
   cl2 = index.create_index_expression(column_name='TOPICO', value=foc)
   cl3 = index.create_index_expression(column_name='USERNAME', value=usr)
   cl4 = index.create_index_expression(column_name='UID', value=termo_s)
   clausec = index.create_index_clause([cl1,cl2,cl3,cl4],count=1000000)
   rest=tb_object_dt.get_indexed_slices(clausec)
   # 
   #for results in resultSet:
   for kl,cols in rest:   
      i=cols[u'lev'] 
      id_top=cols[u'id_top'] 
      rts.append([i,id_top])
   return rts
 #=======================  
 nameo=layer.name
 print 'Post layer:',nameo
 if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n' :
  if l_p_ant != None:
    nameo=l_p_ant.name
  if umisc.trim(nameo) == '' or umisc.trim(nameo) == '\n' :  
   return
 
 fnd_tops=False
 
 l_p_ant=layer
 geral_uuid=cenario
 print 'POST:LR:',nameo
 print '++------------------------------------------'
 for s in layer.topicos:
  print 'DT:',s.dt
  fnd_tops=True
  for d in s.sinapses:
   print d.nr.dt
 print '++------------------------------------------'
 
 if not fnd_tops: return
 ky1=nameo+' '+str(cenario) 
 nameo=ky1 
 print 'Post-obj:[',nameo,']'
 no_post_o=False
 for [s,st] in posted_objs:
  if s == nameo and st==senti :
     no_post_o=True
 posted_objs.append([nameo,senti])
 #========== 
 #if not no_post_o and len(layer.topicos)>0:
 def post_alldt(arr):
   #=======================
   b = tb_object_dt.batch(queue_size=len(arr))   
   for k,cols in arr:
    b.insert(str(k),cols) 
   b.send()
 #
 def post_nr(uid,cnt,arr1,usr,tp,level=1,id_top=1,just_sin=False):   
   try:
       if not just_sin:
        tp_Dt=''
        try:
         for d in tp.dt:
           if type(d) == type([] ):
            tp_Dt+=d[0]
           else: 
            tp_Dt+=d
        except Exception,e:
          print 'Err:-nr.post(2):',tp.dt,'->',e
        tp_name=tp_Dt
        if len(tp.sinapses)==0:
         ##UID,topico,LEV,sin,dt,id_top,username 
         kyl1=uid+'|'+str(cnt.value())
         it={"UID":uid,"topico":tp_Dt,"LEV":"1","sin":'',"datach":'',"id_top":str(id_top),"username":usr,"cnt":str(cnt.value())}
         arr1.append( [ kyl1,it ] )
         cnt.inc()
       else: