def Identify_pre_process_data(l2, ln_o, onto_basis, purpose, id, t_h, ret_ps):
    """Forward all arguments, unchanged and in order, to Identify.pre_process_data.

    Whatever the delegated call returns is discarded; this wrapper itself
    returns None.
    """
    forwarded = (l2, ln_o, onto_basis, purpose, id, t_h, ret_ps)
    Identify.pre_process_data(*forwarded)
# Example #2
# 0
def process_page(all_ps, id, purpose, pgs, finish, th, pg_index_rs,
                 all_size_pg, job_index, addresses, result_onto_tree_er,
                 onto_basis):
    #try:
    if True:
        ln_o = ''
        endereco_url = all_ps[0]
        #===
        progress = int(pg_index_rs / all_size_pg)
        l2 = Identify.prepare_layout(id, purpose)
        for lines_doc2_ in all_ps:
            #try:
            print 'Start page:', pg_index_rs, ' of total:', all_size_pg
            if True:
                endereco_url = lines_doc2_[0]
                lines_doc2 = lines_doc2_[1]
                if True:
                    #============= parse fuzzy ===========================================
                    t_threads = []
                    ret_ps = []
                    indice_linha = 0
                    for s in lines_doc2:
                        indice_linha += 1
                        ln_o = s
                        addresses.append(endereco_url)
                        if umisc.trim(ln_o) == '':
                            continue

                        ret_ps.append([])
                        t_threads.append(thread_cntl())

                        print 'Preprocessdata in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(
                            lines_doc2)
                        Identify.pre_process_data(
                            l2, ln_o, onto_basis, purpose, id,
                            t_threads[len(t_threads) - 1],
                            ret_ps[len(ret_ps) - 1])
                        print 'Preprocessdata END  in page:', pg_index_rs, ' of total:', all_size_pg, ' line:', indice_linha, ' of:', len(
                            lines_doc2)

                        #thread.start_new_thread(Identify_pre_process_data,(l2,ln_o,onto_basis,purpose,id,t_threads[len(t_threads)-1],ret_ps[len(ret_ps)-1]) )

                    #====================================================================
                    #run rcts linkadas
                    sentence_index = 0
                    for s_ps in ret_ps:
                        sentence_index += 1
                        print 'Process sentence:', sentence_index, ' of :', len(
                            ret_ps
                        ), ' page:', pg_index_rs, ' of total:', all_size_pg
                        ir = Identify.resume_process_data(
                            s_ps, onto_basis, purpose, id)
                        if ir[0] != None:
                            # procura identificador ---
                            fnd_ident = False
                            for es in ir[0].topicos:
                                if ir[0].es_compare_dt(es, 'identificador'):
                                    fnd_ident = True
                            if not fnd_ident:
                                ind = len(result_onto_tree_er) - 1
                                fond_cs = False
                                while ind >= 0 and not fond_cs:
                                    for es2 in result_onto_tree_er[
                                            ind].topicos:
                                        if ir[0].es_compare_dt(
                                                es2, 'identificador'):
                                            ir[0].set_topico_nr(es2)
                                            fond_cs = True
                                            break
                                    ind -= 1

                            # verificar se nao tem somente identificadores(elemento fact invalido)
                            oth = False
                            for es in ir[0].topicos:
                                if ir[0].es_compare_dt(es, 'identificador'):
                                    pass
                                else:
                                    oth = True
                            if not oth:
                                continue
                            result_onto_tree_er.append(ir[0])
                            # procurar group
                            ind = len(result_onto_tree_er) - 1
                            while ind >= 0:
                                if ir[0] != result_onto_tree_er[ind]:
                                    for es2 in result_onto_tree_er[
                                            ind].topicos:
                                        if ir[0].es_compare_dt(
                                                es2, 'identificador'):
                                            for top in ir[0].topicos:
                                                if ir[0].compare_dt_depend(
                                                        conn, usr, purpose,
                                                        es2, top, ['']):
                                                    # encontrou referencias do mesmo identificador, incluir nos objetos linkados
                                                    rt = None
                                                    fnd_new = False
                                                    for k1 in result_linked:
                                                        for k2 in k1:
                                                            if k1 == ir[0]:
                                                                fnd_new = True
                                                                rt = k1
                                                    #=
                                                    if not fnd_new:
                                                        result_linked.append(
                                                            [ir[0]])
                                                        rt = result_linked[len(
                                                            result_linked) - 1]

                                                    #=======================================
                                                    fnd_new = False
                                                    for k2 in rt:
                                                        if k2 == result_onto_tree_er[
                                                                ind]:
                                                            fnd_new = True
                                                    if not fnd_new:
                                                        rt.append(
                                                            result_onto_tree_er[
                                                                ind])
                                ind -= 1

                            #==========================
                        if ir[1] != None:
                            result_onto_tree_bpm.append(ir[1])
            #except Exception ,err:
            # print 'Except on process pages:',err,'pg:',pg_index_rs
            traceQ(progress, id, pg_index_rs, (start_c / 10), endereco_url,
                   'Processed page:')