Example #1
0
    busqueda_xml = etree.parse(xml_file)
except:
    logging.error('Invalid file' + xml_file)
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)

# Import one exported search ("busqueda") and its persons from the parsed XML
# tree `busqueda_xml` into Busqueda / Persona objects.
logging.info('Processing file ' + xml_file)
# Limpieza is defined outside this excerpt; limpia_reservados_xml presumably
# cleans XML-reserved characters out of a string -- TODO confirm.
L = Limpieza()
x_busqueda = busqueda_xml.getroot()
# Header fields of the search, looked up as children of the root element.
# NOTE(review): if etree follows the ElementTree API, find() returns None for
# a missing child and the `.text` accesses below would raise AttributeError.
x_nombre = x_busqueda.find('nombre')
x_fecha = x_busqueda.find('fecha')
x_usuario = x_busqueda.find('usuario')
x_descripcion = x_busqueda.find('descripcion')
busqueda = Busqueda()
busqueda.nombre = L.limpia_reservados_xml(x_nombre.text)
# fecha is stored as the raw element text; it is not passed through the cleaner.
busqueda.fecha = x_fecha.text
busqueda.usuario = L.limpia_reservados_xml(x_usuario.text)
busqueda.descripcion = L.limpia_reservados_xml(x_descripcion.text)
# The search is saved before any Persona below is linked to it.
busqueda.save()
logging.info('Importing busqueda ' + busqueda.nombre)
# Every <person> element under <personas> becomes one Persona of this search.
x_personas = x_busqueda.find('personas')
x_personas_set = x_personas.findall('person')
# Short alias for the cleaner, used for every text field in the loop.
limpia = L.limpia_reservados_xml
for x_persona in x_personas_set:
    persona = Persona()
    persona.busqueda = busqueda
    persona.name = limpia(x_persona.find('name').text)
    persona.geo = limpia(x_persona.find('geo').text)
    persona.orgs = limpia(x_persona.find('orgs').text)
    persona.topics = limpia(x_persona.find('topics').text)
    persona.link = limpia(x_persona.find('link').text)
    # NOTE(review): the visible code ends here without persisting `persona`;
    # the rest of the loop body is presumably outside this excerpt.
Example #2
0
def main():
    """Import "converging" per-person result XML files into a new Busqueda.

    Arguments are read from ``sys.argv``:

    1. base path; ``/results_converging/`` is appended to it and that
       directory tree is walked for ``*.xml`` files
    2. name stored in ``Busqueda.nombre``
    3. optional filter value, compared against ``'All'`` and against each
       snippet's ``FG`` attribute text to decide whether the snippet gets its
       final ``save()``
    4. optional user stored in ``Busqueda.usuario`` (defaults to ``'*'``)
    5. optional description stored in ``Busqueda.descripcion``

    For every XML file one persona is created on the search, and for every
    ``<snippet>`` of every ``<converging_pipelines>`` element one snippet is
    created on that persona.

    Calls ``sys.exit(-1)`` when the results directory is not readable.
    """
    # Maps the <query_type> text of a snippet to the flag attribute it sets.
    pipeline_flags = {
        'name': 'name_pipeline',
        'geo': 'geo_pipeline',
        'orgs': 'orgs_pipeline',
        'topics': 'topics_pipeline',
    }

    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' + sys.argv[1] + ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' + filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        logging.debug('arguments user:' + sys.argv[4])
    # NOTE(review): the original source was corrupted between the user and
    # description log calls; this optional fifth argument is a reconstruction
    # from the surviving 'arguments desc:' + sys.argv[5] text -- confirm.
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:' + sys.argv[5])

    results_path += '/results_converging/'
    if not os.access(results_path, os.R_OK):
        logging.error('No access to ' + results_path)
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist outside the interactive interpreter.
        sys.exit(-1)
    colombia.save()

    for dirpath, _dirnames, filenames in os.walk(results_path):
        for filename in filenames:
            if not filename.endswith('.xml'):
                continue
            logging.debug('processing file: ' + filename)
            # Join with the directory os.walk is currently in, so files found
            # in subdirectories are opened from where they actually live.
            file_path = os.path.join(dirpath, filename)
            person = etree.parse(file_path).getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: ' + x_name.text)
            w_persona = colombia.persona_set.create(name=x_name.text,
                                                    geo=x_geo.text,
                                                    orgs=x_orgs.text,
                                                    topics=x_topics.text)
            for pipeline in person.findall('converging_pipelines'):
                n = pipeline.get('number')
                # Kept purely as validation: fails early when the 'number'
                # attribute is missing or not an integer.
                int(n)
                logging.debug('Processing converging pipeline number ' + n)
                snippets = pipeline.find('snippets').findall('snippet')
                for x_snippet in snippets:
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find(
                            'description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug('processing snippet ' + x_query + '::' +
                                  x_title)
                    logging.debug('snippet description::' + x_description)
                    logging.debug('snippet link::' + x_link)
                    logging.debug('snippet query_type::' + x_query_type)

                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    # Lazy %-formatting so a missing attribute (None) is
                    # logged instead of raising TypeError on concatenation.
                    logging.debug('filters :: FG=%s ESA=%s RE=%s',
                                  x_FG, x_ESA, x_RE)

                    # Fields that are identical in the first attempt and in
                    # the ASCII fallback below.
                    common = dict(query=x_query,
                                  link=x_link,
                                  FG=(x_FG == 'True'),
                                  RE=(x_RE == 'True'),
                                  ESA_score=x_ESA,
                                  converging_pipelines=n)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            title=x_title,
                            description=x_description,
                            **common)
                    except Exception:
                        # Log the text that failed; w_snippet is not bound to
                        # the failed row at this point, so it must not be read.
                        logging.debug('exception on saving snippet: converting' + x_description +
                                      'of type' + str(type(x_description)),
                                      exc_info=True)
                        # Retry with non-ASCII characters replaced by '?'.
                        w_snippet = w_persona.snippet_set.create(
                            title=x_title.encode('ascii', 'replace'),
                            description=x_description.encode(
                                'ascii', 'replace'),
                            **common)

                    flag = pipeline_flags.get(x_query_type)
                    if flag is not None:
                        setattr(w_snippet, flag, True)
                        logging.debug('filters:: query_type=' + x_query_type)

                    # NOTE(review): snippet_set.create() presumably already
                    # persisted the row (Django related-manager semantics), so
                    # this condition only decides whether the pipeline flag
                    # set above is written -- confirm that is intended.
                    if not filter_value or filter_value in (x_FG, 'All'):
                        w_snippet.save()
            colombia.save()
Example #3
0
    busqueda_xml = etree.parse(xml_file)
except:
    logging.error('Invalid file' +xml_file)
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)

# Import one exported search ("busqueda") and its persons from the parsed XML
# tree `busqueda_xml` into Busqueda / Persona objects.
logging.info('Processing file ' +xml_file)
# Limpieza is defined outside this excerpt; limpia_reservados_xml presumably
# cleans XML-reserved characters out of a string -- TODO confirm.
L = Limpieza()
x_busqueda = busqueda_xml.getroot()
# Header fields of the search, looked up as children of the root element.
# NOTE(review): if etree follows the ElementTree API, find() returns None for
# a missing child and the `.text` accesses below would raise AttributeError.
x_nombre = x_busqueda.find('nombre')
x_fecha = x_busqueda.find('fecha')
x_usuario = x_busqueda.find('usuario')
x_descripcion = x_busqueda.find('descripcion')
busqueda = Busqueda()
busqueda.nombre = L.limpia_reservados_xml(x_nombre.text)
# fecha is stored as the raw element text; it is not passed through the cleaner.
busqueda.fecha = x_fecha.text
busqueda.usuario = L.limpia_reservados_xml(x_usuario.text)
busqueda.descripcion = L.limpia_reservados_xml(x_descripcion.text)
# The search is saved before any Persona below is linked to it.
busqueda.save()
logging.info('Importing busqueda ' +busqueda.nombre)
# Every <person> element under <personas> becomes one Persona of this search.
x_personas = x_busqueda.find('personas')
x_personas_set = x_personas.findall('person')
# Short alias for the cleaner, used for every text field in the loop.
limpia = L.limpia_reservados_xml
for x_persona in x_personas_set:
    persona = Persona()
    persona.busqueda = busqueda
    persona.name = limpia(x_persona.find('name').text)
    persona.geo = limpia(x_persona.find('geo').text)
    persona.orgs = limpia(x_persona.find('orgs').text)
    persona.topics = limpia(x_persona.find('topics').text)
    persona.link = limpia(x_persona.find('link').text)
    # NOTE(review): the visible code ends here without persisting `persona`;
    # the rest of the loop body is presumably outside this excerpt.
Example #4
0
def main():
    """Import "converging" per-person result XML files into a new Busqueda.

    Arguments are read from ``sys.argv``:

    1. base path; ``/results_converging/`` is appended to it and that
       directory tree is walked for ``*.xml`` files
    2. name stored in ``Busqueda.nombre``
    3. optional filter value, compared against ``'All'`` and against each
       snippet's ``FG`` attribute text to decide whether the snippet gets its
       final ``save()``
    4. optional user stored in ``Busqueda.usuario`` (defaults to ``'*'``)
    5. optional description stored in ``Busqueda.descripcion``

    For every XML file one persona is created on the search, and for every
    ``<snippet>`` of every ``<converging_pipelines>`` element one snippet is
    created on that persona.

    Calls ``sys.exit(-1)`` when the results directory is not readable.
    """
    # Maps the <query_type> text of a snippet to the flag attribute it sets.
    pipeline_flags = {
        'name': 'name_pipeline',
        'geo': 'geo_pipeline',
        'orgs': 'orgs_pipeline',
        'topics': 'topics_pipeline',
    }

    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' +sys.argv[1]+  ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' +filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        logging.debug('arguments user:' + sys.argv[4])
    # NOTE(review): the original source was corrupted between the user and
    # description log calls; this optional fifth argument is a reconstruction
    # from the surviving 'arguments desc:'+sys.argv[5] text -- confirm.
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:'+sys.argv[5])

    results_path += '/results_converging/'
    if not os.access(results_path, os.R_OK):
        logging.error('No access to '+results_path)
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist outside the interactive interpreter.
        sys.exit(-1)
    colombia.save()

    for dirpath, _dirnames, filenames in os.walk(results_path):
        for filename in filenames:
            if not filename.endswith('.xml'):
                continue
            logging.debug('processing file: ' +filename)
            # Join with the directory os.walk is currently in, so files found
            # in subdirectories are opened from where they actually live.
            file_path = os.path.join(dirpath, filename)
            person = etree.parse(file_path).getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: '+x_name.text)
            w_persona = colombia.persona_set.create(name=x_name.text,
                                                    geo=x_geo.text,
                                                    orgs=x_orgs.text,
                                                    topics=x_topics.text)
            for pipeline in person.findall('converging_pipelines'):
                n = pipeline.get('number')
                # Kept purely as validation: fails early when the 'number'
                # attribute is missing or not an integer.
                int(n)
                logging.debug('Processing converging pipeline number '+n)
                snippets = pipeline.find('snippets').findall('snippet')
                for x_snippet in snippets:
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find(
                            'description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug('processing snippet '+ x_query +'::'+ x_title)
                    logging.debug('snippet description::' +x_description)
                    logging.debug('snippet link::' +x_link)
                    logging.debug('snippet query_type::' +x_query_type)

                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    # Lazy %-formatting so a missing attribute (None) is
                    # logged instead of raising TypeError on concatenation.
                    logging.debug('filters :: FG=%s ESA=%s RE=%s',
                                  x_FG, x_ESA, x_RE)

                    # Fields that are identical in the first attempt and in
                    # the ASCII fallback below.
                    common = dict(query=x_query,
                                  link=x_link,
                                  FG=(x_FG == 'True'),
                                  RE=(x_RE == 'True'),
                                  ESA_score=x_ESA,
                                  converging_pipelines=n)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            title=x_title,
                            description=x_description,
                            **common)
                    except Exception:
                        # Log the text that failed; w_snippet is not bound to
                        # the failed row at this point, so it must not be read.
                        logging.debug('exception on saving snippet: converting' +x_description+
                                      'of type' +str(type(x_description)),
                                      exc_info=True)
                        # Retry with non-ASCII characters replaced by '?'.
                        w_snippet = w_persona.snippet_set.create(
                            title=x_title.encode('ascii', 'replace'),
                            description=x_description.encode(
                                'ascii', 'replace'),
                            **common)

                    flag = pipeline_flags.get(x_query_type)
                    if flag is not None:
                        setattr(w_snippet, flag, True)
                        logging.debug('filters:: query_type=' + x_query_type)

                    # NOTE(review): snippet_set.create() presumably already
                    # persisted the row (Django related-manager semantics), so
                    # this condition only decides whether the pipeline flag
                    # set above is written -- confirm that is intended.
                    if not filter_value or filter_value in (x_FG, 'All'):
                        w_snippet.save()
            colombia.save()