# Script body: import a <busqueda> XML dump into the database as one
# Busqueda row plus one Persona row per <person> element.
#
# NOTE(review): the mangled source had an `except:` with no matching `try:`;
# the `try` is restored here around the parse step. A usage guard is added so
# the error path cannot itself raise NameError on an unbound `xml_file`.
if len(sys.argv) < 2:
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)
xml_file = sys.argv[1]
try:
    busqueda_xml = etree.parse(xml_file)
except Exception:  # unreadable file or malformed XML
    logging.error('Invalid file' + xml_file)
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)

logging.info('Processing file ' + xml_file)

# Limpieza.limpia_reservados_xml strips XML-reserved characters from text.
L = Limpieza()
limpia = L.limpia_reservados_xml

x_busqueda = busqueda_xml.getroot()
x_nombre = x_busqueda.find('nombre')
x_fecha = x_busqueda.find('fecha')
x_usuario = x_busqueda.find('usuario')
x_descripcion = x_busqueda.find('descripcion')

busqueda = Busqueda()
busqueda.nombre = limpia(x_nombre.text)
busqueda.fecha = x_fecha.text
busqueda.usuario = limpia(x_usuario.text)
busqueda.descripcion = limpia(x_descripcion.text)
busqueda.save()
logging.info('Importing busqueda ' + busqueda.nombre)

x_personas = x_busqueda.find('personas')
for x_persona in x_personas.findall('person'):
    persona = Persona()
    persona.busqueda = busqueda
    persona.name = limpia(x_persona.find('name').text)
    persona.geo = limpia(x_persona.find('geo').text)
    persona.orgs = limpia(x_persona.find('orgs').text)
    persona.topics = limpia(x_persona.find('topics').text)
    # NOTE(review): the visible chunk never calls persona.save(); presumably
    # the original continued past this point — confirm before relying on it.
def main():
    """Import every converging-results person XML under argv[1] into a new
    Busqueda named argv[2].

    argv layout:
      [1] results path (``/results_converging/`` is appended)
      [2] busqueda name
      [3] optional snippet filter: falsy keeps all, 'All' keeps all,
          otherwise compared against the snippet's FG flag string
      [4] optional user (defaults to '*')
      [5] optional description
    """
    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' + sys.argv[1] + ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' + filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        # NOTE(review): the next span was redacted ('******') in the source;
        # reconstructed from the parallel argv handling — confirm upstream.
        logging.debug('arguments user:' + sys.argv[4])
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:' + sys.argv[5])

    results_path += '/results_converging/'
    if os.access(results_path, os.R_OK):
        colombia.save()
    else:
        logging.error('No access to ' + results_path)
        exit(-1)

    for subdirs, dirs, files in os.walk(results_path):
        for fname in files:  # renamed from `file` (shadowed builtin)
            if not re.search(r'\.xml$', fname):
                continue
            logging.debug('processing file: ' + fname)
            file_path = results_path + fname
            person_tree = etree.parse(file_path)
            person = person_tree.getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: ' + x_name.text)
            w_persona = colombia.persona_set.create(
                name=x_name.text, geo=x_geo.text,
                orgs=x_orgs.text, topics=x_topics.text)

            for pipeline in person.findall('converging_pipelines'):
                # `number` counts how many pipelines converged on a snippet.
                n = pipeline.get('number')
                logging.debug('Processing converging pipeline number ' + n)
                snippets = pipeline.find('snippets').findall('snippet')
                for x_snippet in snippets:
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find('description').text
                    )[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug(
                        'processing snippet ' + x_query + '::' + x_title)
                    logging.debug('snippet description::' + x_description)
                    logging.debug('snippet link::' + x_link)
                    logging.debug('snippet query_type::' + x_query_type)

                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    b_FG = x_FG == 'True'
                    b_RE = x_RE == 'True'
                    logging.debug('filters :: FG=' + x_FG +
                                  ' ESA=' + x_ESA + ' RE=' + x_RE)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=x_title,
                            description=x_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    except Exception:
                        # BUG FIX: the original logged w_snippet.description
                        # here, but w_snippet is unbound when create() raises
                        # (NameError inside the handler). Log the source text
                        # instead, then retry with ASCII-sanitized strings —
                        # encoding errors are the expected failure mode.
                        logging.debug(
                            'exception on saving snippet: converting' +
                            x_description + 'of type' +
                            str(type(x_description)))
                        ascii_title = x_title.encode('ascii', 'replace')
                        ascii_description = x_description.encode(
                            'ascii', 'replace')
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=ascii_title,
                            description=ascii_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)

                    # Tag the snippet with the pipeline that produced it.
                    if x_query_type == 'name':
                        w_snippet.name_pipeline = True
                        logging.debug('filters:: query_type=name')
                    elif x_query_type == 'geo':
                        w_snippet.geo_pipeline = True
                        logging.debug('filters:: query_type=geo')
                    elif x_query_type == 'orgs':
                        w_snippet.orgs_pipeline = True
                        logging.debug('filters:: query_type=orgs')
                    elif x_query_type == 'topics':
                        w_snippet.topics_pipeline = True
                        logging.debug('filters:: query_type=topics')

                    # Persist unless a filter_value excludes this snippet.
                    if not filter_value:
                        w_snippet.save()
                    elif filter_value == x_FG:
                        w_snippet.save()
                    elif filter_value == 'All':
                        w_snippet.save()
    colombia.save()
# Script body (duplicate copy in this file): import a <busqueda> XML dump
# into the database as one Busqueda row plus one Persona row per <person>.
#
# NOTE(review): the mangled source had an `except:` with no matching `try:`;
# the `try` is restored here around the parse step. A usage guard is added so
# the error path cannot itself raise NameError on an unbound `xml_file`.
if len(sys.argv) < 2:
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)
xml_file = sys.argv[1]
try:
    busqueda_xml = etree.parse(xml_file)
except Exception:  # unreadable file or malformed XML
    logging.error('Invalid file' + xml_file)
    logging.error('Usage: python unoporuno_import xml_file_name')
    exit(-1)

logging.info('Processing file ' + xml_file)

# Limpieza.limpia_reservados_xml strips XML-reserved characters from text.
L = Limpieza()
limpia = L.limpia_reservados_xml

x_busqueda = busqueda_xml.getroot()
x_nombre = x_busqueda.find('nombre')
x_fecha = x_busqueda.find('fecha')
x_usuario = x_busqueda.find('usuario')
x_descripcion = x_busqueda.find('descripcion')

busqueda = Busqueda()
busqueda.nombre = limpia(x_nombre.text)
busqueda.fecha = x_fecha.text
busqueda.usuario = limpia(x_usuario.text)
busqueda.descripcion = limpia(x_descripcion.text)
busqueda.save()
logging.info('Importing busqueda ' + busqueda.nombre)

x_personas = x_busqueda.find('personas')
for x_persona in x_personas.findall('person'):
    persona = Persona()
    persona.busqueda = busqueda
    persona.name = limpia(x_persona.find('name').text)
    persona.geo = limpia(x_persona.find('geo').text)
    persona.orgs = limpia(x_persona.find('orgs').text)
    persona.topics = limpia(x_persona.find('topics').text)
    # NOTE(review): the visible chunk never calls persona.save(); presumably
    # the original continued past this point — confirm before relying on it.
def main():
    """Import every converging-results person XML under argv[1] into a new
    Busqueda named argv[2]. (Duplicate definition in this file; if both are
    kept, this later one shadows the earlier `main`.)

    argv layout:
      [1] results path (``/results_converging/`` is appended)
      [2] busqueda name
      [3] optional snippet filter: falsy keeps all, 'All' keeps all,
          otherwise compared against the snippet's FG flag string
      [4] optional user (defaults to '*')
      [5] optional description
    """
    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' + sys.argv[1] + ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' + filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        # NOTE(review): the next span was redacted ('******') in the source;
        # reconstructed from the parallel argv handling — confirm upstream.
        logging.debug('arguments user:' + sys.argv[4])
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:' + sys.argv[5])

    results_path += '/results_converging/'
    if os.access(results_path, os.R_OK):
        colombia.save()
    else:
        logging.error('No access to ' + results_path)
        exit(-1)

    for subdirs, dirs, files in os.walk(results_path):
        for fname in files:  # renamed from `file` (shadowed builtin)
            if not re.search(r'\.xml$', fname):
                continue
            logging.debug('processing file: ' + fname)
            file_path = results_path + fname
            person_tree = etree.parse(file_path)
            person = person_tree.getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: ' + x_name.text)
            w_persona = colombia.persona_set.create(
                name=x_name.text, geo=x_geo.text,
                orgs=x_orgs.text, topics=x_topics.text)

            for pipeline in person.findall('converging_pipelines'):
                # `number` counts how many pipelines converged on a snippet.
                n = pipeline.get('number')
                logging.debug('Processing converging pipeline number ' + n)
                snippets = pipeline.find('snippets').findall('snippet')
                for x_snippet in snippets:
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find('description').text
                    )[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug(
                        'processing snippet ' + x_query + '::' + x_title)
                    logging.debug('snippet description::' + x_description)
                    logging.debug('snippet link::' + x_link)
                    logging.debug('snippet query_type::' + x_query_type)

                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    b_FG = x_FG == 'True'
                    b_RE = x_RE == 'True'
                    logging.debug('filters :: FG=' + x_FG +
                                  ' ESA=' + x_ESA + ' RE=' + x_RE)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=x_title,
                            description=x_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    except Exception:
                        # BUG FIX: the original logged w_snippet.description
                        # here, but w_snippet is unbound when create() raises
                        # (NameError inside the handler). Log the source text
                        # instead, then retry with ASCII-sanitized strings —
                        # encoding errors are the expected failure mode.
                        logging.debug(
                            'exception on saving snippet: converting' +
                            x_description + 'of type' +
                            str(type(x_description)))
                        ascii_title = x_title.encode('ascii', 'replace')
                        ascii_description = x_description.encode(
                            'ascii', 'replace')
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=ascii_title,
                            description=ascii_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)

                    # Tag the snippet with the pipeline that produced it.
                    if x_query_type == 'name':
                        w_snippet.name_pipeline = True
                        logging.debug('filters:: query_type=name')
                    elif x_query_type == 'geo':
                        w_snippet.geo_pipeline = True
                        logging.debug('filters:: query_type=geo')
                    elif x_query_type == 'orgs':
                        w_snippet.orgs_pipeline = True
                        logging.debug('filters:: query_type=orgs')
                    elif x_query_type == 'topics':
                        w_snippet.topics_pipeline = True
                        logging.debug('filters:: query_type=topics')

                    # Persist unless a filter_value excludes this snippet.
                    if not filter_value:
                        w_snippet.save()
                    elif filter_value == x_FG:
                        w_snippet.save()
                    elif filter_value == 'All':
                        w_snippet.save()
    colombia.save()