# NOTE(review): mangled script fragment — all statements were collapsed onto a
# single physical line by a bad paste/extraction, so this is not syntactically
# valid Python as it stands. It reads as the body of an import script:
# log a usage error and exit, then parse a <busqueda> XML root
# (nombre/fecha/usuario/descripcion), clean the reserved XML characters via
# Limpieza.limpia_reservados_xml, save a Busqueda row, and create one Persona
# per <person> child element. The trailing `try:` block is cut off mid-way
# (its except handler is not visible in this chunk), so the fragment cannot be
# safely reconstructed from this view alone — recover the original line breaks
# from version control before editing.
logging.error('Usage: python unoporuno_import xml_file_name') exit(-1) logging.info('Processing file ' + xml_file) L = Limpieza() x_busqueda = busqueda_xml.getroot() x_nombre = x_busqueda.find('nombre') x_fecha = x_busqueda.find('fecha') x_usuario = x_busqueda.find('usuario') x_descripcion = x_busqueda.find('descripcion') busqueda = Busqueda() busqueda.nombre = L.limpia_reservados_xml(x_nombre.text) busqueda.fecha = x_fecha.text busqueda.usuario = L.limpia_reservados_xml(x_usuario.text) busqueda.descripcion = L.limpia_reservados_xml(x_descripcion.text) busqueda.save() logging.info('Importing busqueda ' + busqueda.nombre) x_personas = x_busqueda.find('personas') x_personas_set = x_personas.findall('person') limpia = L.limpia_reservados_xml for x_persona in x_personas_set: persona = Persona() persona.busqueda = busqueda persona.name = limpia(x_persona.find('name').text) persona.geo = limpia(x_persona.find('geo').text) persona.orgs = limpia(x_persona.find('orgs').text) persona.topics = limpia(x_persona.find('topics').text) persona.link = limpia(x_persona.find('link').text) try: persona.mobility_status = x_persona.find('mobility_status').text persona.save()
class Busqueda_DB(object):
    """Persistence helper around the Django ``Busqueda``/``Persona``/``Snippet`` models.

    Wraps creation, retrieval and deletion of a ``Busqueda`` (search) row,
    and imports per-person XML result files into the database.
    The Django webapp is put on ``sys.path`` lazily so the models can be
    imported at call time rather than module-import time.
    """

    def __init__(self, unoporuno_root):
        # unoporuno_root is the installation root; module and webapp paths
        # are derived from it.
        logging.basicConfig(level=logging.DEBUG)
        self.unoporuno_root = unoporuno_root
        self.unoporuno_modules = unoporuno_root + "/modules/"
        self.webapp_path = unoporuno_root + "/webapp/"

    def _add_webapp_to_path(self):
        # Make the Django webapp importable; idempotent.
        if self.webapp_path not in sys.path:
            sys.path.append(self.webapp_path)
            sys.path.append(self.webapp_path + 'cidesal/')

    def new(self, name, fg_select='All', user='', description=''):
        """Create and save a new Busqueda; return its database id as int."""
        logging.debug("busqueda_db::nueva búsqueda")
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        self.busqueda = Busqueda()
        self.busqueda.nombre = name
        self.busqueda.fecha = datetime.datetime.now()
        self.busqueda.usuario = user
        self.busqueda.descripcion = description
        self.FG_select = fg_select
        self.busqueda.save()
        return int(self.busqueda.id)

    def get(self, busqueda_id, fg_select='All'):
        """Load the Busqueda with the given id into ``self.busqueda``."""
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        logging.debug("busqueda_db::leyendo datos de búsqueda " + str(busqueda_id))
        self.busqueda = Busqueda.objects.get(id=busqueda_id)
        self.FG_select = fg_select

    def delete(self, busqueda_id, name=''):
        """Delete the Busqueda row with the given id."""
        # BUG FIX: original logged str(id) — the *builtin* id function —
        # instead of the busqueda_id argument.
        logging.debug("busqueda_db::deleting busqueda_id:" + str(busqueda_id))
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        self.busqueda = Busqueda.objects.get(id=busqueda_id)
        self.busqueda.delete()

    def update_person_from_file(self, file_path):
        """Import one per-person result XML file into the current busqueda.

        Creates a Persona plus its Snippets (persisted according to the
        FG filter selected at new()/get() time).
        Returns the new Persona's id, or False on any access/format error.
        """
        if not os.access(file_path, os.R_OK):
            logging.error('No access to ' + file_path)
            return False
        filter_value = self.FG_select
        if not re.search(r'\.xml$', file_path):
            logging.error(file_path + ' is not an xml file')
            return False
        logging.debug('Busqueda_DB::processing file: ' + file_path)
        person_tree = etree.parse(file_path)
        person = person_tree.getroot()
        # change_log 17/01/12 JGF: field length limits are specified in
        # UNOPORUNO_ROOT/webapp/cidesal/unoporuno/models.py
        x_name = person.find('name').text
        if x_name is None:
            return False
        x_name = x_name[:250]
        x_geo = person.find('places').text[:300] if person.find('places').text else ''
        x_orgs = person.find('orgs').text[:400] if person.find('orgs').text else ''
        x_topics = person.find('topics').text[:400] if person.find('topics').text else ''
        # BUG FIX: original stored the Element object (person.find('link'))
        # in the link column instead of its text.
        x_link = person.find('link').text if person.find('link').text else ''
        logging.info('processing person: ' + x_name)
        logging.debug('x_geo=' + x_geo + 'of type ' + str(type(x_geo)))
        logging.debug('x_orgs=' + x_orgs + 'of type ' + str(type(x_orgs)))
        logging.debug('x_topics=' + x_topics + 'of type ' + str(type(x_topics)))
        w_persona = self.busqueda.persona_set.create(
            name=x_name, geo=x_geo, orgs=x_orgs, topics=x_topics, link=x_link)
        for pipeline in person.findall('converging_pipelines'):
            n = pipeline.get('number')
            logging.debug('Busqueda_DB::Processing converging pipeline number ' + n)
            for x_snippet in pipeline.find('snippets').findall('snippet'):
                x_query = self.clean_xml(x_snippet.find('query').text)
                x_title = self.clean_xml(x_snippet.find('title').text)
                x_description = self.clean_xml(
                    x_snippet.find('description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                x_link = self.clean_xml(
                    x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                x_query_type = x_snippet.find('query_type').text
                logging.debug('Busqueda_DB::processing snippet ' + x_query +
                              '::' + x_title)
                logging.debug('Busqueda_DB::snippet description::' + x_description)
                logging.debug('Busqueda_DB::snippet link::' + x_link)
                logging.debug('Busqueda_DB::snippet query_type::' + x_query_type)
                x_filters = x_snippet.find('filters')
                x_FG = x_filters.get('FG')
                x_ESA = x_filters.get('ESA')
                x_RE = x_filters.get('RE')
                b_FG = x_FG == 'True'
                b_RE = x_RE == 'True'
                logging.debug('Busqueda_DB::filters :: FG=' + x_FG +
                              ' ESA=' + x_ESA + ' RE=' + x_RE)
                try:
                    w_snippet = w_persona.snippet_set.create(
                        query=x_query, title=x_title, description=x_description,
                        link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                        converging_pipelines=n)
                except Exception:
                    # BUG FIX: original read w_snippet.description here, but
                    # w_snippet is unbound when create() itself raised; log the
                    # source description instead, then retry with ascii-safe text.
                    logging.debug('exception on saving snippet: converting ' +
                                  x_description + ' of type ' +
                                  str(type(x_description)))
                    w_snippet = w_persona.snippet_set.create(
                        query=x_query,
                        title=x_title.encode('ascii', 'replace'),
                        description=x_description.encode('ascii', 'replace'),
                        link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                        converging_pipelines=n)
                if x_query_type == 'name':
                    w_snippet.name_pipeline = True
                    logging.debug('filters:: query_type=name')
                elif x_query_type == 'geo':
                    w_snippet.geo_pipeline = True
                    logging.debug('filters:: query_type=geo')
                elif x_query_type == 'orgs':
                    w_snippet.orgs_pipeline = True
                    logging.debug('filters:: query_type=orgs')
                elif x_query_type == 'topics':
                    w_snippet.topics_pipeline = True
                    logging.debug('filters:: query_type=topics')
                # Persist only snippets passing the FG filter
                # (an empty/None or 'All' filter keeps everything).
                if not filter_value or filter_value == 'All' or filter_value == x_FG:
                    w_snippet.save()
        self.busqueda.save()
        return w_persona.id

    def clean_xml(self, line):
        """Decode XML reserved-character entities in *line*.

        Returns '' when line is falsy (None or empty string).
        """
        if not line:
            return ''
        # BUG FIX: the replacement *sources* had been entity-decoded
        # (re.subn('"', '"', …)), turning every substitution into an identity
        # no-op; restore the escaped forms. Order matters: &amp; is decoded
        # before &lt;/&gt;, matching the original substitution sequence.
        # NOTE(review): "'" is assumed to have been &#39; (decimal form used by
        # search-engine snippets) rather than &apos; — confirm against history.
        for entity, char in (('&quot;', '"'), ('&amp;', '&'), ('&#39;', "'"),
                             ('&lt;', '<'), ('&gt;', '>')):
            line = line.replace(entity, char)
        return line
# NOTE(review): duplicate of the mangled import-script fragment earlier in this
# file — the same statements collapsed onto one physical line (only spacing
# around '+' differs), equally non-parsable as-is. The trailing `try:` block is
# again cut off before its except handler, so it cannot be reconstructed from
# this view. Presumably one of the two copies should be deleted once the
# original line breaks are recovered from version control — verify before
# removing either.
logging.error('Usage: python unoporuno_import xml_file_name') exit(-1) logging.info('Processing file ' +xml_file) L = Limpieza() x_busqueda = busqueda_xml.getroot() x_nombre = x_busqueda.find('nombre') x_fecha = x_busqueda.find('fecha') x_usuario = x_busqueda.find('usuario') x_descripcion = x_busqueda.find('descripcion') busqueda = Busqueda() busqueda.nombre = L.limpia_reservados_xml(x_nombre.text) busqueda.fecha = x_fecha.text busqueda.usuario = L.limpia_reservados_xml(x_usuario.text) busqueda.descripcion = L.limpia_reservados_xml(x_descripcion.text) busqueda.save() logging.info('Importing busqueda ' +busqueda.nombre) x_personas = x_busqueda.find('personas') x_personas_set = x_personas.findall('person') limpia = L.limpia_reservados_xml for x_persona in x_personas_set: persona = Persona() persona.busqueda = busqueda persona.name = limpia(x_persona.find('name').text) persona.geo = limpia(x_persona.find('geo').text) persona.orgs = limpia(x_persona.find('orgs').text) persona.topics = limpia(x_persona.find('topics').text) persona.link = limpia(x_persona.find('link').text) try: persona.mobility_status = x_persona.find('mobility_status').text persona.save()
# NOTE(review): this is the second copy of Busqueda_DB in this file (the class
# is defined twice; this later definition shadows the earlier one). Both copies
# carry the same fixes; one of them should be deleted once the duplication is
# confirmed against version control.
class Busqueda_DB(object):
    """Persistence helper around the Django ``Busqueda``/``Persona``/``Snippet`` models.

    Wraps creation, retrieval and deletion of a ``Busqueda`` (search) row,
    and imports per-person XML result files into the database.
    The Django webapp is put on ``sys.path`` lazily so the models can be
    imported at call time rather than module-import time.
    """

    def __init__(self, unoporuno_root):
        # unoporuno_root is the installation root; module and webapp paths
        # are derived from it.
        logging.basicConfig(level=logging.DEBUG)
        self.unoporuno_root = unoporuno_root
        self.unoporuno_modules = unoporuno_root + "/modules/"
        self.webapp_path = unoporuno_root + "/webapp/"

    def _add_webapp_to_path(self):
        # Make the Django webapp importable; idempotent.
        if self.webapp_path not in sys.path:
            sys.path.append(self.webapp_path)
            sys.path.append(self.webapp_path + 'cidesal/')

    def new(self, name, fg_select='All', user='', description=''):
        """Create and save a new Busqueda; return its database id as int."""
        logging.debug("busqueda_db::nueva búsqueda")
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        self.busqueda = Busqueda()
        self.busqueda.nombre = name
        self.busqueda.fecha = datetime.datetime.now()
        self.busqueda.usuario = user
        self.busqueda.descripcion = description
        self.FG_select = fg_select
        self.busqueda.save()
        return int(self.busqueda.id)

    def get(self, busqueda_id, fg_select='All'):
        """Load the Busqueda with the given id into ``self.busqueda``."""
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        logging.debug("busqueda_db::leyendo datos de búsqueda " + str(busqueda_id))
        self.busqueda = Busqueda.objects.get(id=busqueda_id)
        self.FG_select = fg_select

    def delete(self, busqueda_id, name=''):
        """Delete the Busqueda row with the given id."""
        # BUG FIX: original logged str(id) — the *builtin* id function —
        # instead of the busqueda_id argument.
        logging.debug("busqueda_db::deleting busqueda_id:" + str(busqueda_id))
        self._add_webapp_to_path()
        from unoporuno.models import Busqueda, Persona, Snippet
        self.busqueda = Busqueda.objects.get(id=busqueda_id)
        self.busqueda.delete()

    def update_person_from_file(self, file_path):
        """Import one per-person result XML file into the current busqueda.

        Creates a Persona plus its Snippets (persisted according to the
        FG filter selected at new()/get() time).
        Returns the new Persona's id, or False on any access/format error.
        """
        if not os.access(file_path, os.R_OK):
            logging.error('No access to ' + file_path)
            return False
        filter_value = self.FG_select
        if not re.search(r'\.xml$', file_path):
            logging.error(file_path + ' is not an xml file')
            return False
        logging.debug('Busqueda_DB::processing file: ' + file_path)
        person_tree = etree.parse(file_path)
        person = person_tree.getroot()
        # change_log 17/01/12 JGF: field length limits are specified in
        # UNOPORUNO_ROOT/webapp/cidesal/unoporuno/models.py
        x_name = person.find('name').text
        if x_name is None:
            return False
        x_name = x_name[:250]
        x_geo = person.find('places').text[:300] if person.find('places').text else ''
        x_orgs = person.find('orgs').text[:400] if person.find('orgs').text else ''
        x_topics = person.find('topics').text[:400] if person.find('topics').text else ''
        # BUG FIX: original stored the Element object (person.find('link'))
        # in the link column instead of its text.
        x_link = person.find('link').text if person.find('link').text else ''
        logging.info('processing person: ' + x_name)
        logging.debug('x_geo=' + x_geo + 'of type ' + str(type(x_geo)))
        logging.debug('x_orgs=' + x_orgs + 'of type ' + str(type(x_orgs)))
        logging.debug('x_topics=' + x_topics + 'of type ' + str(type(x_topics)))
        w_persona = self.busqueda.persona_set.create(
            name=x_name, geo=x_geo, orgs=x_orgs, topics=x_topics, link=x_link)
        for pipeline in person.findall('converging_pipelines'):
            n = pipeline.get('number')
            logging.debug('Busqueda_DB::Processing converging pipeline number ' + n)
            for x_snippet in pipeline.find('snippets').findall('snippet'):
                x_query = self.clean_xml(x_snippet.find('query').text)
                x_title = self.clean_xml(x_snippet.find('title').text)
                x_description = self.clean_xml(
                    x_snippet.find('description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                x_link = self.clean_xml(
                    x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                x_query_type = x_snippet.find('query_type').text
                logging.debug('Busqueda_DB::processing snippet ' + x_query +
                              '::' + x_title)
                logging.debug('Busqueda_DB::snippet description::' + x_description)
                logging.debug('Busqueda_DB::snippet link::' + x_link)
                logging.debug('Busqueda_DB::snippet query_type::' + x_query_type)
                x_filters = x_snippet.find('filters')
                x_FG = x_filters.get('FG')
                x_ESA = x_filters.get('ESA')
                x_RE = x_filters.get('RE')
                b_FG = x_FG == 'True'
                b_RE = x_RE == 'True'
                logging.debug('Busqueda_DB::filters :: FG=' + x_FG +
                              ' ESA=' + x_ESA + ' RE=' + x_RE)
                try:
                    w_snippet = w_persona.snippet_set.create(
                        query=x_query, title=x_title, description=x_description,
                        link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                        converging_pipelines=n)
                except Exception:
                    # BUG FIX: original read w_snippet.description here, but
                    # w_snippet is unbound when create() itself raised; log the
                    # source description instead, then retry with ascii-safe text.
                    logging.debug('exception on saving snippet: converting ' +
                                  x_description + ' of type ' +
                                  str(type(x_description)))
                    w_snippet = w_persona.snippet_set.create(
                        query=x_query,
                        title=x_title.encode('ascii', 'replace'),
                        description=x_description.encode('ascii', 'replace'),
                        link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                        converging_pipelines=n)
                if x_query_type == 'name':
                    w_snippet.name_pipeline = True
                    logging.debug('filters:: query_type=name')
                elif x_query_type == 'geo':
                    w_snippet.geo_pipeline = True
                    logging.debug('filters:: query_type=geo')
                elif x_query_type == 'orgs':
                    w_snippet.orgs_pipeline = True
                    logging.debug('filters:: query_type=orgs')
                elif x_query_type == 'topics':
                    w_snippet.topics_pipeline = True
                    logging.debug('filters:: query_type=topics')
                # Persist only snippets passing the FG filter
                # (an empty/None or 'All' filter keeps everything).
                if not filter_value or filter_value == 'All' or filter_value == x_FG:
                    w_snippet.save()
        self.busqueda.save()
        return w_persona.id

    def clean_xml(self, line):
        """Decode XML reserved-character entities in *line*.

        Returns '' when line is falsy (None or empty string).
        """
        if not line:
            return ''
        # BUG FIX: the replacement *sources* had been entity-decoded
        # (re.subn('"', '"', …)), turning every substitution into an identity
        # no-op; restore the escaped forms. Order matters: &amp; is decoded
        # before &lt;/&gt;, matching the original substitution sequence.
        # NOTE(review): "'" is assumed to have been &#39; (decimal form used by
        # search-engine snippets) rather than &apos; — confirm against history.
        for entity, char in (('&quot;', '"'), ('&amp;', '&'), ('&#39;', "'"),
                             ('&lt;', '<'), ('&gt;', '>')):
            line = line.replace(entity, char)
        return line
def main():
    """Bulk-import per-person XML result files into a new Busqueda.

    Usage: script <results_path> <name> [FG_filter] [user] [description]

    Walks <results_path>/results_converging/ and creates one Persona (with
    its Snippets) per *.xml file, attached to a Busqueda named <name>.
    Exits with -1 when the results directory is not readable.
    """
    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' + sys.argv[1] + ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' + filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        # NOTE(review): the original source here was redacted to
        # 'arguments user:'******'arguments desc:' + sys.argv[5]; reconstructed
        # as the obvious argv[4] log plus an argv[5] description branch,
        # mirroring the argv[3]/argv[4] pattern above — confirm against history.
        logging.debug('arguments user:' + sys.argv[4])
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:' + sys.argv[5])
    results_path += '/results_converging/'
    if os.access(results_path, os.R_OK):
        colombia.save()
    else:
        logging.error('No access to ' + results_path)
        exit(-1)
    for subdirs, dirs, files in os.walk(results_path):
        for file in files:
            if not re.search(r'\.xml$', file):
                continue
            logging.debug('processing file: ' + file)
            file_path = results_path + file
            person = etree.parse(file_path).getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: ' + x_name.text)
            w_persona = colombia.persona_set.create(
                name=x_name.text, geo=x_geo.text, orgs=x_orgs.text,
                topics=x_topics.text)
            for pipeline in person.findall('converging_pipelines'):
                n = pipeline.get('number')
                logging.debug('Processing converging pipeline number ' + n)
                for x_snippet in pipeline.find('snippets').findall('snippet'):
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find('description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug('processing snippet ' + x_query + '::' + x_title)
                    logging.debug('snippet description::' + x_description)
                    logging.debug('snippet link::' + x_link)
                    logging.debug('snippet query_type::' + x_query_type)
                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    b_FG = x_FG == 'True'
                    b_RE = x_RE == 'True'
                    logging.debug('filters :: FG=' + x_FG + ' ESA=' + x_ESA +
                                  ' RE=' + x_RE)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=x_title,
                            description=x_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    except Exception:
                        # BUG FIX: original logged w_snippet.description, but
                        # w_snippet is unbound when create() raised; log the
                        # source description, then retry with ascii-safe text.
                        logging.debug('exception on saving snippet: converting ' +
                                      x_description + ' of type ' +
                                      str(type(x_description)))
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query,
                            title=x_title.encode('ascii', 'replace'),
                            description=x_description.encode('ascii', 'replace'),
                            link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    if x_query_type == 'name':
                        w_snippet.name_pipeline = True
                        logging.debug('filters:: query_type=name')
                    elif x_query_type == 'geo':
                        w_snippet.geo_pipeline = True
                        logging.debug('filters:: query_type=geo')
                    elif x_query_type == 'orgs':
                        w_snippet.orgs_pipeline = True
                        logging.debug('filters:: query_type=orgs')
                    elif x_query_type == 'topics':
                        w_snippet.topics_pipeline = True
                        logging.debug('filters:: query_type=topics')
                    # Persist only snippets passing the FG filter
                    # (no filter or 'All' keeps everything).
                    if not filter_value or filter_value == 'All' or filter_value == x_FG:
                        w_snippet.save()
    # NOTE(review): the mangled source does not show the indentation of this
    # final save; placed after the walk (one save for the whole run) — confirm.
    colombia.save()
# NOTE(review): this is the second copy of main() in this file (the function is
# defined twice; this later definition shadows the earlier one). Both copies
# carry the same fixes; one should be deleted once the duplication is confirmed
# against version control.
def main():
    """Bulk-import per-person XML result files into a new Busqueda.

    Usage: script <results_path> <name> [FG_filter] [user] [description]

    Walks <results_path>/results_converging/ and creates one Persona (with
    its Snippets) per *.xml file, attached to a Busqueda named <name>.
    Exits with -1 when the results directory is not readable.
    """
    filter_value = None
    colombia = Busqueda()
    colombia.nombre = sys.argv[2]
    results_path = sys.argv[1]
    colombia.fecha = datetime.datetime.now()
    colombia.usuario = '*'
    logging.debug('arguments path:' + sys.argv[1] + ' name: ' + sys.argv[2])
    if len(sys.argv) > 3:
        filter_value = sys.argv[3]
        logging.debug(' filter_value' + filter_value)
    if len(sys.argv) > 4:
        colombia.usuario = sys.argv[4]
        # NOTE(review): the original source here was redacted to
        # 'arguments user:'******'arguments desc:'+sys.argv[5]; reconstructed
        # as the obvious argv[4] log plus an argv[5] description branch,
        # mirroring the argv[3]/argv[4] pattern above — confirm against history.
        logging.debug('arguments user:' + sys.argv[4])
    if len(sys.argv) > 5:
        colombia.descripcion = sys.argv[5]
        logging.debug('arguments desc:' + sys.argv[5])
    results_path += '/results_converging/'
    if os.access(results_path, os.R_OK):
        colombia.save()
    else:
        logging.error('No access to ' + results_path)
        exit(-1)
    for subdirs, dirs, files in os.walk(results_path):
        for file in files:
            if not re.search(r'\.xml$', file):
                continue
            logging.debug('processing file: ' + file)
            file_path = results_path + file
            person = etree.parse(file_path).getroot()
            x_name = person.find('name')
            x_geo = person.find('places')
            x_orgs = person.find('orgs')
            x_topics = person.find('topics')
            logging.info('processing person: ' + x_name.text)
            w_persona = colombia.persona_set.create(
                name=x_name.text, geo=x_geo.text, orgs=x_orgs.text,
                topics=x_topics.text)
            for pipeline in person.findall('converging_pipelines'):
                n = pipeline.get('number')
                logging.debug('Processing converging pipeline number ' + n)
                for x_snippet in pipeline.find('snippets').findall('snippet'):
                    x_query = clean_xml(x_snippet.find('query').text)
                    x_title = clean_xml(x_snippet.find('title').text)
                    x_description = clean_xml(
                        x_snippet.find('description').text)[:SNIPPET_DESCRIPTION_LENGTH]
                    x_link = clean_xml(
                        x_snippet.find('link').text)[:SNIPPET_LINK_LENGTH]
                    x_query_type = x_snippet.find('query_type').text
                    logging.debug('processing snippet ' + x_query + '::' + x_title)
                    logging.debug('snippet description::' + x_description)
                    logging.debug('snippet link::' + x_link)
                    logging.debug('snippet query_type::' + x_query_type)
                    x_filters = x_snippet.find('filters')
                    x_FG = x_filters.get('FG')
                    x_ESA = x_filters.get('ESA')
                    x_RE = x_filters.get('RE')
                    b_FG = x_FG == 'True'
                    b_RE = x_RE == 'True'
                    logging.debug('filters :: FG=' + x_FG + ' ESA=' + x_ESA +
                                  ' RE=' + x_RE)
                    try:
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query, title=x_title,
                            description=x_description, link=x_link,
                            FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    except Exception:
                        # BUG FIX: original logged w_snippet.description, but
                        # w_snippet is unbound when create() raised; log the
                        # source description, then retry with ascii-safe text.
                        logging.debug('exception on saving snippet: converting ' +
                                      x_description + ' of type ' +
                                      str(type(x_description)))
                        w_snippet = w_persona.snippet_set.create(
                            query=x_query,
                            title=x_title.encode('ascii', 'replace'),
                            description=x_description.encode('ascii', 'replace'),
                            link=x_link, FG=b_FG, RE=b_RE, ESA_score=x_ESA,
                            converging_pipelines=n)
                    if x_query_type == 'name':
                        w_snippet.name_pipeline = True
                        logging.debug('filters:: query_type=name')
                    elif x_query_type == 'geo':
                        w_snippet.geo_pipeline = True
                        logging.debug('filters:: query_type=geo')
                    elif x_query_type == 'orgs':
                        w_snippet.orgs_pipeline = True
                        logging.debug('filters:: query_type=orgs')
                    elif x_query_type == 'topics':
                        w_snippet.topics_pipeline = True
                        logging.debug('filters:: query_type=topics')
                    # Persist only snippets passing the FG filter
                    # (no filter or 'All' keeps everything).
                    if not filter_value or filter_value == 'All' or filter_value == x_FG:
                        w_snippet.save()
    # NOTE(review): the mangled source does not show the indentation of this
    # final save; placed after the walk (one save for the whole run) — confirm.
    colombia.save()