def hash_control(self):
    """Register the uploaded file at ``self.response`` in the data repository.

    Computes the MD5 of the file content; if a file with the same hash is
    already registered, the upload is deleted and ``True`` is returned
    (duplicate).  Otherwise the file is copied into ``self.repository``
    under a hashed name, recorded in the ``DataRepository`` table, the
    original is removed and ``self.response`` is repointed at the
    repository copy.

    :returns: ``True`` when the file was a duplicate and was discarded,
        ``False`` when it was inserted into the repository.
    """
    import hashlib
    from datetime import datetime
    import shutil

    # Hash the file content.  Binary mode so the digest is not affected
    # by platform newline translation; close the handle even on error
    # (the original leaked it).
    h = hashlib.md5()
    f = open(self.response, 'rb')
    try:
        h.update(f.read())
    finally:
        f.close()

    session = model.Session

    # Create the control table if it doesn't exist yet
    setup_model()

    # Abort when this exact content is already registered
    results = session.query(DataRepository.hash).filter_by(
        hash=h.hexdigest()).all()
    if len(results) > 0:
        self.log('This file %s has the same hash of a file already in'
                 ' database. Aborting' % self.response)
        os.remove(self.response)
        return True

    # Timestamp used both for the stored record and for the filename hash
    file_date = datetime.today()

    # Derive the repository filename: md5(date + original basename) + ext
    filename, extension = os.path.splitext(os.path.basename(self.response))
    h2 = hashlib.md5()
    h2.update(str(file_date) + filename)
    filename = h2.hexdigest() + extension
    filename2 = os.path.join(self.repository, filename)

    self.log('Inserting file %s in repository' % self.response)

    # Copy file to repository
    shutil.copy2(self.response, filename2)

    # Record the upload.  creation_date reuses file_date itself: the
    # original called file_date.today(), which produced a second,
    # slightly later timestamp than the one hashed into the filename.
    repository = DataRepository(hash=h.hexdigest(),
                                creation_date=file_date,
                                original_file=filename2,
                                package_file=self.response)
    session.add(repository)
    session.commit()
    self.log('File inserted')

    # Drop the uploaded temp file and expose the repository copy
    os.remove(self.response)
    self.response = filename2
    return False
def log_error(self, file_dict):
    """Store a file that failed to import in the error repository.

    Copies ``file_dict['tmpfile']`` into the ``import_errors``
    sub-directory of ``self.repository`` under a date-prefixed name and
    records the failure (hash, message, error type) in the
    ``ErrorRepository`` table, then removes the temporary file.

    :param file_dict: dict with keys ``tmpfile`` (path of the temp file),
        ``filename`` (original name) and optionally ``errmsg``,
        ``error_type`` and ``package_file``.
    """
    import hashlib
    from datetime import datetime
    import shutil

    # Timestamp used for the record, the stored filename and the hash
    file_date = datetime.today()

    # Hash content + timestamp.  NOTE(review): because the timestamp is
    # part of the digest, the duplicate check below will practically
    # never fire for repeated errors on the same file — presumably
    # intentional, so each failure is kept; confirm with the authors.
    h = hashlib.md5()
    f = open(file_dict['tmpfile'], 'r')
    try:
        h.update(f.read() + str(file_date))
    finally:
        # Close even if read/update raises (original closed unconditionally)
        f.close()

    session = model.Session

    # Create the control table if it doesn't exist yet
    setup_model()

    # Skip files whose hash is already registered
    results = session.query(ErrorRepository.hash).filter_by(
        hash=h.hexdigest()).all()
    if len(results) > 0:
        self.log('This file %s has the same hash of a file already in'
                 ' database. Aborting' % file_dict['filename'])
        os.remove(file_dict['tmpfile'])
        return

    # Build the stored name: "<date>-<original name><ext>", spaces replaced
    filename3, extension = os.path.splitext(
        os.path.basename(file_dict['filename']))
    filename3 = str(file_date) + '-' + filename3 + extension
    filename2 = os.path.join(
        self.repository,
        os.path.join('import_errors', filename3.replace(' ', '-')))

    self.log('Error in file %s. Inserting in repository with message\n %s'
             % (file_dict['filename'], file_dict.get('errmsg')))

    # Create the errors directory on first use (0o770 works on Py2.6+/Py3;
    # the original used the Py2-only literal 0770)
    if not os.path.exists(os.path.join(self.repository, 'import_errors')):
        os.mkdir(os.path.join(self.repository, 'import_errors'), 0o770)

    # Copy file to repository
    shutil.copy2(file_dict['tmpfile'], filename2)

    # Record the failure.  creation_date reuses file_date itself (the
    # original called file_date.today(), producing a later timestamp
    # than the one embedded in the filename and hash).
    repository = ErrorRepository(
        hash=h.hexdigest(),
        creation_date=file_date,
        original_file=filename2,
        errmsg=file_dict.get('errmsg'),
        error_type=file_dict.get('error_type'),
        package_file=file_dict.get('package_file')
    )
    session.add(repository)
    session.commit()
    self.log('File inserted')

    # Drop the temp file now that the copy is safely stored
    os.remove(file_dict['tmpfile'])
class lightbaseAdminController(AdminController):
    """Admin controller extension exposing the import-error log page."""

    def log(self):
        """Render the import-error log page.

        Supports filtering by period (``data``, in hours), explicit date
        range (``data_inicio`` / ``data_final``) and error type
        (``tipo``), paginates the results, and — when ``arquivo`` (a
        record hash) is given — streams the stored file as a download.
        """
        import sqlalchemy
        from ckan import plugins, model
        from ckanext.datadaemon.model import setup as setup_model
        from ckanext.datadaemon.model import ErrorRepository

        # Pagination setup
        q = c.q = request.params.get('q', u'')  # unicode (decoded from utf8)
        try:
            page = int(request.params.get('page', 1))
        except ValueError:
            abort(400, ('"page" parameter must be an integer'))
        limit = 20

        # most search operations should reset the page counter:
        params_nopage = [(k, v) for k, v in request.params.items()
                         if k != 'page']

        def search_url(params):
            url = '/ckan-admin/log'
            params = [(k, v.encode('utf-8') if isinstance(v, basestring)
                       else str(v)) for k, v in params]
            return url + u'?' + urlencode(params)

        def drill_down_url(**by):
            params = list(params_nopage)
            params.extend(by.items())
            return search_url(set(params))
        c.drill_down_url = drill_down_url

        def remove_field(key, value):
            params = list(params_nopage)
            params.remove((key, value))
            return search_url(params)
        c.remove_field = remove_field

        def pager_url(q=None, page=None):
            params = list(params_nopage)
            params.append(('page', page))
            return search_url(params)

        params = request.params
        setup_model()
        session = ckan.model.Session

        # Build the filter query.  User-supplied values are passed as
        # bound parameters (the original interpolated them directly into
        # the SQL string — an injection hole) and every fragment ends in
        # a space (the original produced "1 = 1AND ...").
        query = "SELECT e.* FROM dt_errors e WHERE 1 = 1 "
        bind = {}
        value_inicio = ""
        value_final = ""

        # Either filter by period ('data', in hours: 24 / 168 / 720) or
        # by an explicit start/end date range.
        if params.get('data'):
            # Coerce to int so the value is safe to place inside the
            # interval literal.
            try:
                hours = int(params.get('data'))
            except ValueError:
                abort(400, ('"data" parameter must be an integer'))
            query += ("AND creation_date >= (now() - interval '%s hours') "
                      % hours)
        else:
            if params.get('data_inicio'):
                query += "AND creation_date >= :data_inicio "
                bind['data_inicio'] = params.get('data_inicio')
                value_inicio = params.get('data_inicio')
            if params.get('data_final'):
                query += "AND creation_date <= :data_final "
                bind['data_final'] = params.get('data_final')
                value_final = params.get('data_final')

        # Filter by error type ('None' selects rows without a type)
        if params.get('tipo') == 'None':
            query += "AND error_type is NULL "
        elif params.get('tipo'):
            query += "AND error_type = :tipo "
            bind['tipo'] = params.get('tipo')

        # Paginated variant.  The original used LIMIT limit*page, which
        # returned up to page*20 rows instead of 20 per page.
        query_pagination = (query +
                            "ORDER BY creation_date DESC LIMIT %s OFFSET %s"
                            % (limit, (page - 1) * limit))

        # Current page of errors, plus the full result set for the pager
        error_list = session.query(ErrorRepository).from_statement(
            query_pagination).params(**bind).all()
        error_list2 = session.query(ErrorRepository).from_statement(
            query).params(**bind).all()

        # Show only the base name of each stored file in the listing
        from os.path import basename
        retorno = list()
        for item in error_list:
            item.original_file = basename(item.original_file)
            retorno.append(item)

        # Distinct error types feed the filter drop-down
        tipos_de_erros = session.query(
            ErrorRepository.error_type).distinct().all()

        x = {
            'valor': retorno,
            'valor2': tipos_de_erros,
            'valor3': value_inicio,
            'valor4': value_final,
            'valor5': params.get('data'),
            'valor6': params.get('tipo')
        }
        c.page = h.Page(collection=error_list2, page=page, url=pager_url,
                        item_count=len(error_list2), items_per_page=limit)

        # ---------------- file download ----------------
        from paste.fileapp import FileApp
        import mimetypes
        if params.get('arquivo'):
            # 'arquivo' carries the record hash; look up the stored path
            filepath = session.query(ErrorRepository.original_file).filter(
                ErrorRepository.hash == params.get('arquivo')).all()
            path = str(filepath[0][0])
            # guess_type returns a (type, encoding) tuple, which is always
            # truthy — the original tested the tuple itself, so its
            # fallback branch was unreachable and Content-Type could be
            # the string 'None'.  Test the type element instead, and do
            # not wrap the value in literal quotes (invalid header).
            content_type = mimetypes.guess_type(path)
            if content_type[0]:
                headers = [
                    ('Content-Disposition',
                     'attachment; filename="' + path + '"'),
                    ('Content-Type', str(content_type[0]))
                ]
            else:
                headers = [
                    ('Content-Disposition',
                     'attachment; filename="' + path + '"'),
                    # fixed typo: was 'aplication/octet-stream'
                    ('Content-Type', 'application/octet-stream')
                ]
            fapp = FileApp(path, headers)
            return fapp(request.environ, self.start_response)
        else:
            return render('admin/log.html', extra_vars=x)