def read_log(self, logfile):
    """Load the accesses of one log file into the access store.

    The file name is stripped of surrounding whitespace and looked up in
    ``self._proc_coll``; when a record with that ``file_name`` already
    exists the file is skipped. Otherwise the name is recorded, the file is
    opened (through ``gzip`` when ``utils.check_file_format`` reports
    ``'gzip'``, through ``codecs`` otherwise) and each line is parsed with
    ``self._ac.parsed_access`` and registered through a ``Local`` context.

    Lines whose parsing raises ``ValueError`` are logged and skipped; lines
    whose parse result is falsy are skipped silently.

    :param logfile: path of the log file to process.
    :returns: always ``None``.
    """
    logfile = logfile.strip()

    # Skip files that were already processed.
    if self._proc_coll.find({'file_name': logfile}).count() > 0:
        logger.info('File already processed %s' % logfile)
        return None

    reader = gzip if utils.check_file_format(logfile) == 'gzip' else codecs

    # Record the new file in the processed-files collection.
    # NOTE(review): the record is written before any line is read, so a
    # failure half-way through still leaves the file marked as processed --
    # confirm this is intended.
    logger.info("Processing: %s" % logfile)
    self._proc_coll.insert({'file_name': logfile})

    with reader.open(logfile, 'rb') as f:
        with Local(self._mongo_uri, self._collection) as rq:
            for log_file_line, raw_line in enumerate(f, 1):
                logger.debug("Reading line {0} from file {1}".format(
                    str(log_file_line), logfile))
                logger.debug(raw_line)

                try:
                    parsed_line = self._ac.parsed_access(raw_line)
                except ValueError as e:
                    # Log the exception object itself: ``e.message`` does
                    # not exist on Python 3 and is empty for exceptions
                    # built with more than one argument.
                    logger.error("%s: %s", e, raw_line)
                    continue

                if not parsed_line:
                    continue

                if COUNTER_COMPLIANT:
                    # Counter Mode Accesses: PDF accesses use a longer lock
                    # time than the others (units not visible here --
                    # presumably seconds, TODO confirm).
                    if parsed_line['access_type'] == "PDF":
                        locktime = 30
                    else:
                        locktime = 10
                    try:
                        lockid = '_'.join([
                            parsed_line['ip'],
                            parsed_line['code'],
                            parsed_line['script'],
                        ])
                        self._ts.add(
                            lockid, parsed_line['iso_datetime'], locktime)
                        rq.register_access(parsed_line)
                    except ValueError:
                        # Presumably raised by ``self._ts.add`` for an
                        # access that is still locked -- verify. The access
                        # is written to the skipped log instead.
                        self.write_skipped_log('; '.join([
                            lockid,
                            parsed_line['original_date'],
                            parsed_line['original_agent'],
                        ]))
                        continue
                else:
                    # SciELO Mode Accesses: every parsed access is
                    # registered.
                    rq.register_access(parsed_line)

            # NOTE(review): placement after the loop reconstructed from a
            # whitespace-collapsed source -- confirm.
            rq.send()
def read_log(self, logfile):
    """Process a single access log file and register its accesses.

    Steps, in order:

    1. Strip the file name and return early when ``self._proc_coll``
       already holds a record with that ``file_name``.
    2. Pick the opener: ``gzip`` when ``utils.check_file_format`` returns
       ``'gzip'``, ``codecs`` otherwise.
    3. Insert the file name into ``self._proc_coll``.
    4. Parse every line with ``self._ac.parsed_access`` and register the
       result through a ``Local`` context; lines that raise ``ValueError``
       while parsing are logged and skipped, falsy parse results are
       skipped silently.

    :param logfile: path of the log file to process.
    :returns: always ``None``.
    """
    logfile = logfile.strip()

    # Check whether the file has already been processed.
    already_processed = self._proc_coll.find({'file_name': logfile}).count()
    if already_processed > 0:
        logger.info('File already processed %s' % logfile)
        return None

    reader = codecs
    if utils.check_file_format(logfile) == 'gzip':
        reader = gzip

    # Register the new file in the database of processed files.
    # NOTE(review): this happens before the file is read, so an error while
    # reading leaves the file flagged as processed -- confirm intent.
    logger.info("Processing: %s" % logfile)
    self._proc_coll.insert({'file_name': logfile})

    with reader.open(logfile, 'rb') as f:
        with Local(self._mongo_uri, self._collection) as rq:
            for log_file_line, raw_line in enumerate(f, 1):
                logger.debug("Reading line {0} from file {1}".format(
                    str(log_file_line), logfile))
                logger.debug(raw_line)

                try:
                    parsed_line = self._ac.parsed_access(raw_line)
                except ValueError as e:
                    # ``e.message`` was removed in Python 3 (and is empty
                    # for multi-argument exceptions); let logging format
                    # the exception object instead.
                    logger.error("%s: %s", e, raw_line)
                    continue

                if not parsed_line:
                    continue

                if COUNTER_COMPLIANT:
                    # Counter Mode Accesses
                    locktime = 30 if parsed_line['access_type'] == "PDF" else 10
                    try:
                        lockid = '_'.join([
                            parsed_line['ip'],
                            parsed_line['code'],
                            parsed_line['script'],
                        ])
                        self._ts.add(
                            lockid, parsed_line['iso_datetime'], locktime)
                        rq.register_access(parsed_line)
                    except ValueError:
                        # Access rejected (presumably by ``self._ts.add``
                        # -- verify): record it in the skipped log.
                        skipped_entry = '; '.join([
                            lockid,
                            parsed_line['original_date'],
                            parsed_line['original_agent'],
                        ])
                        self.write_skipped_log(skipped_entry)
                        continue
                else:
                    # SciELO Mode Accesses
                    rq.register_access(parsed_line)

            # NOTE(review): position after the loop inferred from a
            # whitespace-collapsed source -- confirm.
            rq.send()
def _is_valid_gzip(self):
    """Tell whether ``self._file`` is detected as a gzip file.

    Logs a warning and returns ``False`` when ``utils.check_file_format``
    reports anything other than ``'gzip'``; returns ``True`` otherwise.
    """
    detected_format = utils.check_file_format(self._file)
    if detected_format == 'gzip':
        return True

    logger.warning('Invalid gzip file: %s' % self._file)
    return False
def test_check_file_format_gz(self):
    """``check_file_format`` returns ``'gzip'`` for the .log.gz sample."""
    tests_dir = os.path.dirname(__file__)
    sample_path = os.path.join(
        tests_dir, 'samples/2015-06-07_scielo.br.log.gz')

    detected = check_file_format(sample_path)

    self.assertEqual('gzip', detected)
def test_check_file_format_gz(self):
    """The gzipped sample log must be detected as ``'gzip'``."""
    # The sample lives next to this test module, under ``samples/``.
    here = os.path.dirname(__file__)
    gz_sample = os.path.join(here, 'samples/2015-06-07_scielo.br.log.gz')
    file_format = check_file_format(gz_sample)
    self.assertEqual('gzip', file_format)