def split_log_wrapper(log):
    """Split one log file into per-user files next to it.

    Each line of ``log`` is passed to ``AwazzaLogRequest`` to obtain its
    user, and the raw line is written to
    ``<date>.<machine>.<user>.user`` inside the directory reported by
    ``awazza.parse_log_name``. A per-user file is opened in ``'w'`` mode
    the first time that user is seen and kept open until the whole log
    has been processed.

    Lines that cannot be parsed are logged and skipped. Any other error
    raised while splitting is logged with its traceback and not re-raised.

    Args:
        log: Path of the log file to split.
    """
    dirpath, date, machine, _ = awazza.parse_log_name(log)
    logging.info('Splitting log file (pid=%s): %s\n\t\t\t\t\t\t\t\t\t(Date: %s, Machine: %s)',
                 os.getpid(), log, date, machine)

    # Maps user id -> open file object for that user's output file.
    user_to_fd = {}

    try:
        with open(log, 'r') as logf:
            for line in logf:
                # make an AwazzaLogRequest to handle parsing the user id
                try:
                    user = AwazzaLogRequest(line).user
                except Exception as e:
                    logging.error('Error parsing line: %s\n%s', e, line)
                    continue  # skip this line

                # if we need to open a file for this user, do it
                if user not in user_to_fd:
                    user_path = os.path.join(
                        dirpath, '%s.%s.%s.user' % (date, machine, user))
                    user_to_fd[user] = open(user_path, 'w')

                # write the record to the corresponding user's file
                user_to_fd[user].write(line)
    except Exception as e:
        logging.error('Error splitting log files: %s\n%s',
                      e, traceback.format_exc())
    finally:
        # Close every handle individually: a failure on one close() must
        # not leave the remaining per-user files open.
        for fd in user_to_fd.values():
            try:
                fd.close()
            except Exception as e:
                logging.error('Error closing user file %s: %s',
                              getattr(fd, 'name', '?'), e)
# Example no. 2
# 0
def parse_logs(logs):
    """Append a short per-request record to a per-user file for each log.

    The logs are processed in sorted order. Files whose name ends in
    ``.gz`` are read through ``gzip``; all others are opened as plain
    text. Each line is parsed with ``AwazzaLogRequest``; for every request
    that is kept, the line ``"<ts> <user> <user_agent>"`` is appended to
    ``<user>.user.fix`` in the directory reported by
    ``awazza.parse_log_name``.

    Lines that cannot be parsed are logged and skipped. Any other error
    is logged with its traceback and stops processing of the remaining
    logs; it is not re-raised.

    Args:
        logs: Iterable of log file paths.
    """
    try:
        for log in sorted(logs):
            dirpath, date, machine, _ = awazza.parse_log_name(log)
            logging.info('Splitting log file (pid=%s): %s\n\t\t\t\t\t\t\t\t\t(Date: %s, Machine: %s)',
                         os.getpid(), log, date, machine)

            # Check if the files are gzip or not and uncompress if needed
            if log.endswith('.gz'):
                logf = gzip.open(log, 'rb')
            else:
                logf = open(log, 'r')

            # try/finally so the input handle is released even when a
            # line fails part-way through the file.
            try:
                for line in logf:
                    # make an AwazzaLogRequest to handle parsing the user id
                    try:
                        alr = AwazzaLogRequest(line)
                    except Exception as e:
                        logging.error('Error parsing line: %s\n%s', e, line)
                        continue  # skip this line

                    # Ignore bad requests
                    # NOTE(review): the strict '>' lets code 400 itself
                    # through -- confirm whether '>=' was intended.
                    if alr.response_code > 400:
                        continue

                    user = alr.user
                    user_path = os.path.join(dirpath, '%s.user.fix' % user)

                    # Repeatedly opening slows down the process, but prevents
                    # 'too many handles' type errors
                    with open(user_path, 'a') as userf:
                        # Output time, user, and user agent string only
                        userf.write(str(alr.ts) + ' ' + alr.user + ' ' + alr.user_agent + '\n')
            finally:
                logf.close()
    except Exception as e:
        logging.error('Error splitting log files: %s\n%s',
                      e, traceback.format_exc())