Esempio n. 1
0
def test_multiple_pats():
    """Exercise grok_match with patterns built from several grok expressions."""
    failure = 'grok match failed:%s, %s'

    # every expression is named, so each capture is present in the result
    text = 'gary 25 "never quit"'
    pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat)
    assert m is not None, failure % (text, pat)
    assert m['name'] == 'gary' and m['age'] == '25' and m['motto'] == '"never quit"', \
        failure % (text, pat)

    # variable names are not set: the match is expected to yield an empty dict
    pat = '%{WORD} %{INT} %{QUOTEDSTRING}'
    m = grok_match(text, pat)
    assert m == {}, failure % (text, pat)

    # "male" is not INT: the result is expected to be None
    text = 'gary male "never quit"'
    pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat)
    assert m is None, failure % (text, pat)

    # nginx log
    text = (
        'edge.v.iask.com.edge.sinastorage.com 14.18.243.65 6.032s - [21/Jul/2014:16:00:02 +0800]'
        ' "GET /edge.v.iask.com/125880034.hlv HTTP/1.0" 200 70528990 "-"'
        ' "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/36.0.1985.125 Safari/537.36"'
    )
    # The first fragment is a raw string: "\[" and "\]" are regex escapes, and
    # in a normal literal they are invalid string escape sequences.
    pat = (
        r'%{HOST:host} %{IP:client_ip} %{NUMBER:delay}s - \[%{DATA:time_stamp}\]'
        ' "%{WORD:verb} %{URIPATHPARAM:uri_path} HTTP/%{NUMBER:http_ver}" %{INT:http_status} %{INT:bytes} %{QS}'
        ' %{QS:client}'
    )
    expected = {
        'host': 'edge.v.iask.com.edge.sinastorage.com',
        'client_ip': '14.18.243.65',
        'delay': '6.032',
        'time_stamp': '21/Jul/2014:16:00:02 +0800',
        'verb': 'GET',
        'uri_path': '/edge.v.iask.com/125880034.hlv',
        'http_ver': '1.0',
        'http_status': '200',
        'bytes': '70528990',
        'client': '"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                  ' Chrome/36.0.1985.125 Safari/537.36"',
    }
    m = grok_match(text, pat)
    assert m is not None, failure % (text, pat)
    # One assertion per field so a failure names the field that differs.
    for field, value in expected.items():
        assert m[field] == value, (failure % (text, pat)) + ' (field %s)' % field
Esempio n. 2
0
def test_multiple_pats():
    """Check grok_match results for multi-expression patterns, including an nginx log line."""
    text = 'gary 25 "never quit"'
    pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat)
    assert m is not None, 'grok match failed:%s, %s' % (text, pat)
    assert m['name'] == 'gary' and m['age'] == '25' and m['motto'] == '"never quit"', \
        'grok match failed:%s, %s' % (text, pat)

    # variable names are not set, so nothing is expected in the result dict
    text = 'gary 25 "never quit"'
    pat = '%{WORD} %{INT} %{QUOTEDSTRING}'
    m = grok_match(text, pat)
    assert m == {}, 'grok match failed:%s, %s' % (text, pat)

    # "male" is not INT, so no match (None) is expected
    text = 'gary male "never quit"'
    pat = '%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat)
    assert m is None, 'grok match failed:%s, %s' % (text, pat)

    # nginx log
    text = 'edge.v.iask.com.edge.sinastorage.com 14.18.243.65 6.032s - [21/Jul/2014:16:00:02 +0800]' \
        + ' "GET /edge.v.iask.com/125880034.hlv HTTP/1.0" 200 70528990 "-"' \
        + ' "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)' \
        + ' Chrome/36.0.1985.125 Safari/537.36"'
    # Raw literal for the fragment holding "\[" / "\]": they are regex escapes,
    # not Python string escapes, and a plain literal makes them invalid escapes.
    pat = r'%{HOST:host} %{IP:client_ip} %{NUMBER:delay}s - \[%{DATA:time_stamp}\]' \
        + ' "%{WORD:verb} %{URIPATHPARAM:uri_path} HTTP/%{NUMBER:http_ver}" %{INT:http_status} %{INT:bytes} %{QS}' \
        + ' %{QS:client}'
    user_agent = '"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)' \
        + ' Chrome/36.0.1985.125 Safari/537.36"'
    m = grok_match(text, pat)
    assert m is not None, 'grok match failed:%s, %s' % (text, pat)
    assert m['host'] == 'edge.v.iask.com.edge.sinastorage.com' and m['client_ip'] == '14.18.243.65' \
        and m['delay'] == '6.032' and m['time_stamp'] == '21/Jul/2014:16:00:02 +0800' and m['verb'] == 'GET' \
        and m['uri_path'] == '/edge.v.iask.com/125880034.hlv' and m['http_ver'] == '1.0' \
        and m['http_status'] == '200' and m['bytes'] == '70528990' \
        and m['client'] == user_agent, 'grok match failed:%s, %s' % (text, pat)
def on_message(client, userdata, msg):
    """Print the grok parse of a received message; disconnect after more than 10 messages.

    Only ``msg.payload`` and ``client.disconnect()`` are used; ``userdata`` is
    ignored. The pattern (``grok_pattern``), the custom pattern directory
    (``pats_dir``) and the running counter (``msgCount``) are module globals.
    """
    # msgCount is rebound below and therefore needs the global declaration;
    # grok_pattern and pats_dir are only read.
    global msgCount
    # A parenthesised single-argument print gives the same output as the
    # Python 2 print statement and is also valid on Python 3.
    print(pygrok.grok_match(msg.payload, grok_pattern, custom_patterns_dir=pats_dir))
    msgCount += 1
    if msgCount > 10:
        client.disconnect()
Esempio n. 4
0
def test_multiple_pats():
    """Exercise grok_match with patterns built from several grok expressions."""
    text = 'gary 25 "never quit"'
    pat = "%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}"
    m = grok_match(text, pat)
    assert m is not None, "grok match failed:%s, %s" % (text, pat)
    assert m["name"] == "gary" and m["age"] == "25" and m["motto"] == '"never quit"', "grok match failed:%s, %s" % (
        text,
        pat,
    )

    # variable names are not set, so an empty dict is expected
    text = 'gary 25 "never quit"'
    pat = "%{WORD} %{INT} %{QUOTEDSTRING}"
    m = grok_match(text, pat)
    assert m == {}, "grok match failed:%s, %s" % (text, pat)

    # "male" is not INT, so no match (None) is expected
    text = 'gary male "never quit"'
    pat = "%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}"
    m = grok_match(text, pat)
    assert m is None, "grok match failed:%s, %s" % (text, pat)

    # nginx log
    text = (
        "edge.v.iask.com.edge.sinastorage.com 14.18.243.65 6.032s - [21/Jul/2014:16:00:02 +0800]"
        ' "GET /edge.v.iask.com/125880034.hlv HTTP/1.0" 200 70528990 "-"'
        ' "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/36.0.1985.125 Safari/537.36"'
    )
    # The first fragment is raw: "\[" and "\]" are regex escapes, and in a
    # normal string literal they are invalid escape sequences.
    pat = (
        r"%{HOSTNAME:host} %{IP:client_ip} %{NUMBER:delay}s - \[%{DATA:time_stamp}\]"
        ' "%{WORD:verb} %{URIPATHPARAM:uri_path} HTTP/%{NUMBER:http_ver}" %{INT:http_status} %{INT:bytes} %{QS}'
        " %{QS:client}"
    )
    expected = {
        "host": "edge.v.iask.com.edge.sinastorage.com",
        "client_ip": "14.18.243.65",
        "delay": "6.032",
        "time_stamp": "21/Jul/2014:16:00:02 +0800",
        "verb": "GET",
        "uri_path": "/edge.v.iask.com/125880034.hlv",
        "http_ver": "1.0",
        "http_status": "200",
        "bytes": "70528990",
        "client": '"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/36.0.1985.125 Safari/537.36"',
    }
    m = grok_match(text, pat)
    assert m is not None, "grok match failed:%s, %s" % (text, pat)
    # One assertion per field so a failure names the field that differs.
    for field, value in expected.items():
        assert m[field] == value, "grok match failed:%s, %s (field %s)" % (text, pat, field)
Esempio n. 5
0
def test_custom_pats():
    """grok_match accepts extra pattern definitions through ``custom_patterns``."""
    sample = 'Beijing-1104,gary 25 "never quit"'
    expression = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    # "ID" is supplied through the custom_patterns mapping
    result = grok_match(sample, expression, custom_patterns={'ID': '%{WORD}-%{INT}'})
    wanted = (
        ('user_id', 'Beijing-1104'),
        ('name', 'gary'),
        ('age', '25'),
        ('motto', '"never quit"'),
    )
    # all() stops at the first mismatch, like the chained "and" it replaces
    assert all(result[field] == value for field, value in wanted), \
        'grok match failed:%s, %s' % (sample, expression)
Esempio n. 6
0
def test_custom_pats():
    """Match a line using a pattern name ("ID") defined in-line via custom_patterns."""
    extra_patterns = {'ID': '%{WORD}-%{INT}'}
    line = 'Beijing-1104,gary 25 "never quit"'
    grok_expr = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    captured = grok_match(line, grok_expr, custom_patterns=extra_patterns)

    # Compare field by field, stopping at the first difference.
    everything_matches = True
    for field, value in [('user_id', 'Beijing-1104'), ('name', 'gary'),
                         ('age', '25'), ('motto', '"never quit"')]:
        if not captured[field] == value:
            everything_matches = False
            break
    assert everything_matches, 'grok match failed:%s, %s' % (line, grok_expr)
def parse():
    """Turn lint lines read from stdin into a cppcheck-style XML report on stderr.

    Each stripped input line is matched against a grok pattern that captures
    ``fname``, ``lineno``, ``rawmsg``, ``label`` and ``severity``. Lines that do
    not match, or that do not yield exactly those five fields, are skipped.
    The numeric severity is translated by cpplint_score_to_cppcheck_severity.
    """
    # TODO: do this properly, using the xml module.
    quote = xml.sax.saxutils.quoteattr
    write = sys.stderr.write

    # Write header
    write('''<?xml version="1.0" encoding="UTF-8"?>\n''')
    write('''<results version="2">\n''')
    write('''  <cppcheck version=""/>\n''')
    write('''  <errors>\n''')

    # Raw string: "\[" and "\]" are regex escapes, not Python string escapes.
    pattern = r"%{DATA:fname}:%{INT:lineno}: %{GREEDYDATA:rawmsg} \[%{DATA:label}\] \[%{INT:severity}"

    # Iterate the stream directly instead of loading every line up front.
    for line in sys.stdin:
        m = pygrok.grok_match(line.strip(), pattern)
        if not m or len(m) != 5:
            continue
        severity = cpplint_score_to_cppcheck_severity(int(m['severity']))
        # Protect Jenkins from bad XML, which makes it barf: every attribute
        # value taken from the input goes through quoteattr(), which escapes
        # it and adds the surrounding quotes itself.
        write('''  <error id=%s severity="%s" msg=%s>\n''' % (quote(m['label']), severity, quote(m['rawmsg'])))
        write('''    <location file=%s line=%s/>\n''' % (quote(m['fname']), quote(m['lineno'])))
        write('''  </error>\n''')

    # Write footer
    write('''  </errors>\n''')
    write('''</results>\n''')
Esempio n. 8
0
def parse_log():
    """Run the posted example log lines through grok and return the results as JSON.

    The request body is a JSON document with a 'log_format' entry (passed to
    build_grok_pattern and build_logstash_conf) and a 'log_examples' string of
    newline-separated lines. The return value is a JSON string of the form
    {"parsed_logs": [...]} holding one grok_match result per line.
    """
    import json
    from pygrok import grok_match

    body = request.get_data()
    app.logger.warning( "form:%s", body )

    payload = json.loads( body )

    grok_pat = build_grok_pattern( payload[ 'log_format' ] )
    app.logger.warning( 'grok_pat:%s', grok_pat )

    parsed = []
    for example in payload[ 'log_examples' ].split( "\n" ):
        result = grok_match( example, grok_pat )
        parsed.append( result )
        app.logger.warning('pl:%s', json.dumps( result, indent=4 ) )

    # The logstash configuration is built only so that it appears in the log.
    app.logger.warning( 'ls_conf:%s', build_logstash_conf( payload[ 'log_format' ] ) )

    return json.dumps( { 'parsed_logs': parsed } )
Esempio n. 9
0
def test_custom_pats():
    """grok_match resolves the "ID" pattern passed in through ``custom_patterns``."""
    line = 'Beijing-1104,gary 25 "never quit"'
    expression = "%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}"
    captures = grok_match(line, expression, custom_patterns={"ID": "%{WORD}-%{INT}"})

    expected_fields = [
        ("user_id", "Beijing-1104"),
        ("name", "gary"),
        ("age", "25"),
        ("motto", '"never quit"'),
    ]
    # all() short-circuits on the first mismatch, like a chain of "and"
    fields_ok = all(captures[key] == value for key, value in expected_fields)
    assert fields_ok, "grok match failed:%s, %s" % (line, expression)
Esempio n. 10
0
def test_custom_pat_files():
    """grok_match picks up extra pattern definitions from ``custom_patterns_dir``."""
    import os.path

    # Anchor the pattern directory to this file so the test does not depend on
    # the directory it happens to be started from.
    pats_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'test_patterns')
    text = 'Beijing-1104,gary 25 "never quit"'
    #pattern "ID" is defined in test_patterns/pats next to this file
    pat = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat, custom_patterns_dir=pats_dir)
    assert m is not None, 'grok match failed:%s, %s' % (text, pat)
    assert m['user_id'] == 'Beijing-1104' and m['name'] == 'gary' and m['age'] == '25' \
        and m['motto'] == '"never quit"', 'grok match failed:%s, %s' % (text, pat)
Esempio n. 11
0
def test_custom_pat_files():
    """Match a line using a pattern ("ID") loaded from a custom patterns directory."""
    import os.path

    # Resolve the directory relative to this test file rather than the current
    # working directory, so the test can be started from anywhere.
    here = os.path.dirname(os.path.abspath(__file__))
    pats_dir = os.path.join(here, 'test_patterns')
    text = 'Beijing-1104,gary 25 "never quit"'
    #pattern "ID" is defined in test_patterns/pats next to this file
    pat = '%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}'
    m = grok_match(text, pat, custom_patterns_dir=pats_dir)
    assert m is not None, 'grok match failed:%s, %s' % (text, pat)
    assert m['user_id'] == 'Beijing-1104' and m['name'] == 'gary' and m['age'] == '25' \
        and m['motto'] == '"never quit"', 'grok match failed:%s, %s' % (text, pat)
Esempio n. 12
0
    def read(self, stream):
        """Yield a Point for every line of *stream* that matches ``self.pattern``.

        Each line is passed to pygrok.grok_match; a non-None result is expanded
        into keyword arguments for Point. Lines whose result is None are logged
        and skipped.
        """
        for line in stream.readlines():
            matches = pygrok.grok_match(line, self.pattern)
            if matches is None:
                # warning() replaces the deprecated warn() alias; passing the
                # line as a lazy argument yields the same message text.
                # NOTE(review): assumes `logger` is a standard logging.Logger.
                logger.warning('not matchined %s', line)
                continue
            yield Point(**matches)
Esempio n. 13
0
def test_one_pat():
    """Run grok_match on single-expression patterns and check each result."""
    # (text, pattern, predicate over (result, text)); evaluated in this order
    scenarios = [
        ('1024', '%{INT:test_int}', lambda m, t: m['test_int'] == '1024'),
        ('1024', '%{NUMBER:test_num}', lambda m, t: m['test_num'] == '1024'),
        ('garyelephant ', '%{WORD:name} ', lambda m, t: m['name'] == t.strip()),
        ('192.168.1.1', '%{IP:ip}', lambda m, t: m['ip'] == t.strip()),
        ('github.com', '%{HOSTNAME:website}', lambda m, t: m['website'] == t.strip()),
        ('1989-11-04 05:33:02+0800', '%{TIMESTAMP_ISO8601:ts}', lambda m, t: m['ts'] == t.strip()),
        # you get nothing because variable name is not set,
        # compare "%{WORD}" and "%{WORD:variable_name}"
        ('github', '%{WORD}', lambda m, t: m == {}),
        # not match
        ('github', '%{NUMBER:test_num}', lambda m, t: m is None),
        # a ":int" suffix on the expression: the expected value is an int
        ('1989', '%{NUMBER:birthyear:int}', lambda m, t: m == {'birthyear': 1989}),
    ]
    for text, pat, holds in scenarios:
        m = grok_match(text, pat)
        assert holds(m, text), 'grok match failed:%s, %s' % (text, pat)
Esempio n. 14
0
def test_custom_pat_files():
    """Match a line using a pattern ("ID") loaded from the test_patterns directory."""
    import os.path

    # the directory sits next to this test file
    here = os.path.dirname(os.path.abspath(__file__))
    pats_dir = os.path.join(here, "test_patterns")

    line = 'Beijing-1104,gary 25 "never quit"'
    # pattern "ID" is defined in ./test_patterns/pats
    expression = "%{ID:user_id},%{WORD:name} %{INT:age} %{QUOTEDSTRING:motto}"
    captures = grok_match(line, expression, custom_patterns_dir=pats_dir)

    expected_fields = (
        ("user_id", "Beijing-1104"),
        ("name", "gary"),
        ("age", "25"),
        ("motto", '"never quit"'),
    )
    # all() short-circuits on the first mismatch, like a chain of "and"
    assert all(captures[key] == value for key, value in expected_fields), "grok match failed:%s, %s" % (
        line,
        expression,
    )
Esempio n. 15
0
def grok_parser(pattern, lines, cb_oups=None, date_key=None, date_format=None):
    """Yield the grok_match result of every line in *lines* that matches *pattern*.

    A line whose result is None is handed to *cb_oups* (when one is given) and
    produces no output. When both *date_key* and *date_format* are supplied,
    each yielded dict also gets a 'timestamp' entry: the value under
    *date_key* parsed by datetime.strptime with *date_format*.
    """
    add_timestamp = None not in (date_key, date_format)
    for raw_line in lines:
        fields = grok_match(raw_line, pattern)
        if fields is not None:
            if add_timestamp:
                fields['timestamp'] = datetime.strptime(fields[date_key], date_format)
            yield fields
        elif cb_oups is not None:
            cb_oups(raw_line)
Esempio n. 16
0
    def __call__(self, env, start_response):
        """WSGI entry point: optionally replace a fetched object's body with its grok parse.

        The wrapped app is always called first. Its response is returned
        unchanged unless all of these hold: split_path accepts the request
        path, the method is GET, the request carries a ``grok`` parameter or a
        ``grok-pattern`` header, an object name is present and the status is
        200. In that case every line of the body is run through
        pygrok.grok_match and the body is replaced by one JSON document per
        line. A missing pattern, or any exception while parsing, yields the
        response built by self.get_err_response.
        """
        req = Request(env)
        resp = req.get_response(self.app)

        try:
            # Only objname is used below; the other parts are unpacked so a
            # result of the wrong length still raises ValueError here.
            (_version, _account, _container,
             objname) = split_path(req.path_info, 1, 4, True)
        except ValueError:
            return resp(env, start_response)

        # `in` works on Python 2 and 3 alike; has_key() exists only on Python 2.
        is_grok_request = 'grok' in req.params or 'grok-pattern' in req.headers

        # grok request has to be explicit, and only expected for GET operations
        if req.method != 'GET' or not is_grok_request:
            return resp(env, start_response)

        self.logger.debug('Calling grok middleware')

        # make sure we have an object to work on
        if not objname or resp.status_int != 200:
            return resp(env, start_response)

        # the grok pattern is expected to be in the request headers;
        # a grok request without a pattern gets an error response
        pattern = req.headers.get('grok-pattern')
        if not pattern:
            self.logger.debug(
                'Object found, but no pattern requested, aborting')
            return self.get_err_response('Grok pattern is missing')(
                env, start_response)

        self.logger.debug('Starting grok operation')

        # we are going to assume the retrieved object is string object
        # and iterate through lines of resp.body and execute grok_match
        try:
            strbuf = StringIO.StringIO(resp.body)
            # join() builds the new body in one pass instead of repeated "+="
            grokked_content = ''.join(
                json.dumps(pygrok.grok_match(line, pattern)) + '\n'
                for line in strbuf)
        except Exception as e:
            return self.get_err_response(str(e))(env, start_response)

        resp.body = grokked_content

        return resp(env, start_response)
Esempio n. 17
0
def parse_file(path):
    """Rewrite an nginx log as tab-separated fields and record it as finished.

    Every line of *path* is parsed with the 'nginx' grok pattern and written
    to the file named like *path* with '_old' removed: one tab-separated line
    holding the fields listed in ``key_order`` (an empty entry in
    ``key_order`` becomes '-'). Afterwards a record with the log name and the
    current time is inserted into ``finished_nginx_log``.

    Raises:
        ValueError: if *path* contains no '_old', because the output would
            then be the input file itself.
    """
    out_path = path.replace('_old', '')
    if out_path == path:
        # Opening the same file for writing would truncate it before a single
        # line had been read.
        raise ValueError("path must contain '_old': %r" % (path,))

    # "with" closes both files even when a line fails to parse.
    with open(path) as source, open(out_path, mode='w') as target:
        for elem in source:
            # NOTE(review): the result is indexed directly, so a line for which
            # grok_match returns None raises TypeError here - confirm that
            # aborting the whole file is intended.
            nginx_dict = pygrok.grok_match(elem, pattern['nginx'])
            fields = ['-' if item == '' else nginx_dict[item] for item in key_order]
            target.write('\t'.join(fields) + '\n')

    one = {'log_name': path, '__CREATE_TIME__': datetime.datetime.now(utc)}
    finished_nginx_log.insert_one(one)
    def __call__(self, env, start_response):
        """WSGI entry point: optionally replace a fetched object's body with its grok parse.

        The wrapped app is always called first. Its response is returned
        unchanged unless all of these hold: split_path accepts the request
        path, the method is GET, the request carries a ``grok`` parameter or a
        ``grok-pattern`` header, an object name is present and the status is
        200. In that case every line of the body is run through
        pygrok.grok_match and the body is replaced by one JSON document per
        line. A missing pattern, or any exception while parsing, yields the
        response built by self.get_err_response.
        """
        req = Request(env)
        resp = req.get_response(self.app)

        try:
            # Only objname is used below; the other parts are unpacked so a
            # result of the wrong length still raises ValueError here.
            (_version, _account, _container, objname) = split_path(req.path_info, 1, 4, True)
        except ValueError:
            return resp(env, start_response)

        # `in` works on Python 2 and 3 alike; has_key() exists only on Python 2.
        is_grok_request = "grok" in req.params or "grok-pattern" in req.headers

        # grok request has to be explicit, and only expected for GET operations
        if req.method != "GET" or not is_grok_request:
            return resp(env, start_response)

        self.logger.debug("Calling grok middleware")

        # make sure we have an object to work on
        if not objname or resp.status_int != 200:
            return resp(env, start_response)

        # the grok pattern is expected to be in the request headers;
        # a grok request without a pattern gets an error response
        pattern = req.headers.get("grok-pattern")
        if not pattern:
            self.logger.debug("Object found, but no pattern requested, aborting")
            return self.get_err_response("Grok pattern is missing")(env, start_response)

        self.logger.debug("Starting grok operation")

        # we are going to assume the retrieved object is string object
        # and iterate through lines of resp.body and execute grok_match
        try:
            strbuf = StringIO.StringIO(resp.body)
            # join() builds the new body in one pass instead of repeated "+="
            grokked_content = "".join(json.dumps(pygrok.grok_match(line, pattern)) + "\n" for line in strbuf)
        except Exception as e:
            return self.get_err_response(str(e))(env, start_response)

        resp.body = grokked_content

        return resp(env, start_response)
Esempio n. 19
0
def test_one_pat():
    """Run grok_match on single-expression patterns and check each result."""

    def expect(text, pat, verdict):
        # Match once, then apply the scenario's own check to the result.
        m = grok_match(text, pat)
        assert verdict(m), "grok match failed:%s, %s" % (text, pat)

    expect("1024", "%{INT:test_int}", lambda m: m["test_int"] == "1024")
    expect("1024", "%{NUMBER:test_num}", lambda m: m["test_num"] == "1024")
    expect("garyelephant ", "%{WORD:name} ", lambda m: m["name"] == "garyelephant ".strip())
    expect("192.168.1.1", "%{IP:ip}", lambda m: m["ip"] == "192.168.1.1".strip())
    expect("github.com", "%{HOSTNAME:website}", lambda m: m["website"] == "github.com".strip())
    expect(
        "1989-11-04 05:33:02+0800",
        "%{TIMESTAMP_ISO8601:ts}",
        lambda m: m["ts"] == "1989-11-04 05:33:02+0800".strip(),
    )

    # you get nothing because variable name is not set, compare "%{WORD}" and "%{WORD:variable_name}"
    expect("github", "%{WORD}", lambda m: m == {})

    # not a number, so the expected result is None
    expect("github", "%{NUMBER:test_num}", lambda m: m is None)
Esempio n. 20
0
	def check_sample_pattern(self,logfile,pname,pattern):
		"""Report how many lines of a sample log file match a grok pattern.

		Every line of *logfile* is matched against *pattern* using the custom
		patterns in self.custpattern_dir. Lines without a match are printed in
		COLOR_RED; matching lines and their parse result are printed only when
		*pname* is '@'. The integer match percentage is then logged: info at
		100, warning above 90, error otherwise. An empty file logs a warning
		and returns without computing a percentage.
		"""
		#logging.info('check sample {0}'.format(logfile))
		line_count=0
		match_count=0
		with open(logfile,'r') as lf:
			for line in lf.read().splitlines():
				m=pygrok.grok_match(line,pattern,custom_patterns_dir=self.custpattern_dir)
				line_count+=1
				if m is not None:
					match_count+=1
					if pname=='@':
						# parenthesised single-argument print: same output on
						# Python 2, and valid on Python 3
						print('  {0}) {2}{1}{3}'.format(line_count,line,COLOR_WHITE,COLOR_RESET))
						print('    => {0}'.format(m))
				else:
					print('  {0}) {2}{1}{3}'.format(line_count,line,COLOR_RED,COLOR_RESET))
		if line_count==0:
			# nothing was read, so the percentage below would divide by zero
			logging.warning('{0} : no lines to check'.format(logfile))
			return
		# floor division keeps the integer percentage on Python 2 and 3 alike
		match_percent=match_count*100//line_count
		if match_percent==100:
			logging.info('{1} : match percent {0} %'.format(match_percent,logfile))
		elif match_percent>90:
			logging.warning('{1} : match percent {0} %'.format(match_percent,logfile))
		else:
			logging.error('{1} : match percent {0} %'.format(match_percent,logfile))
Esempio n. 21
0
def parse_single_line( log_format, log ):
    """Parse one log line according to *log_format* and post-process typed fields.

    The grok pattern built from *log_format* is anchored with ^ and $ and
    matched against *log*. Then, for every entry of *log_format* whose 'type'
    is 'field', the parsed value is adjusted by its lower-cased 'field_type':

      * 'integer/long' - converted with int()
      * 'float/double' - converted with float() unless already a float
      * 'ip' with 'to_geoip' set to True - a dict with 'country', 'region',
        'city' and 'isp' is stored under '<name>#'
      * 'json' - the decoded value is stored under '<name>_json'
      * 'custom_date', 'httpdate', 'iso8601' - left untouched

    Returns the resulting dict, or None if the line does not match or a
    numeric conversion fails.

    Raises IPDBNotFoundError when the GeoIP lookup hits an IOError.
    """
    from grok.grok import build_grok_pattern

    # add beginning(^) and ending($) regex character
    grok_pat = build_grok_pattern( log_format )
    # parenthesised single-argument print: same output on Python 2, valid on 3
    print( grok_pat )
    parsed_log = grok_match( log, '^%s$' % ( grok_pat, ) )

    if parsed_log is None:
        return None

    for field_format in log_format.values():
        if field_format[ 'type' ] != 'field':
            continue

        field_name = field_format[ 'name' ]
        field_type = field_format[ 'field_type' ].lower()

        if field_type == 'integer/long':
            # convert numbers; the earlier "not int or not long" guard was true
            # for every value, so the conversion has always been unconditional
            try:
                parsed_log[ field_name ] = int( parsed_log[ field_name ] )
            except ( ValueError, TypeError ):
                # TypeError covers a value of None, which int() rejects
                return None

        elif field_type == 'float/double':
            # convert numbers
            if not isinstance( parsed_log[ field_name ], float ):
                try:
                    parsed_log[ field_name ] = float( parsed_log[ field_name ] )
                except ( ValueError, TypeError ):
                    return None

        # field_type is lower-cased above, so the comparison must use 'ip';
        # the previous 'IP' literal could never match
        elif field_type == 'ip' and field_format.get( 'to_geoip' ) is True:
            # ip to city and isp info
            geoip_f = field_name + '#'

            try:
                prj_dir = os.path.dirname( os.path.dirname( os.path.abspath( __file__ ) ) )
                mmdb = os.path.join( prj_dir, 'dependencies', 'geoip2', 'world_city_geoip2.mmdb' )
                reader = geoip2.database.Reader( mmdb, locales=['en'] )
                try:
                    response = reader.city( parsed_log[ field_name ] )

                    # NOTE(review): 'isp' is filled from the postal code -
                    # confirm this is intended.
                    v = {
                        'country': response.country.name,
                        'region': response.subdivisions.most_specific.name, # province
                        'city': response.city.name,
                        'isp': response.postal.code,
                    }

                except geoip2.errors.AddressNotFoundError:
                    v = { 'country': 'Not Found', 'region': 'Not Found', 'city': 'Not Found', 'isp': 'Not Found' }

                finally:
                    # release the database handle on every exit path
                    reader.close()

            except IOError:
                raise IPDBNotFoundError()

            parsed_log[ geoip_f ] = v

        elif field_type == 'json':
            # load json from string
            json_f = field_name + '_json'
            parsed_log[ json_f ] = json.loads( parsed_log[ field_name ] )

        elif field_type == 'custom_date':
            # parse date in custom format
            pass

        elif field_type == 'httpdate':
            # check whether the value of this field is httpdate or not
            # no check needed here; it has already been done earlier
            pass

        elif field_type == 'iso8601':
            pass

    return parsed_log