Esempio n. 1
0
def flag_machines(doc):
    """Mark an event document as machine-generated or not.

    The ``is_machine`` key of ``doc`` is set in place and the same document is
    returned. When the document carries no ``user_agent`` key the flag is
    ``False``; otherwise it is whatever ``is_machine`` reports for that user
    agent.

    Machine detection relies on the `COUNTER-robots Python package
    <https://github.com/inveniosoftware/counter-robots>`_. Its list follows
    the one `defined by Project COUNTER
    <https://www.projectcounter.org/appendices/850-2/>`_, which was later
    split into robots and machines by `the Make Data Count project
    <https://github.com/CDLUC3/Make-Data-Count/tree/master/user-agents>`_.

    """
    if 'user_agent' in doc:
        machine_flag = is_machine(doc['user_agent'])
    else:
        # No user agent recorded: nothing to classify.
        machine_flag = False
    doc['is_machine'] = machine_flag
    return doc
Esempio n. 2
0
def lambda_handler(event, context):
    """Enrich every record in ``event['records']`` and return them re-encoded.

    Each record's ``data`` field is base64-decoded and parsed as JSON. Inside
    the parsed log's ``httpRequest`` object the handler then:

    * replaces the ``headers`` list of ``{'name': ..., 'value': ...}`` items
      with a flat dict keyed by lower-cased header name,
    * adds ``asn`` from the first element of ``asndb.lookup(clientIp)``,
    * adds ``browser``, ``os`` and ``device`` from ``getUAInfo``,
    * adds ``deviceType`` as ``'machine'``, ``'robot'`` or ``'browser'``.

    NOTE(review): the ``recordId`` / ``result`` / ``data`` record shape looks
    like the Kinesis Data Firehose transformation contract - confirm against
    the deployment.

    Args:
        event: Mapping with a ``records`` list; every record provides
            ``recordId`` and base64-encoded JSON ``data``.
        context: Unused.

    Returns:
        ``{'records': [...]}`` with one ``{'recordId', 'result', 'data'}``
        entry per input record, where ``data`` is the enriched log serialised
        to JSON and base64-encoded as text.

    Raises:
        KeyError: If a record or log lacks a field read here, for example a
            request that carries no ``user-agent`` header.
    """
    output = []

    for record in event['records']:
        log = json.loads(base64.b64decode(record['data']))
        request = log['httpRequest']

        # Flattens the header fields; if a header name repeats, the last
        # occurrence wins.
        request['headers'] = {
            item['name'].lower(): item['value']
            for item in request['headers']
        }

        # Adds ASN number
        request['asn'] = asndb.lookup(request['clientIp'])[0]

        user_agent_string = request['headers']['user-agent']

        # Adds user-agent information
        ua_info = getUAInfo(user_agent_string)
        request['browser'] = ua_info['browser']
        request['os'] = ua_info['os']
        request['device'] = ua_info['device']

        # Adds robot information; the machine check takes precedence over
        # the robot check.
        if is_machine(user_agent_string):
            device_type = 'machine'
        elif is_robot(user_agent_string):
            device_type = 'robot'
        else:
            device_type = 'browser'
        request['deviceType'] = device_type

        # b64encode() only accepts bytes and returns bytes, so encode the
        # JSON text first and decode the result back to text so that the
        # response itself stays JSON-serialisable.
        payload = json.dumps(log).encode('utf-8')
        encoded_payload = base64.b64encode(payload).decode('utf-8')

        output.append({
            'recordId': record['recordId'],
            'result': 'Ok',
            'data': encoded_payload,
        })

    print('Successfully processed {} records.'.format(len(event['records'])))

    return {'records': output}
Esempio n. 3
0
def test_is_machine():
    """Check ``is_machine`` against one machine and one robot user agent.

    The Wget user agent must be reported as exactly ``True``; the
    AdsBot-Google user agent must not be.
    """
    wget_agent = 'Wget/1.14 (linux-gnu)'
    adsbot_agent = 'AdsBot-Google (+http://www.google.com/adsbot.html)'

    wget_verdict = is_machine(wget_agent)
    assert wget_verdict is True

    adsbot_verdict = is_machine(adsbot_agent)
    assert adsbot_verdict is not True