コード例 #1
0
ファイル: kafka_consumer.py プロジェクト: xiaodin1/ghost_sa
def do_insert(msg):
    """Store the ``data`` part of one consumed message via ``insert_data``.

    ``msg.value`` is decoded as UTF-8 JSON and its ``data`` entry supplies
    every keyword argument listed in ``field_names``; ``use_kafka=False`` is
    always passed in addition. ``msg.offset`` is printed once the payload
    has been parsed.

    Any exception (bad JSON, missing key, failure inside ``insert_data``) is
    formatted with ``traceback`` and handed to ``write_to_log``; nothing is
    re-raised.
    """
    # Keys copied one-to-one from the payload into insert_data's keyword
    # arguments, in the order they are read.
    field_names = (
        'project', 'data_decode', 'User_Agent', 'Host', 'Connection',
        'Pragma', 'Cache_Control', 'Accept', 'Accept_Encoding',
        'Accept_Language', 'ip', 'ip_city', 'ip_asn', 'url', 'referrer',
        'remark', 'ua_platform', 'ua_browser', 'ua_version', 'ua_language',
        'ip_is_good', 'ip_asn_is_good', 'created_at', 'updated_at',
    )
    try:
        payload = json.loads(msg.value.decode('utf-8'))['data']
        print(msg.offset)
        row = {name: payload[name] for name in field_names}
        insert_data(use_kafka=False, **row)
    except Exception:
        write_to_log(filename='kafka_consumer',
                     defname='do_insert',
                     result=traceback.format_exc())
コード例 #2
0
ファイル: subscribe.py プロジェクト: xiaodin1/ghost_sa
def realtime_subscribe(broker):
    """Consume ``event_topic`` from *broker* and store each message via ``insert_data``.

    Every message value is decoded as UTF-8 JSON. ``project`` and
    ``properties`` are required keys of the decoded object; the individual
    ``properties`` entries read below are optional and default to ``''``.
    The HTTP-header style arguments of ``insert_data`` are always passed as
    empty strings.

    Args:
        broker: bootstrap server address handed to ``KafkaConsumer``.
    """

    def prop(properties, key):
        # Membership test rather than ``.get`` so that the lookup behaves
        # exactly like the original conditional expressions.
        return properties[key] if key in properties else ''

    consumer = KafkaConsumer('event_topic', bootstrap_servers=[broker])
    # NOTE(review): nothing in this loop catches exceptions, so a message
    # that is not valid JSON or lacks 'project'/'properties' ends the
    # subscription -- confirm that this is intended.
    for message in consumer:
        data = json.loads(message.value.decode('utf-8'))
        remark = ''
        project = data['project']
        properties = data['properties']
        ua_platform = prop(properties, '$os')  # client operating system
        ua_browser = prop(properties, '$browser')  # client browser
        ua_version = prop(properties, '$browser_version')  # client browser version
        ip = prop(properties, '$ip')
        ip_city, ip_is_good = get_addr(ip)
        ip_asn, ip_asn_is_good = get_asn(ip)
        # A lookup flagged as not good is stored as the placeholder '{}'.
        if ip_is_good == 0:
            ip_city = '{}'
        if ip_asn_is_good == 0:
            ip_asn = '{}'
        referrer = prop(properties, '$latest_referrer')
        insert_data(project=project,
                    data_decode=data,
                    User_Agent='',
                    Host='',
                    Connection='',
                    Pragma='',
                    Cache_Control='',
                    Accept='',
                    Accept_Encoding='',
                    Accept_Language='',
                    ip=ip,
                    ip_city=ip_city,
                    ip_asn=ip_asn,
                    url='',
                    referrer=referrer,
                    remark=remark,
                    ua_platform=ua_platform,
                    ua_browser=ua_browser,
                    ua_version=ua_version,
                    ua_language='',
                    ip_is_good=ip_is_good,
                    ip_asn_is_good=ip_asn_is_good,
                    use_kafka=admin.use_kafka)
コード例 #3
0
ファイル: import_from_sa.py プロジェクト: xiaodin1/ghost_sa
def upload_events_from_pickle_to_sql(project='tvcbook', remark='production'):
    """Import pickled event exports from ``data_export/<project>/<remark>/events``.

    Every file below that directory is unpickled (files are handled in
    sorted path order). Each item of the unpickled iterable is parsed as a
    JSON event and passed to ``insert_data``. The file is deleted after all
    of its items have been attempted.

    An exception raised while handling a single item is written to the log
    through ``write_to_log`` and does not stop the import. Errors while
    opening or unpickling a file are not caught.

    Args:
        project: name of the export sub-directory; also stored as the
            project of every inserted event.
        remark: name of the export sub-directory below *project*; also
            stored as the remark of every inserted event.
    """
    dirpath = os.path.join('data_export', project, remark, 'events')
    # Collect the full path of every file below the export directory.
    filelist = []
    for maindir, _, file_name_list in os.walk(dirpath):
        filelist.extend(
            os.path.join(maindir, filename) for filename in file_name_list)
    filelist.sort()
    for pkl in filelist:
        # NOTE(security): unpickling can execute arbitrary code; only run
        # this on export files from a trusted source.
        with open(pkl, "rb") as f2:
            results = pickle.load(f2)
        for item in results:
            try:
                itemdict = json.loads(item)
                all_json = {
                    "properties": itemdict,
                    "distinct_id": itemdict["distinct_id"],
                    "event": itemdict["event"],
                    "type": "track"
                }
                if "$ip" in itemdict:
                    ip = itemdict["$ip"]
                    ip_city, ip_is_good = get_addr(ip)
                    ip_asn, ip_asn_is_good = get_asn(ip)
                    # A lookup flagged as not good is stored as '{}'.
                    if ip_is_good == 0:
                        ip_city = '{}'
                    if ip_asn_is_good == 0:
                        ip_asn = '{}'
                else:
                    # No address to look up: reuse the placeholders of a
                    # failed lookup instead of dropping the whole event.
                    ip = None
                    ip_city, ip_is_good = '{}', 0
                    ip_asn, ip_asn_is_good = '{}', 0
                print(all_json)
                # Fractional seconds are cut off; time.mktime interprets
                # the parsed time in the local timezone.
                created_at = time.mktime(
                    time.strptime(itemdict["time"].split('.')[0],
                                  '%Y-%m-%d %H:%M:%S'))
                # Store under the caller's project, the same value that
                # selected the export directory above.
                insert_data(project=project,
                            data_decode=all_json,
                            User_Agent=None,
                            Host=None,
                            Connection=None,
                            Pragma=None,
                            Cache_Control=None,
                            Accept=None,
                            Accept_Encoding=None,
                            Accept_Language=None,
                            ip=ip,
                            ip_city=ip_city,
                            ip_asn=ip_asn,
                            url=None,
                            referrer=itemdict.get("$referrer"),
                            remark=remark,
                            ua_platform=itemdict.get("$lib"),
                            ua_browser=itemdict.get("$browser"),
                            ua_version=itemdict.get("$browser_version"),
                            ua_language=None,
                            ip_is_good=ip_is_good,
                            ip_asn_is_good=ip_asn_is_good,
                            created_at=created_at)
            except Exception:
                error = traceback.format_exc()
                write_to_log(filename='import_from_sa',
                             defname='upload_events_from_pickle_to_sql',
                             result=error)
        # The file is removed even when some of its items failed; those
        # failures are only available in the log.
        os.remove(pkl)
コード例 #4
0
def do_insert(msg):
    """Store one consumed message with the insert function for its ``group``.

    ``msg.value`` is decoded as UTF-8 JSON once. Its ``data`` entry supplies
    the keyword arguments and its optional ``group`` entry selects the
    target:

    * ``'installation_track'`` -> ``insert_installation_track``
    * ``'shortcut_history'``   -> ``insert_shortcut_history``
    * ``'shortcut_read'``      -> ``insert_shortcut_read``
    * ``'event_track'``, a missing group or any other value -> ``insert_data``

    ``msg.offset`` is printed once the payload has been parsed. Any
    exception (bad JSON, missing key, failure inside an insert function) is
    formatted with ``traceback`` and handed to ``write_to_log``; nothing is
    re-raised.
    """
    # Keys copied one-to-one from the payload into the keyword arguments of
    # insert_data / insert_installation_track.
    track_fields = (
        'project', 'data_decode', 'User_Agent', 'Host', 'Connection',
        'Pragma', 'Cache_Control', 'Accept', 'Accept_Encoding',
        'Accept_Language', 'ip', 'ip_city', 'ip_asn', 'url', 'referrer',
        'remark', 'ua_platform', 'ua_browser', 'ua_version', 'ua_language',
        'ip_is_good', 'ip_asn_is_good', 'created_at', 'updated_at',
    )
    try:
        # Parse the message a single time and read both entries from it.
        payload = json.loads(msg.value.decode('utf-8'))
        group = payload.get('group')
        data = payload['data']
        print(msg.offset)
        if group == 'installation_track':
            insert_installation_track(
                use_kafka=False,
                **{name: data[name] for name in track_fields})
        elif group == 'shortcut_history':
            insert_shortcut_history(short_url=data['short_url'],
                                    result=data['status'],
                                    cost_time=data['time2'],
                                    ip=data['ip'],
                                    user_agent=data['user_agent'],
                                    accept_language=data['accept_language'],
                                    ua_platform=data['ua_platform'],
                                    ua_browser=data['ua_browser'],
                                    ua_version=data['ua_version'],
                                    ua_language=data['ua_language'],
                                    created_at=data['created_at'])
        elif group == 'shortcut_read':
            insert_shortcut_read(short_url=data['short_url'],
                                 ip=data['ip'],
                                 user_agent=data['user_agent'],
                                 accept_language=data['accept_language'],
                                 ua_platform=data['ua_platform'],
                                 ua_browser=data['ua_browser'],
                                 ua_version=data['ua_version'],
                                 ua_language=data['ua_language'],
                                 referrer=data['referrer'],
                                 created_at=data['created_at'])
        else:
            # 'event_track' and every unrecognised or missing group are
            # stored as regular events.
            insert_data(use_kafka=False,
                        **{name: data[name] for name in track_fields})
    except Exception:
        error = traceback.format_exc()
        write_to_log(filename='kafka_consumer',
                     defname='do_insert',
                     result=error)