def get_sessions_for_user(user_id):
    """Return a JSON listing of the sessions recorded for one user.

    ``Lolomo`` rows are joined to the ``User`` whose ``extension_id`` equals
    *user_id* and grouped by ``(single_page_session_id, ip)``.  Every group
    becomes one entry holding the newest timestamp of the group, the
    anonymized IP, a list of related links and the session id.  Entries are
    ordered newest first.

    Returns a ``(body, status, headers)`` tuple: JSON text, 200 and a JSON
    content-type header.
    """
    rows = (
        db.session.query(
            Lolomo.single_page_session_id,
            func.max(Lolomo.timestamp),
            func.max(Lolomo.ip),
        )
        .join(User)
        .filter(User.id == Lolomo.user_id)
        .filter(User.extension_id == user_id)
        .group_by(Lolomo.single_page_session_id, Lolomo.ip)
        .all()
    )

    sessions = []
    for row in rows:
        entry = {
            "creation_date": row[1].timestamp(),
            "creation_date_human": str(row[1]),
            "ip": anonymize_ip(row[2]),
            "links": [
                {
                    "rel": "thumbnails",
                    "href": get_api_root() + f"api/user/{user_id}/session/{row[0]}/thumbnails"
                },
                {
                    "rel": "watches",
                    "href": get_api_root() + f"api/user/{user_id}/session/{row[0]}/watches"
                },
                {
                    "rel": "lolomos",
                    "href": get_api_root() + f"api/user/{user_id}/session/{row[0]}/lolomos"
                },
                {
                    "rel": "country",
                    "href": f"https://ipinfo.io/{anonymize_ip(row[2])}/country"
                },
            ],
        }
        # The session id is stored last so it stays the final key in the
        # serialized object.
        entry["session_id"] = row[0]
        sessions.append(entry)

    # Negated key on a stable sort: newest first, ties keep query order.
    sessions.sort(key=lambda entry: -entry["creation_date"])

    return json.dumps(sessions), 200, {'Content-Type': 'application/json'}
def get_latest_watches(limit, date_from, date_to):
    """Return the most recent watch records inside a time window as JSON.

    Args:
        limit: Maximum number of records to return; ``-1`` disables the limit.
        date_from: Lower bound of the window (inclusive), as text accepted by
            ``dateparser.parse``.
        date_to: Upper bound of the window (inclusive), same format.

    Returns:
        A ``(body, status, headers)`` tuple: JSON text (newest record first),
        200 and a JSON content-type header.  ``duration_seconds`` is the whole
        number of seconds between ``timestamp`` and ``stop_time``, or the
        string ``"unknown"`` when the record has no stop time.
    """
    from_date = dateparser.parse(date_from)
    to_date = dateparser.parse(date_to)

    watches = (
        db.session.query(NetflixWatchMetadata)
        .filter(NetflixWatchMetadata.timestamp >= from_date)
        .filter(NetflixWatchMetadata.timestamp <= to_date)
        .order_by(NetflixWatchMetadata.timestamp.desc())
    )
    if limit != -1:
        watches = watches.limit(limit)

    res = []
    for w in watches:
        if w.stop_time:
            # total_seconds() covers the whole span; the .seconds attribute
            # silently drops the days component of the timedelta.
            duration = int((w.stop_time - w.timestamp).total_seconds())
        else:
            duration = "unknown"

        res.append({
            "video_id": w.video_id,
            "timestamp": w.timestamp.timestamp(),
            "timestamp_human": str(w.timestamp),
            "duration_seconds": duration,
            "track_id": w.track_id,
            "pseudo_ip": anonymize_ip(w.ip),
            "links": [
                {"rel": "session",
                 "href": get_api_root() + f"api/user/{w.user.extension_id}/session/{w.single_page_session_id}"},
                {"rel": "user",
                 "href": get_api_root() + f"api/user/{w.user.extension_id}"},
                {"rel": "content",
                 "href": f"https://platform-api.vod-prime.space/api/emns/provider/4/identifier/{w.video_id}"},
            ],
        })

    return json.dumps(res, cls=SetEncoder), 200, {'Content-Type': 'application/json'}
def get_latest_logs(limit, date_from, date_to):
    """Return the most recent suggestion log records inside a time window.

    *date_from* and *date_to* are parsed with ``dateparser.parse`` and used as
    inclusive bounds on the record timestamp.  Records are ordered newest
    first and capped at *limit* entries unless *limit* is ``-1``.

    Returns a ``(body, status, headers)`` tuple: JSON text, 200 and a JSON
    content-type header.
    """
    window_start = dateparser.parse(date_from)
    window_end = dateparser.parse(date_to)

    query = db.session.query(NetflixSuggestMetadata)
    query = query.filter(NetflixSuggestMetadata.timestamp >= window_start)
    query = query.filter(NetflixSuggestMetadata.timestamp <= window_end)
    query = query.order_by(NetflixSuggestMetadata.timestamp.desc())
    if limit != -1:
        query = query.limit(limit)

    payload = []
    for log in query:
        record = {
            "user": log.user.extension_id,
            "pseudo_ip": anonymize_ip(log.ip),
            "timestamp": log.timestamp.timestamp(),
            "timestamp_human": str(log.timestamp),
            "video_id": log.video_id,
            "track_id": log.track_id,
            "links": [
                {"rel": "session",
                 "href": get_api_root() + f"api/user/{log.user.extension_id}/session/{log.single_page_session_id}"},
                {"rel": "user",
                 "href": get_api_root() + f"api/user/{log.user.extension_id}"},
                {"rel": "content",
                 "href": f"https://platform-api.vod-prime.space/api/emns/provider/4/identifier/{log.video_id}"},
                {"rel": "netflix-thumbnail",
                 "href": get_api_root() + f"api/netflix/thumbnail/{log.video_id}"},
            ],
        }
        payload.append(record)

    return json.dumps(payload, cls=SetEncoder), 200, {'Content-Type': 'application/json'}
def get_masked_ip(headers: Dict) -> Optional[str]:
    """Return the anonymized client IP taken from the X-Forwarded-For header.

    The header is looked up under ``"x-forwarded-for"`` first, then
    ``"X-Forwarded-For"``, and finally under any other capitalisation, since
    HTTP header names are case-insensitive.

    Args:
        headers: Mapping of request header names to values.

    Returns:
        The result of ``anonymize_ip`` for the last address listed in the
        header, or ``None`` when the header is absent or its value cannot be
        anonymized.
    """
    wanted = "x-forwarded-for"
    names = list(headers.keys())
    if wanted in names:
        field = wanted
    elif "X-Forwarded-For" in names:
        field = "X-Forwarded-For"
    else:
        # Fall back to a case-insensitive match for other spellings.
        field = next(
            (name for name in names
             if isinstance(name, str) and name.lower() == wanted),
            None,
        )
        if field is None:
            return None

    try:
        # "1.2.3.4, 5.6.7.8" -> "5.6.7.8": keep the last field.  A value
        # without a comma is returned whole by split(); strip() removes
        # surrounding whitespace in both cases.
        ip = headers[field].split(",")[-1].strip()
        # "1.2.3.4" -> "1.2.3.0"
        return anonymize_ip(ip)
    except Exception:
        # Best effort: a malformed header value yields no IP instead of an
        # error.  Unlike a bare ``except``, this lets KeyboardInterrupt and
        # SystemExit propagate.
        return None
def run(self, events):
    """Tag every event with a session descriptor and yield it.

    The session source string joins the calendar day parsed from
    ``event._src['time']``, the IP and the user agent with ``#``; its MD5 hex
    digest becomes the session id.  Both are stored on ``event._sess``.  The
    source string is built from the original IP; afterwards the IP stored on
    the event is anonymized unless the configured mask equals
    ``FULL_IP_MASK``.
    """
    for event in events:
        event_day = dateutil_parser.parse(event._src['time'])
        srcString = "{:%Y-%m-%d}#{}#{}".format(
            event_day, event._src['ip'], event._src['userAgent'])

        session_id = md5(srcString.encode()).hexdigest()
        event._sess = {'id': session_id, 'srcString': srcString}

        # Anonymize IP
        mask = self._anonymize_ip_mask
        if mask != self.FULL_IP_MASK:
            event._src['ip'] = anonymize_ip(event._src['ip'], mask)

        message = 'SESSION_FILTER:: Event: {} Session string: {}'.format(
            event._id, srcString)
        logger.debug(message)
        yield event
def register_view():
    """Record the client's anonymized IP address for an icon set.

    The icon set id comes from the ``iconSetId`` query argument.  When the
    anonymized address is new for that icon set, it is stored, the view
    counter is incremented and the address table is written to the views
    file.  An address that is already recorded changes nothing.  The response
    body is always an empty string.
    """
    icon_set_id = request.args.get("iconSetId")
    masked_address = anonymize_ip(request.remote_addr)

    if icon_set_id in view_addresses:
        known_addresses = view_addresses[icon_set_id]
        if masked_address in known_addresses:
            # Already counted: nothing to persist.
            return ""
        known_addresses.append(masked_address)
        view_counts[icon_set_id] += 1
    else:
        # First view of this icon set.
        view_addresses[icon_set_id] = [masked_address]
        view_counts[icon_set_id] = 1

    # Write updated object to file
    with open(path_views, "w+") as view_file:
        json.dump(view_addresses, view_file)
    return ""
def test_default(self):
    """Without an explicit mask the last IPv4 octet is zeroed."""
    self.assertEqual(anonymize_ip("95.239.169.11"), "95.239.169.0")
def test_integer(self):
    """An IPv4 address given as an integer is anonymized like its text form."""
    # Integer representation of 176.126.30.183
    as_integer = 2961055415
    self.assertEqual(anonymize_ip(as_integer), "176.126.30.0")
def test_last_three_octets(self):
    """A 255.0.0.0 mask keeps only the first IPv4 octet."""
    result = anonymize_ip("76.173.77.243", ipv4_mask="255.0.0.0")
    self.assertEqual(result, "76.0.0.0")
def test_last_two_octets(self):
    """A 255.255.0.0 mask keeps the first two IPv4 octets."""
    result = anonymize_ip("224.6.226.252", ipv4_mask="255.255.0.0")
    self.assertEqual(result, "224.6.0.0")
def gdpr_check(addr):
    """Return *addr*, anonymized when the ``syndbb.gdpr`` switch is on.

    NOTE(review): the switch is set to 1 right here before it is read, so as
    written the address is always passed through ``anonymize_ip`` -- confirm
    that the override is intentional.
    """
    syndbb.gdpr = 1
    if not syndbb.gdpr:
        return addr
    return anonymize_ip(addr)
def test_default(self):
    """Without an explicit mask only the first four IPv6 blocks are kept."""
    result = anonymize_ip("5219:3a94:fdc5:19e1:70a3:b2c4:40ef:ae03")
    self.assertEqual(result, "5219:3a94:fdc5:19e1::")
def test_integer(self):
    """An IPv6 address given as an integer is anonymized like its text form."""
    # Integer representation of 2d09:1b4b:9fd0:9edf:a856:5086:69ec:9282
    as_integer = 59862544098679838285986760092514292354
    self.assertEqual(anonymize_ip(as_integer), "2d09:1b4b:9fd0:9edf::")
def test_last_two_blocks(self):
    """A mask covering six blocks zeroes the last two IPv6 blocks."""
    result = anonymize_ip(
        "4942:70b7:1441:7814:4f1b:ab59:1501:ddec",
        ipv6_mask="ffff:ffff:ffff:ffff:ffff:ffff::",
    )
    self.assertEqual(result, "4942:70b7:1441:7814:4f1b:ab59::")
def test_last_block(self):
    """A mask covering seven blocks zeroes only the last IPv6 block."""
    result = anonymize_ip(
        "c03d:13b:4757:674a:7563:cd57:6ac0:57c5",
        ipv6_mask="ffff:ffff:ffff:ffff:ffff:ffff:ffff:0000",
    )
    self.assertEqual(result, "c03d:13b:4757:674a:7563:cd57:6ac0:0")
def lambda_handler(event_in, context):
    """Normalise an incoming tracking request and forward it to Firehose.

    Steps, in order:

    1. Seed the outgoing event with the caller's user agent and source IP
       from ``requestContext.identity``.
    2. Read parameters from the query string of the first entry in the POSTed
       ``requests`` list (if a body is present) and from the request's own
       query string; POST values win.  Names are mapped and values cast as
       declared by ``schema.INCOMING``.
    3. Optionally enrich with device info (``DEVICE_DETECTION_ENABLED``),
       mask the IP (``IP_ADDRESS_MASKING_ENABLED``) and enrich with geo info
       (``IP_GEOCODING_ENABLED``).  Each switch is on only when its
       environment variable equals ``"true"``.
    4. Resolve ``event_datetime`` from the event itself, falling back to the
       API Gateway ``requestTime``, and store it as a UTC
       ``YYYY-MM-DD HH:MM:SS`` string.
    5. Fall back to the ``Accept-Language`` header for ``language``.
    6. Put the event, as one JSON line, on the delivery stream named by
       ``DELIVERY_STREAM_NAME``.

    Args:
        event_in: The request event; ``requestContext`` is required.
        context: Lambda context object (unused).

    Returns:
        ``{"statusCode": 200}``.

    Raises:
        RuntimeError: If the user-agent lookup response contains ``"error"``.
        Exceptions raised by ``response.raise_for_status()`` and by value
        casting propagate unchanged.
    """
    identity = event_in["requestContext"]["identity"]
    event_out = {
        "user_agent": identity["userAgent"],
        "ip": identity["sourceIp"],
        "device_info": None,
        "geo_info": None,
    }

    # extract post data
    post_data = {}
    if event_in.get("body") is not None:
        first_request = json.loads(event_in["body"])["requests"][0]
        post_data = dict(parse_qsl(urlparse(first_request).query))

    # map incoming event_in params to readable ones and cast values
    query_str_data = event_in.get("queryStringParameters", {}) or {}
    for param in schema.INCOMING:
        if param.name_in in post_data:
            event_out[param.name_out] = post_data[param.name_in]
        elif param.name_in in query_str_data:
            event_out[param.name_out] = query_str_data[param.name_in]

        # cast values (also applies to keys that were already present)
        if param.name_out in event_out:
            value = event_out[param.name_out]
            if param.type == bool:
                event_out[param.name_out] = bool(int(value))
            elif param.type == int:
                # An empty string stands for "no value".
                event_out[param.name_out] = None if value == "" else int(value)
            else:
                event_out[param.name_out] = param.type(value)

    # Device lookup
    device_detection_enabled = (
        os.environ.get("DEVICE_DETECTION_ENABLED") == "true")
    if device_detection_enabled and event_out["user_agent"]:
        # cache hit?
        # NOTE(review): plain indexing assumes LOOKUP_CACHE yields a falsy
        # value for unknown keys instead of raising -- confirm.
        cached = LOOKUP_CACHE["user_agent"][event_out["user_agent"]]
        if cached:
            device_info = cached
        else:
            response = requests.get(
                API_USERSTACK_ENDPOINT,
                {
                    "access_key": os.environ["USERSTACK_API_TOKEN"],
                    "ua": event_out["user_agent"]
                },
            )
            response.raise_for_status()
            response_json = response.json()
            if "error" in response_json:
                raise RuntimeError(
                    f"User Agent Lookup not successful, response was: {response_json}"
                )
            device_info = response_json
            LOOKUP_CACHE["user_agent"][event_out["user_agent"]] = response_json
        event_out["device_info"] = device_info

    # mask ip address (before geocoding, so the lookup sees the masked IP)
    if os.environ.get("IP_ADDRESS_MASKING_ENABLED") == "true":
        event_out["ip"] = anonymize_ip(event_out["ip"])

    # IP lookup
    ip_geocoding_enabled = os.environ.get("IP_GEOCODING_ENABLED") == "true"
    if ip_geocoding_enabled and event_out["ip"]:
        # cache hit?
        cached = LOOKUP_CACHE["ip"][event_out["ip"]]
        if cached:
            geo_info = cached
        else:
            response = requests.get(
                f"{API_IPINFO_ENDPOINT}/{event_out['ip']}",
                params={"token": os.environ["IP_INFO_API_TOKEN"]},
            )
            response.raise_for_status()
            response_json = response.json()
            # split "lat,lon" string into coords
            # e.g "48.1374,11.5755" => latitude=48.1374, longitude=11.5755
            lat_lon_str = response_json.get("loc", "")
            if "," in lat_lon_str:
                latitude, longitude = lat_lon_str.split(",")
                response_json["loc"] = {
                    "latitude": float(latitude),
                    "longitude": float(longitude),
                }
            else:
                response_json["loc"] = {
                    "latitude": None,
                    "longitude": None,
                }
            LOOKUP_CACHE["ip"][event_out["ip"]] = response_json
            geo_info = response_json
        event_out["geo_info"] = geo_info

    # event_datetime handling
    # 1. try to read from event
    event_datetime = event_out.get("event_datetime")
    if event_datetime:
        if event_datetime.isnumeric():
            # a) unix timestamp
            event_datetime = datetime.fromtimestamp(
                int(event_datetime), tz=timezone.utc)
        else:
            # b) try to parse string
            event_datetime = date_parse(event_datetime)
    # 2. Fallback to the API Gateway requestTime, e.g.
    #    '06/Apr/2020:09:07:05 +0000'.  Any falsy value triggers it (missing,
    #    None, or an empty string), not only None.
    if not event_datetime:
        event_datetime = datetime.strptime(
            event_in["requestContext"]["requestTime"],
            "%d/%b/%Y:%H:%M:%S %z")
    # Convert to UTC exactly once.  The value stays timezone-aware until
    # here: calling astimezone() on an already-stripped naive value would
    # reinterpret it as host-local time.
    event_out["event_datetime"] = (
        event_datetime.astimezone(timezone.utc)
        .replace(tzinfo=None)
        .strftime("%Y-%m-%d %H:%M:%S"))

    # language handling
    if not event_out.get("language"):
        # fallback to HTTP Accept-Language; "headers" may be present but None
        headers = event_in.get("headers") or {}
        language = headers.get("Accept-Language")
        if language:
            event_out["language"] = language

    # send event to firehose
    firehose_client.put_record(
        DeliveryStreamName=os.environ["DELIVERY_STREAM_NAME"],
        Record={"Data": json.dumps(event_out) + "\n"},
    )
    return {"statusCode": 200}