def process_request(self, request):
    """Add an ``is_human`` field to the request object.

    The field is used to filter non-human activity from the usage logs.
    A request is flagged as non-human when its User-Agent header is missing
    or blank, or when ``robot_detection.is_robot`` reports it as a robot.
    """
    user_agent = request.META.get('HTTP_USER_AGENT')
    if not user_agent:
        # A blank header identifies the client no better than a missing
        # one, so neither is handed to the detector.
        request.is_human = False
    else:
        request.is_human = not robot_detection.is_robot(user_agent)
def process_request(self, request):
    """Add an ``is_human`` field to the request object.

    The field is used to filter non-human activity from the usage logs.
    A request is flagged as non-human when its User-Agent header is missing
    or blank, or when ``robot_detection.is_robot`` reports it as a robot.
    """
    user_agent = request.META.get('HTTP_USER_AGENT')
    if not user_agent:
        # A blank header identifies the client no better than a missing
        # one, so neither is handed to the detector.
        request.is_human = False
    else:
        request.is_human = not robot_detection.is_robot(user_agent)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Display all the emails of a given thread.

    Raises Http404 when the store has no thread for ``threadid`` on
    ``mlist_fqdn``. ``month`` and ``year`` are not used by this view.
    """
    store = get_store(request)
    thread = store.get_thread(mlist_fqdn, threadid)
    if not thread:
        raise Http404
    prev_thread, next_thread = store.get_thread_neighbors(
        mlist_fqdn, threadid)
    sort_mode = request.GET.get("sort", "thread")
    set_message_votes(thread.starting_email, request.user)

    # Tags
    tag_form = AddTagForm()
    # filter() returns a (possibly empty) queryset and never raises
    # DoesNotExist, so no exception handling is needed.
    tags = Tag.objects.filter(threadid=threadid, list_address=mlist_fqdn)

    # Favorites
    # exists() also copes with duplicate rows, where get() would raise
    # MultipleObjectsReturned.
    fav_action = "add"
    if request.user.is_authenticated() and Favorite.objects.filter(
            list_address=mlist_fqdn, threadid=threadid,
            user=request.user).exists():
        fav_action = "rm"

    # Category
    category, category_form = get_category_widget(request, thread.category)

    # Extract relative dates
    today = datetime.date.today()
    days_old = today - thread.starting_email.date.date()
    days_inactive = today - thread.last_email.date.date()

    mlist = store.get_list(mlist_fqdn)
    subject = stripped_subject(mlist, thread.starting_email.subject)

    # Last view
    last_view = None
    if request.user.is_authenticated():
        last_view_obj, created = LastView.objects.get_or_create(
            list_address=mlist_fqdn, threadid=threadid, user=request.user)
        if not created:
            last_view = last_view_obj.view_date
            last_view_obj.save()  # update timestamp

    # get the number of unread messages
    if last_view is None:
        if request.user.is_authenticated():
            unread_count = len(thread)
        else:
            unread_count = 0
    else:
        # XXX: Storm-specific
        unread_count = thread.replies_after(last_view).count()

    # Flash messages
    flash_messages = []
    flash_msg = request.GET.get("msg")
    if flash_msg:
        # "msg" comes straight from the query string: an unknown code is
        # ignored instead of surfacing as a KeyError (HTTP 500).
        try:
            flash_def = FLASH_MESSAGES[flash_msg]
        except KeyError:
            flash_def = None
        if flash_def is not None:
            flash_messages.append(
                {"type": flash_def[0], "msg": flash_def[1]})

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    is_bot = True
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    if user_agent:
        is_bot = robot_detection.is_robot(user_agent)

    # Sorted copy: the thread's own participants attribute is left untouched.
    participants = sorted(thread.participants, key=lambda x: x[0].lower())

    context = {
        'mlist': mlist,
        'threadid': threadid,
        'subject': subject,
        'tags': tags,
        'addtag_form': tag_form,
        'month': thread.date_active,
        'first_mail': thread.starting_email,
        'neighbors': (prev_thread, next_thread),
        'months_list': get_months(store, mlist.name),
        'days_inactive': days_inactive.days,
        'days_old': days_old.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': ReplyForm(),
        'is_bot': is_bot,
        'num_comments': len(thread),
        'participants': participants,
        'last_view': last_view,
        'unread_count': unread_count,
        'category_form': category_form,
        'category': category,
        'flash_messages': flash_messages,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        # The limit is a safety measure, don't let a bot kill the DB
        context["replies"] = _get_thread_replies(request, thread, limit=1000)

    return render(request, "thread.html", context)
def testFunc(self):
    """Assert that ``user_agent`` from the outer scope is seen as a robot."""
    detected = robot_detection.is_robot(user_agent)
    self.assertTrue(detected)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Render the page listing every email of one thread.

    The mailing list and the thread are looked up with get_object_or_404.
    ``month`` and ``year`` are not used by this view.
    """
    mlist = get_object_or_404(MailingList, name=mlist_fqdn)
    thread = get_object_or_404(Thread, mailinglist=mlist, thread_id=threadid)
    first_email = thread.starting_email
    sort_mode = request.GET.get("sort", "thread")

    # Vote of the current user on the first email (None when anonymous)
    first_email.myvote = (
        first_email.votes.filter(user=request.user).first()
        if request.user.is_authenticated() else None)

    # Tags
    addtag_form = AddTagForm()

    # Favorites
    already_favorite = (
        request.user.is_authenticated()
        and Favorite.objects.filter(
            thread=thread, user=request.user).exists())
    fav_action = "rm" if already_favorite else "add"

    # Category
    category, category_form = get_category_widget(request, thread.category)

    # Relative dates
    today = datetime.date.today()
    age = today - first_email.date.date()
    idle = today - thread.date_active.date()

    subject = stripped_subject(mlist, first_email.subject)

    # Last view
    last_view = None
    if request.user.is_authenticated():
        view_record, is_new = LastView.objects.get_or_create(
            thread=thread, user=request.user)
        if not is_new:
            last_view = view_record.view_date
            view_record.save()  # update timestamp

    # Number of unread messages
    if last_view is not None:
        unread_count = thread.emails.filter(date__gt=last_view).count()
    elif request.user.is_authenticated():
        unread_count = thread.emails_count
    else:
        unread_count = 0

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    is_bot = robot_detection.is_robot(user_agent) if user_agent else True

    # Export button
    mbox_filename = "%s-%s" % (mlist.name, thread.thread_id)
    mbox_url = reverse("hk_list_export_mbox", kwargs={
        "mlist_fqdn": mlist.name,
        "filename": mbox_filename,
    })
    export = {
        "url": "%s?thread=%s" % (mbox_url, thread.thread_id),
        "message": _("Download"),
        "title": _("This thread in gzipped mbox format"),
    }

    context = {
        'mlist': mlist,
        'thread': thread,
        'starting_email': first_email,
        'subject': subject,
        'addtag_form': addtag_form,
        'month': thread.date_active,
        'months_list': get_months(mlist),
        'days_inactive': idle.days,
        'days_old': age.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': get_posting_form(ReplyForm, request, mlist),
        'is_bot': is_bot,
        'num_comments': thread.emails_count - 1,
        'last_view': last_view,
        'unread_count': unread_count,
        'category_form': category_form,
        'category': category,
        'export': export,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        # The limit is a safety measure, don't let a bot kill the DB
        context["replies"] = _get_thread_replies(request, thread, limit=1000)

    return render(request, "hyperkitty/thread.html", context)
def is_human(user_agent):
    """Return False when the user agent is detected as a robot, else True."""
    return not robot_detection.is_robot(user_agent)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Render the page listing every email of one thread.

    The mailing list and the thread are looked up with get_object_or_404.
    ``month`` and ``year`` are not used by this view.
    """
    # pylint: disable=unused-argument
    mlist = get_object_or_404(MailingList, name=mlist_fqdn)
    thread = get_object_or_404(Thread, mailinglist=mlist, thread_id=threadid)
    first_email = thread.starting_email
    sort_mode = request.GET.get("sort", "thread")

    # Vote of the current user on the first email (None when anonymous)
    first_email.myvote = (
        first_email.votes.filter(user=request.user).first()
        if request.user.is_authenticated() else None)

    # Tags
    addtag_form = AddTagForm()

    # Favorites
    already_favorite = (
        request.user.is_authenticated()
        and Favorite.objects.filter(
            thread=thread, user=request.user).exists())
    fav_action = "rm" if already_favorite else "add"

    # Category
    category, category_form = get_category_widget(request, thread.category)

    # Relative dates
    today = datetime.date.today()
    age = today - first_email.date.date()
    idle = today - thread.date_active.date()

    subject = stripped_subject(mlist, first_email.subject)

    # Last view
    last_view = None
    if request.user.is_authenticated():
        view_record, is_new = LastView.objects.get_or_create(
            thread=thread, user=request.user)
        if not is_new:
            last_view = view_record.view_date
            view_record.save()  # update timestamp

    # Number of unread messages
    if last_view is not None:
        unread_count = thread.emails.filter(date__gt=last_view).count()
    elif request.user.is_authenticated():
        unread_count = thread.emails_count
    else:
        unread_count = 0

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    is_bot = robot_detection.is_robot(user_agent) if user_agent else True

    # Export button
    mbox_filename = "%s-%s" % (mlist.name, thread.thread_id)
    mbox_url = reverse("hk_list_export_mbox", kwargs={
        "mlist_fqdn": mlist.name,
        "filename": mbox_filename,
    })
    export = {
        "url": "%s?thread=%s" % (mbox_url, thread.thread_id),
        "message": _("Download"),
        "title": _("This thread in gzipped mbox format"),
    }

    context = {
        'mlist': mlist,
        'thread': thread,
        'starting_email': first_email,
        'subject': subject,
        'addtag_form': addtag_form,
        'month': thread.date_active,
        'months_list': get_months(mlist),
        'days_inactive': idle.days,
        'days_old': age.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': get_posting_form(ReplyForm, request, mlist),
        'is_bot': is_bot,
        'num_comments': thread.emails_count - 1,
        'last_view': last_view,
        'unread_count': unread_count,
        'category_form': category_form,
        'category': category,
        'export': export,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        # The limit is a safety measure, don't let a bot kill the DB
        context["replies"] = _get_thread_replies(request, thread, limit=1000)

    return render(request, "hyperkitty/thread.html", context)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Display all the emails of a given thread.

    The mailing list and the thread are looked up with get_object_or_404.
    ``month`` and ``year`` are not used by this view.
    """
    # pylint: disable=unused-argument
    mlist = get_object_or_404(MailingList, name=mlist_fqdn)
    thread = get_object_or_404(Thread, mailinglist=mlist, thread_id=threadid)
    starting_email = thread.starting_email

    sort_mode = request.GET.get("sort", "thread")
    if request.user.is_authenticated():
        starting_email.myvote = starting_email.votes.filter(
            user=request.user).first()
    else:
        starting_email.myvote = None

    # Tags
    tag_form = AddTagForm()

    # Favorites
    fav_action = "add"
    if request.user.is_authenticated() and Favorite.objects.filter(
            thread=thread, user=request.user).exists():
        fav_action = "rm"

    # Category
    categories = [
        (c.name, c.name.upper()) for c in ThreadCategory.objects.all()
    ] + [("", "no category")]
    category, category_form = get_category_widget(
        request, thread.category, categories)

    # Extract relative dates
    today = datetime.date.today()
    days_old = today - starting_email.date.date()
    days_inactive = today - thread.date_active.date()

    subject = stripped_subject(mlist, starting_email.subject)

    # Last view
    last_view = None
    if request.user.is_authenticated():
        last_view_obj, created = LastView.objects.get_or_create(
            thread=thread, user=request.user)
        if not created:
            last_view = last_view_obj.view_date
            last_view_obj.save()  # update timestamp

    # get the number of unread messages
    if last_view is None:
        if request.user.is_authenticated():
            unread_count = thread.emails_count
        else:
            unread_count = 0
    else:
        unread_count = thread.emails.filter(date__gt=last_view).count()

    # Flash messages
    flash_messages = []
    flash_msg = request.GET.get("msg")
    if flash_msg:
        # "msg" comes straight from the query string: an unknown code is
        # ignored instead of surfacing as a KeyError (HTTP 500).
        try:
            flash_def = FLASH_MESSAGES[flash_msg]
        except KeyError:
            flash_def = None
        if flash_def is not None:
            flash_messages.append(
                {"type": flash_def[0], "msg": flash_def[1]})

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    if user_agent:
        is_bot = robot_detection.is_robot(user_agent)
    else:
        is_bot = True

    context = {
        'mlist': mlist,
        'thread': thread,
        'starting_email': starting_email,
        'subject': subject,
        'addtag_form': tag_form,
        'month': thread.date_active,
        'months_list': get_months(mlist),
        'days_inactive': days_inactive.days,
        'days_old': days_old.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': ReplyForm(),
        'is_bot': is_bot,
        'num_comments': thread.emails_count - 1,
        'last_view': last_view,
        'unread_count': unread_count,
        'category_form': category_form,
        'category': category,
        'flash_messages': flash_messages,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        # The limit is a safety measure, don't let a bot kill the DB
        context["replies"] = _get_thread_replies(request, thread, limit=1000)

    return render(request, "hyperkitty/thread.html", context)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Display all the emails of a given thread.

    Raises Http404 when the store has no thread for ``threadid`` on
    ``mlist_fqdn``. ``month`` and ``year`` are not used by this view.
    """
    search_form = SearchForm(auto_id=False)
    store = get_store(request)
    thread = store.get_thread(mlist_fqdn, threadid)
    if not thread:
        raise Http404
    prev_thread, next_thread = store.get_thread_neighbors(
        mlist_fqdn, threadid)
    sort_mode = request.GET.get("sort", "thread")
    set_message_votes(thread.starting_email, request.user)

    from_url = reverse("thread", kwargs={
        "mlist_fqdn": mlist_fqdn,
        "threadid": threadid,
    })

    # Tags
    tag_form = AddTagForm(initial={'from_url': from_url})
    # filter() returns a (possibly empty) queryset and never raises
    # DoesNotExist, so no exception handling is needed.
    tags = Tag.objects.filter(threadid=threadid, list_address=mlist_fqdn)

    # Favorites
    # exists() also copes with duplicate rows, where get() would raise
    # MultipleObjectsReturned.
    fav_action = "add"
    if request.user.is_authenticated() and Favorite.objects.filter(
            list_address=mlist_fqdn, threadid=threadid,
            user=request.user).exists():
        fav_action = "rm"

    # Extract relative dates
    today = datetime.date.today()
    days_old = today - thread.starting_email.date.date()
    days_inactive = today - thread.last_email.date.date()

    mlist = store.get_list(mlist_fqdn)
    subject = stripped_subject(mlist, thread.starting_email.subject)

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    is_bot = True
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    if user_agent:
        is_bot = robot_detection.is_robot(user_agent)

    # Sorted copy: the thread's own participants attribute is left untouched.
    participants = sorted(thread.participants, key=lambda x: x[0].lower())

    context = {
        'mlist': mlist,
        'threadid': threadid,
        'subject': subject,
        'tags': tags,
        'search_form': search_form,
        'addtag_form': tag_form,
        'month': thread.date_active,
        'first_mail': thread.starting_email,
        'neighbors': (prev_thread, next_thread),
        'months_list': get_months(store, mlist.name),
        'days_inactive': days_inactive.days,
        'days_old': days_old.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': ReplyForm(),
        'is_bot': is_bot,
        'participants': participants,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        context["replies"] = _get_thread_replies(request, thread)

    return render(request, "thread.html", context)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Display all the emails of a given thread.

    Raises Http404 when the store has no thread for ``threadid`` on
    ``mlist_fqdn``. ``month`` and ``year`` are not used by this view.
    """
    search_form = SearchForm(auto_id=False)
    store = get_store(request)
    thread = store.get_thread(mlist_fqdn, threadid)
    if not thread:
        raise Http404
    prev_thread, next_thread = store.get_thread_neighbors(
        mlist_fqdn, threadid)
    sort_mode = request.GET.get("sort", "thread")
    set_message_votes(thread.starting_email, request.user)

    from_url = reverse("thread", kwargs={
        "mlist_fqdn": mlist_fqdn,
        "threadid": threadid,
    })

    # Tags
    tag_form = AddTagForm(initial={'from_url': from_url})
    # filter() returns a (possibly empty) queryset and never raises
    # DoesNotExist, so no exception handling is needed.
    tags = Tag.objects.filter(threadid=threadid, list_address=mlist_fqdn)

    # Favorites
    # exists() also copes with duplicate rows, where get() would raise
    # MultipleObjectsReturned.
    fav_action = "add"
    if request.user.is_authenticated() and Favorite.objects.filter(
            list_address=mlist_fqdn, threadid=threadid,
            user=request.user).exists():
        fav_action = "rm"

    # Extract relative dates
    today = datetime.date.today()
    days_old = today - thread.starting_email.date.date()
    days_inactive = today - thread.last_email.date.date()

    mlist = store.get_list(mlist_fqdn)
    subject = stripped_subject(mlist, thread.starting_email.subject)

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    is_bot = True
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    if user_agent:
        is_bot = robot_detection.is_robot(user_agent)

    # Sorted copy: the thread's own participants attribute is left untouched.
    participants = sorted(thread.participants, key=lambda x: x[0].lower())

    context = {
        'mlist': mlist,
        'threadid': threadid,
        'subject': subject,
        'tags': tags,
        'search_form': search_form,
        'addtag_form': tag_form,
        'month': thread.date_active,
        'first_mail': thread.starting_email,
        'neighbors': (prev_thread, next_thread),
        'months_list': get_months(store, mlist.name),
        'days_inactive': days_inactive.days,
        'days_old': days_old.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': ReplyForm(),
        'is_bot': is_bot,
        'participants': participants,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        context["replies"] = _get_thread_replies(request, thread)

    return render(request, "thread.html", context)
def thread_index(request, mlist_fqdn, threadid, month=None, year=None):
    """Display all the emails of a given thread.

    The mailing list and the thread are looked up with get_object_or_404.
    ``month`` and ``year`` are not used by this view.
    """
    # pylint: disable=unused-argument
    mlist = get_object_or_404(MailingList, name=mlist_fqdn)
    thread = get_object_or_404(Thread, mailinglist=mlist, thread_id=threadid)
    starting_email = thread.starting_email

    sort_mode = request.GET.get("sort", "thread")
    if request.user.is_authenticated():
        starting_email.myvote = starting_email.votes.filter(
            user=request.user).first()
    else:
        starting_email.myvote = None

    # Tags
    tag_form = AddTagForm()

    # Favorites
    fav_action = "add"
    if request.user.is_authenticated() and Favorite.objects.filter(
            thread=thread, user=request.user).exists():
        fav_action = "rm"

    # Category
    categories = [
        (c.name, c.name.upper()) for c in ThreadCategory.objects.all()
    ] + [("", "no category")]
    category, category_form = get_category_widget(
        request, thread.category, categories)

    # Extract relative dates
    today = datetime.date.today()
    days_old = today - starting_email.date.date()
    days_inactive = today - thread.date_active.date()

    subject = stripped_subject(mlist, starting_email.subject)

    # Last view
    last_view = None
    if request.user.is_authenticated():
        last_view_obj, created = LastView.objects.get_or_create(
            thread=thread, user=request.user)
        if not created:
            last_view = last_view_obj.view_date
            last_view_obj.save()  # update timestamp

    # get the number of unread messages
    if last_view is None:
        if request.user.is_authenticated():
            unread_count = thread.emails_count
        else:
            unread_count = 0
    else:
        unread_count = thread.emails.filter(date__gt=last_view).count()

    # Flash messages
    flash_messages = []
    flash_msg = request.GET.get("msg")
    if flash_msg:
        # "msg" comes straight from the query string: an unknown code is
        # ignored instead of surfacing as a KeyError (HTTP 500).
        try:
            flash_def = FLASH_MESSAGES[flash_msg]
        except KeyError:
            flash_def = None
        if flash_def is not None:
            flash_messages.append(
                {"type": flash_def[0], "msg": flash_def[1]})

    # TODO: eventually move to a middleware ?
    # http://djangosnippets.org/snippets/1865/
    # Requests without a user agent are treated as bots.
    user_agent = request.META.get('HTTP_USER_AGENT', None)
    if user_agent:
        is_bot = robot_detection.is_robot(user_agent)
    else:
        is_bot = True

    context = {
        'mlist': mlist,
        'thread': thread,
        'starting_email': starting_email,
        'subject': subject,
        'addtag_form': tag_form,
        'month': thread.date_active,
        'months_list': get_months(mlist),
        'days_inactive': days_inactive.days,
        'days_old': days_old.days,
        'sort_mode': sort_mode,
        'fav_action': fav_action,
        'reply_form': ReplyForm(),
        'is_bot': is_bot,
        'num_comments': thread.emails_count - 1,
        'last_view': last_view,
        'unread_count': unread_count,
        'category_form': category_form,
        'category': category,
        'flash_messages': flash_messages,
    }

    if is_bot:
        # Don't rely on AJAX to load the replies
        # The limit is a safety measure, don't let a bot kill the DB
        context["replies"] = _get_thread_replies(request, thread, limit=1000)

    return render(request, "hyperkitty/thread.html", context)
def flag_robots(doc):
    """Set ``doc['is_robot']`` from the document's user agent.

    Documents without a ``user_agent`` key are flagged ``False``; otherwise
    the flag is whatever ``is_robot`` returns for that user agent. The same
    ``doc`` object is returned.
    """
    if 'user_agent' in doc:
        robot_flag = is_robot(doc['user_agent'])
    else:
        robot_flag = False
    doc['is_robot'] = robot_flag
    return doc
def create_training_data(event, context):
    """Build the labelled training CSV from ``./data.log`` and upload it.

    The log is parsed with ``cloudfront_log_parser``, every entry is passed
    through ``format_data`` (together with a GeoLite2 reader) on a pool of
    12 threads, each resulting row gets an ``is_malicious_bot`` label, and
    the CSV is stored as ``training-data.csv`` in the bucket named by the
    ``TRANSFORMED_BUCKET`` environment variable.

    Args:
        event: Logged as JSON and passed to ``get_correlation_id``.
        context: Unused.

    Returns:
        None.
    """
    logging.info(json.dumps({'event': event}))
    # NOTE(review): the returned id was never used; the call is kept in
    # case it has side effects - confirm and drop it if it has none.
    get_correlation_id(event=event)
    s3_resource = boto3.resource('s3')

    fieldnames = [
        'ip_address', 'day_of_week', 'hour_of_day', 'minute_of_hour',
        'edge', 'response_size', 'http_method', 'cloudfront_host', 'path',
        'status_code', 'status_code_group', 'aborted', 'referrer',
        'user_agent', 'browser_family', 'browser_version', 'os_family',
        'os_version', 'device', 'is_mobile', 'is_tablet', 'is_pc',
        'is_touch_capable', 'is_bot', 'querystring', 'edge_result_type',
        'request_host', 'request_protocol', 'request_size',
        'response_duration', 'ssl_protocol', 'ssl_cypher',
        'edge_response_result_type', 'country', 'city', 'latitude',
        'longitude', 'is_malicious_bot',
    ]
    output = io.StringIO()
    writer = csv.DictWriter(output, fieldnames=fieldnames,
                            quoting=csv.QUOTE_NONNUMERIC)
    writer.writeheader()

    reader = geoip2.database.Reader('./GeoLite2-City.mmdb')
    pool = ThreadPool(12)
    try:
        # The log file stays open until starmap() has returned, in case
        # the parser reads from it lazily.
        with open('./data.log') as data:
            parsed = cloudfront_log_parser.parse(data)
            return_object = pool.starmap(
                format_data, zip(parsed, itertools.repeat(reader)))
    finally:
        # Release the workers and the GeoIP database even on failure.
        pool.close()
        pool.join()
        reader.close()

    whitelisted_ips = ('',)
    bad_ips = ('111.88.139.9',)
    # NOTE(review): these are substring matches, so '-' hits every user
    # agent containing a hyphen - it probably wants an exact comparison.
    bad_agents = (
        'curl', 'wget', 'Python', 'python', 'ruby', '-', 'Java', 'PhantomJS',
    )

    logging.debug(
        json.dumps({
            'message': 'multithreaded return object',
            'object': '{}'.format(return_object)
        }))

    for row in return_object:
        user_agent = row['user_agent']
        ip_address = row['ip_address']

        # Rows start out as benign; later rules override earlier ones,
        # so the order of the checks below matters.
        is_malicious_bot = any(agent in user_agent for agent in bad_agents)
        # The detector works on user agent strings, not on IP addresses.
        if robot_detection.is_robot(user_agent):
            is_malicious_bot = False
        if ip_address in whitelisted_ips:
            is_malicious_bot = False
        if ip_address in bad_ips:
            is_malicious_bot = True
        if 'health' in row['path']:
            is_malicious_bot = False
        row['is_malicious_bot'] = is_malicious_bot

        # Ignore this metadata when training
        row['ip_address'] = ''
        row['request_host'] = ''
        writer.writerow(row)

    s3_resource.Object(os.environ['TRANSFORMED_BUCKET'],
                       "training-data.csv").put(Body=output.getvalue())
    logging.info(json.dumps({'message': 'Done!'}))