def sanitize_html(content):
    """Return ``content`` after cleaning it with a default ``Sanitizer``."""
    return Sanitizer().sanitize(content)
def edit_profile(request):
    """Show and process the combined user / user-profile edit forms.

    On a POST where both forms validate, the user form is saved, the
    profile is attached to the user, its ``bio`` is passed through
    ``Sanitizer`` and the profile is saved; the client is then redirected
    to ``accounts:profile``.  In every other case the edit template is
    rendered with the current forms (the bound ones after a failed POST).
    """
    account = get_object_or_404(get_user_model(), pk=request.user.pk)
    # A user without a profile gets an unbound instance (None).
    existing_profile = account.userprofile if hasattr(account, 'userprofile') else None
    email_initial = {'confirm_email': account.email}

    if request.method != "POST":
        user_form = P7UserChangeForm(instance=account, initial=email_initial)
        profile_form = UserProfileForm(instance=existing_profile)
    else:
        user_form = P7UserChangeForm(request.POST,
                                     instance=account,
                                     initial=email_initial)
        profile_form = UserProfileForm(request.POST,
                                       request.FILES,
                                       instance=existing_profile)
        # Validate both forms unconditionally so neither is skipped by
        # short-circuit evaluation.
        user_ok = user_form.is_valid()
        profile_ok = profile_form.is_valid()
        if user_ok and profile_ok:
            user_form.save()
            updated_profile = profile_form.save(commit=False)
            updated_profile.user = account
            updated_profile.bio = Sanitizer().sanitize(updated_profile.bio)
            updated_profile.save()
            return redirect(reverse('accounts:profile'))

    return render(
        request,
        'accounts/edit_profile.html',
        {'user_form': user_form, 'profile_form': profile_form},
    )
def parse_opportunity(self, response):
    """Extract one opportunity page and pass it to ``store_opportunity``.

    The stored record contains the page title, the request URL, the
    incomplete / complete application counters and ``full_text``: the
    sanitized markup of every ``div.column-one-whole`` element on the
    page, concatenated in document order.

    Args:
        response: the scraped page; must provide ``xpath``, ``css`` and
            ``request.url``.
    """
    # Blurt it all out in full for now.
    sanitizer = Sanitizer({
        'tags': ('hr', 'a', 'br', 'p', 'ul', 'ol', 'li', 'h1', 'h2', 'h3',
                 'table', 'tbody', 'tr', 'td'),
    })
    matches = response.xpath('.//div[@class="column-one-whole"]').extract()
    # Join *all* sanitized columns; assigning inside a loop would keep only
    # the last one.
    full_payload = "".join(sanitizer.sanitize(item) for item in matches)

    self.store_opportunity({
        'title': response.css('header h1::text').extract_first().strip(),
        'url': response.request.url,
        'incomplete_applications': response.css(
            '#incomplete-applications .big-statistic::text'
        ).extract_first().strip(),
        'complete_applications': response.css(
            '#completed-applications .big-statistic::text'
        ).extract_first().strip(),
        'full_text': full_payload,
    })
def getList():
    """Build ``<li>`` link markup for every entry of the ``data`` directory.

    Each entry name is passed through ``Sanitizer`` and used both as the
    ``id`` query value and as the link text.  The items are returned
    concatenated into a single string.
    """
    cleaner = Sanitizer()
    entry_template = '<li><a href="index.py?id={name}">{name}</a></li>'
    return ''.join(
        entry_template.format(name=cleaner.sanitize(entry))
        for entry in os.listdir('data')
    )
def getList():
    """Return sanitized ``<li>`` link markup for the files in the data folder.

    The raw file names are interpolated into the list items first; the
    complete markup is then passed through ``Sanitizer`` in one go.
    """
    cleaner = Sanitizer()
    data_dir = "C:/Bitnami/wampstack-7.3.6-2/apache2/htdocs/data"
    markup = "".join(
        "<li><a href = 'index.py?id={name}'>{name}</a></li>".format(name=entry)
        for entry in os.listdir(data_dir)
    )
    return cleaner.sanitize(markup)
def do_GET(self):
    """Serve an XML feed of Workable jobs for the companies in the URL query.

    The query string is a comma-separated list of Workable account names.
    For each account the public widget API is queried and every job is
    appended as a ``<job>`` element (title, Email, url, location,
    description, id) under one ``<jobs>`` root.  Job descriptions are run
    through an HTML sanitizer restricted to a small tag whitelist.

    On success the XML is sent with status 200 and a one-hour cache
    header.  Any exception results in a 500 response carrying an HTML
    error page that includes the (escaped) exception text.
    """
    import html  # local import: only needed to escape the error text

    try:
        request_url = urlparse(self.requestline)
        # Get all company names from URL
        all_companies = request_url.query.split(',')
        allJobsXml = ET.Element('jobs')

        # HTML Sanitizer -- its configuration never changes, so build it once.
        sanitizer = Sanitizer({
            'tags': ('em', 'strong', 'a', 'p', 'br', 'span', 'ul', 'ol',
                     'li', 'h1', 'h2', 'h3', 'hr'),
            'attributes': {
                "a": "href"
            },
        })

        for raw_company in all_companies:
            # The raw request line is parsed, so an entry may carry trailing
            # text (e.g. the HTTP version); keep the first token only.
            company = raw_company.split()[0]
            r = requests.get(
                f"https://apply.workable.com/api/v1/widget/accounts/{company}?details=true"
            )
            data = r.json()

            for job in data["jobs"]:
                jobXml = ET.SubElement(allJobsXml, 'job')

                titleXml = ET.SubElement(jobXml, 'title')
                titleXml.text = job["title"]

                employerXml = ET.SubElement(jobXml, 'Email')
                employerXml.text = f"team+{company}@climate.careers"

                urlXml = ET.SubElement(jobXml, 'url')
                urlXml.text = job["url"]

                locationXml = ET.SubElement(jobXml, 'location')
                locationXml.text = job["city"] + ", " + job[
                    "state"] + ", " + job["country"]

                descriptionXml = ET.SubElement(jobXml, 'description')
                descriptionXml.text = sanitizer.sanitize(job["description"])

                idXml = ET.SubElement(jobXml, 'id')
                idXml.text = job["shortcode"]

        self.send_response(200)
        self.send_header('Content-type', 'text/xml')
        self.send_header(
            'Cache-Control',
            'public, immutable, no-transform, s-maxage=3600, max-age=3600')
        self.end_headers()
        message = ET.tostring(allJobsXml)
        self.wfile.write(message)
        return
    except Exception as e:
        self.send_response(500)
        self.send_header('Content-type', 'text/html')
        self.end_headers()
        message = "<h1>Internal Error</h1><p>Sorry, there was a problem. Make sure the employer's name is " \
                  "included in the URL query and/or is the correct name.</p> "
        # Escape the exception text: it can echo request-derived data.
        message2 = f"<p>{html.escape(str(e))}</p>"
        # Send the explanation as well as the exception detail; previously
        # only the detail was written.
        self.wfile.write((message + message2).encode())
        return
def post(self):
    """Store the submitted announcement for the current user.

    The ``announcement`` body argument is sanitized before it is written
    to the queue under the user's name; the client is then redirected to
    the URL registered as ``view``.
    """
    current_user = self.get_current_user()
    cleaner = Sanitizer()
    raw_text = self.get_body_argument("announcement")
    announcement = cleaner.sanitize(raw_text)
    self.queue.update(current_user["name"], announcement)
    view_url = self.application.reverse_url("view")
    self.redirect(view_url)
def get_list():
    """Return one ``<li>`` link per entry of the ``data`` directory.

    Entry names are sanitized before being placed into the ``id`` query
    value and the link text; all items are concatenated into one string.
    """
    cleaner = Sanitizer()
    safe_names = [cleaner.sanitize(entry) for entry in os.listdir('data')]
    items = [
        '<li><a href="index.py?id={name}">{name}</a></li>'.format(name=safe)
        for safe in safe_names
    ]
    return ''.join(items)
def handle(event, context):
    """Handle one incoming chat request and broadcast or replay messages.

    If the JSON body contains ``getAllMessages``, the stored messages are
    posted back to the requesting connection only.  Otherwise the body's
    ``data`` field is sanitized, sent to the ``<Stage>-AnalysisHandler``
    Lambda for sentiment analysis, persisted, and posted to every known
    connection.

    Args:
        event: invocation event; ``body`` (JSON string) and
            ``requestContext`` (``domainName``, ``connectionId``) are read.
        context: unused.

    Returns:
        ``{"statusCode": 200}``.
    """
    ddb = boto3.resource('dynamodb')
    messages_table = MessagesStore(ddb)
    request_context = event["requestContext"]
    # Parse the body once; it is needed for both the command check and the
    # message payload.
    body = json.loads(event["body"])

    # Every post_to_connection call targets the same endpoint, so one client
    # serves the whole invocation.
    connections_url = "https://" + request_context[
        "domainName"] + "/" + os.environ["Stage"]
    gatewayapi = boto3.client("apigatewaymanagementapi",
                              endpoint_url=connections_url)

    if "getAllMessages" in body:
        gatewayapi.post_to_connection(
            ConnectionId=request_context["connectionId"],
            Data=json.dumps(messages_table.get_messages()).encode('utf-8'))
        return {"statusCode": 200}

    connections_table = ConnectionsStore(ddb)
    connections_ids = connections_table.get_connections()

    sanitizer = Sanitizer()
    chat_message = sanitizer.sanitize(body["data"])

    lambda_client = boto3.client('lambda')
    analysis_handler_function_name = "{}-{}".format(os.environ["Stage"],
                                                    "AnalysisHandler")
    response = lambda_client.invoke(
        FunctionName=analysis_handler_function_name,
        InvocationType='RequestResponse',
        Payload=json.dumps({"message": chat_message}))
    sentiment_response = response['Payload'].read().decode("utf-8")

    messageToSend = {
        "action": "sendMessage",
        "data": {
            "message": chat_message,
            "sentiment": sentiment_response
        }
    }
    message_data_for_ddb = {
        "date": date.today().strftime("%d-%m-%Y"),
        "timestamp": int(round(time.time() * 1000)),
        "message": messageToSend["data"]
    }
    messages_table.add_new_message(**message_data_for_ddb)

    # The outgoing payload is identical for every recipient; encode it once.
    encoded_message = json.dumps(messageToSend).encode('utf-8')
    for connection_id in connections_ids:
        gatewayapi.post_to_connection(
            ConnectionId=connection_id['connectionId'],
            Data=encoded_message)
    return {"statusCode": 200}
def _compose_html(self, tagged_text, hard_words):
    """Render tagged text as sanitized HTML with difficult parts marked.

    Every ``chunk`` element becomes a ``<p>``; its ``sentence`` elements
    are joined with single spaces.  A token whose ``orth`` or first
    ``lex/base`` is in ``hard_words`` is wrapped in ``<mark>``.  A sentence
    with more than 20 ``tok`` children is wrapped in
    ``<span class="long_sentence">`` and increments
    ``self.long_sentence_count``.

    Args:
        tagged_text: element tree supporting ``iter`` and ``xpath``.
        hard_words: container of words to highlight.

    Returns:
        The composed markup after passing through the sanitizer.
    """
    sanitizer = Sanitizer({
        'tags': {'p', 'mark', 'span'},
        'attributes': {
            'span': ('class', )
        },
        'empty': set(),
        'separate': {'p', 'mark', 'span'},
    })

    html_paragraphs = []
    for paragraph in tagged_text.iter('chunk'):
        sentences = []
        for sentence in paragraph.iter('sentence'):
            pieces = []
            no_space = False
            for el in sentence:
                if el.tag == 'ns':
                    # The next token is glued to the previous one.
                    no_space = True
                    continue
                if el.tag != 'tok':
                    # Only tokens contribute text.
                    continue

                orth = el.xpath('orth/text()')[0]
                base = el.xpath('lex/base/text()')[0]
                if orth in hard_words or base in hard_words:
                    orth = '<mark>{}</mark>'.format(orth)

                pieces.append(orth if no_space else ' ' + orth)
                no_space = False

            s = ''.join(pieces)
            sentence_length = sentence.xpath('count(./tok)')
            if sentence_length > 20:
                self.long_sentence_count += 1
                s = '<span class="long_sentence">{}</span>'.format(s)
            sentences.append(s)
        html_paragraphs.append('<p>{}</p>'.format(' '.join(sentences)))

    html_text = ''.join(html_paragraphs)
    return sanitizer.sanitize(html_text)
def get_content(url):
    """Fetch a job page and return the sanitized markup of its ``<section>``.

    Args:
        url: address of the page to download (redirects are followed,
            5 second timeout).

    Returns:
        ``{"post_details": <sanitized section markup>, "fields": {}}``.

    Raises:
        ServerException: the download timed out.
        InvalidUserInput: the page has no ``ul.details`` list, or
            processing its content failed.
    """
    try:
        web_page = requests.get(url, allow_redirects=True, timeout=5)
    except requests.exceptions.Timeout as err:
        raise ServerException(
            description=
            "Failed to fetch web page in time! Skipping preprocessing"
        ) from err

    soup = BeautifulSoup(web_page.content, "html.parser")
    content = str(soup.section)
    try:
        # When the details list is missing, find() returns None and the
        # attribute access raises, which rejects the page below.
        for field in soup.find('ul', attrs={"class": "details"}).children:
            print(str(field).replace("\n", ""))
        content = Sanitizer().sanitize(content)
    except Exception as err:
        # ``Exception`` rather than a bare except, so KeyboardInterrupt and
        # SystemExit are not converted into a user-input error.
        raise InvalidUserInput(
            description="Couldn't find details for that job") from err
    return {"post_details": content, "fields": {}}
#!python3 #-*- coding: utf-8 -*- import sys, codecs, os sys.stdout = codecs.getwriter("utf-8")(sys.stdout.detach()) import cgi, cgitb import view cgitb.enable() print("Content-Type: text/html; charset=utf-8\r\n") print() form = cgi.FieldStorage() from html_sanitizer import Sanitizer sanitizer = Sanitizer() if "id" in form: title = pageId = form["id"].value description = open("data/"+pageId, encoding="utf-8").read() description = sanitizer.sanitize(description) title = sanitizer.sanitize(title) update_link = "<a href = 'update.py?id={pageId}'>update</a>".format(pageId = pageId) delete_action = ''' <form action="process_delete.py" method="post"> <input type = "hidden" name = "pageId" value="{}"> <input type = "submit" value = "delete"> </form> '''.format(pageId) else: title = pageId = "Welcome" description = "Hello Web" update_link = "" delete_action = "" print(pageId) print("""<!DOCTYPE html>
print(f"Process {a['url']}...") article = json.loads( subprocess.getoutput(f"../api/read-article {a['url']}")) article['url'] = a['url'] article['body'] = markdown.markdown(article['body'], extensions=md_extensions, output_format='html5') article['body'] = update_link(article['body'], a['url']) article['comments_body'] = '' for comment in article.get('comments', {}): sanitizer = Sanitizer() comment['body'] = markdown.markdown(comment['body'], extensions=md_extensions, output_format='html5') comment['body'] = sanitizer.sanitize(comment['body']) if comment['site']: if comment['site'].startswith( 'https://') or comment['site'].startswith('http://'): comment['site'] = sanitizer.sanitize(comment['site']) else: comment['site'] = None article['comments_body'] += comment_tpl(comment) app_body += article_tpl(article) full_html = main_tpl({ 'body': app_body, 'title': f"{article['title']} | Duke's Blog" }) if not os.path.exists(a['url']):
def _read_iio_reading(path, render, max_retries=10):
    """Read a value from ``path``, divide it by 1000 and format it via ``render``.

    A read that raises ``OSError`` is retried; once ``max_retries`` retries
    have also failed, the string ``"ERR"`` is returned.
    """
    with open(path, 'r') as f:
        for _ in range(max_retries + 1):
            try:
                return render(float(f.readline()) / 1000)
            except OSError:
                continue
    return "ERR"


def update_temp():
    """Refresh all temperature / humidity displays and log one CSV row.

    Reads the cottage 1-wire sensor and the IIO temperature and humidity
    inputs, fetches the pumphouse readings over HTTP, pushes every value to
    its display variable and appends a row to ``csvwriter``.  A pumphouse
    value that is unavailable (bad HTTP status or label missing from the
    response) is reported as ``"ERR"``.

    Raises:
        Exception: the first line of the 1-wire sensor file lacks ``YES``.
    """
    # Get local cottage temp
    with open('/sys/bus/w1/devices/28-000007171178/w1_slave', 'r') as f:
        crc_line = f.readline()
        if "YES" not in crc_line:
            raise Exception("Bad CRC for Cottage temp sensor")
        reading_line = f.readline()
    cot = reading_line.split("t=")[1].rstrip("\n")
    cot = temp_to_format_str(float(cot) / 1000)

    # Get local cottage temp and humidity from DHT
    cot2 = _read_iio_reading(
        '/sys/bus/iio/devices/iio:device0/in_temp_input',
        temp_to_format_str)
    hm = _read_iio_reading(
        '/sys/bus/iio/devices/iio:device0/in_humidityrelative_input',
        lambda value: str(round(value, 2)) + " %")

    tempCottage.set(cot)
    tempCottage2.set(cot2)
    humid.set(hm)

    # Start from "ERR" so a failed request or a label missing from the
    # response cannot leave a value undefined.
    pumphouse = {
        "Lake": "ERR",
        "Outside": "ERR",
        "Pump intake": "ERR",
        "Ambient outdoor": "ERR",
    }

    # Get info from pumphouse
    r = requests.get("http://192.168.0.180/temp")
    # Set encoding to UTF-8 since the HTML is in it but apparently the ESP
    # sends a different one
    r.encoding = "UTF-8"
    if r.ok:
        # Clean up and split response
        sanitizer = Sanitizer()
        txt = sanitizer.sanitize(r.text)
        # Split on line breaks and remove all but temp info
        for item in txt.split("<br>")[1:5]:
            tempS = item.split(":")
            tmpValue = float(removeNonDecimal(tempS[1]))
            if tempS[0] in pumphouse:
                pumphouse[tempS[0]] = temp_to_format_str(tmpValue)

    lakeTempVar = pumphouse["Lake"]
    outsideTempVar = pumphouse["Outside"]
    intakeTempVar = pumphouse["Pump intake"]
    ambientTempVar = pumphouse["Ambient outdoor"]

    tempLake.set(lakeTempVar)
    tempOutside.set(outsideTempVar)
    tempIntake.set(intakeTempVar)
    tempAmb.set(ambientTempVar)

    csvwriter.writerow({'Time': time.strftime("%F %T"),
                        'Outside Temp': removeNonDecimal(outsideTempVar),
                        'Cottage Temp': removeNonDecimal(cot),
                        'Cottage Temp (DHT)': removeNonDecimal(cot2),
                        'Lake Temp': removeNonDecimal(lakeTempVar),
                        'Pump Intake': removeNonDecimal(intakeTempVar),
                        'Ambient Lake Temp': removeNonDecimal(ambientTempVar),
                        'Humidity': removeNonDecimal(hm)})